[llvm-commits] [test-suite] r46573 [2/3] - in /test-suite/trunk/MultiSource/Applications: ./ ClamAV/ ClamAV/dbdir/ ClamAV/inputs/ ClamAV/inputs/rtf-test/ ClamAV/scripts/
Evan Cheng
evan.cheng at apple.com
Wed Jan 30 13:17:12 PST 2008
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_binhex.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2004 Nigel Horne <njh at bandsman.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ * Change History:
+ * $Log: binhex.c,v $
+ * Revision 1.23 2007/02/12 20:46:08 njh
+ * Various tidy
+ *
+ * Revision 1.22 2006/07/31 09:19:52 njh
+ * Use MAP_PRIVATE
+ *
+ * Revision 1.21 2006/07/01 16:17:35 njh
+ * Added destroy flag
+ *
+ * Revision 1.20 2006/07/01 03:47:50 njh
+ * Don't loop if binhex runs out of memory
+ *
+ * Revision 1.19 2006/05/19 11:02:12 njh
+ * Just include mbox.h
+ *
+ * Revision 1.18 2006/04/09 19:59:27 kojm
+ * update GPL headers with new address for FSF
+ *
+ * Revision 1.17 2005/11/06 14:03:26 nigelhorne
+ * Ensure NAME_MAX isn't redefined on BeOS
+ *
+ * Revision 1.16 2005/05/14 16:13:25 nigelhorne
+ * Ensure munmap is the right size
+ *
+ * Revision 1.15 2005/05/13 19:30:34 nigelhorne
+ * Clean cli_realloc call
+ *
+ * Revision 1.14 2005/03/10 08:51:30 nigelhorne
+ * Tidy
+ *
+ * Revision 1.13 2005/01/19 05:29:41 nigelhorne
+ * tidy
+ *
+ * Revision 1.12 2004/12/27 14:17:14 nigelhorne
+ * Fix segfault if write to temporary file fails
+ *
+ * Revision 1.11 2004/12/17 12:03:38 nigelhorne
+ * Tidy up for machines without MMAP
+ *
+ * Revision 1.10 2004/12/16 15:29:51 nigelhorne
+ * Tidy
+ *
+ * Revision 1.9 2004/11/28 22:06:39 nigelhorne
+ * Tidy space only headers code
+ *
+ * Revision 1.8 2004/11/28 21:05:50 nigelhorne
+ * Handle headers with only spaces
+ *
+ * Revision 1.7 2004/11/23 09:05:26 nigelhorne
+ * Fix crash in base64 encoded binhex files
+ *
+ * Revision 1.6 2004/11/22 15:16:53 nigelhorne
+ * Use cli_realloc instead of many cli_mallocs
+ *
+ * Revision 1.5 2004/11/18 20:11:34 nigelhorne
+ * Fix segfault
+ *
+ * Revision 1.4 2004/11/18 19:30:29 kojm
+ * add support for Mac's HQX file format
+ *
+ * Revision 1.3 2004/11/18 18:24:45 nigelhorne
+ * Added binhex.h
+ *
+ * Revision 1.2 2004/11/18 18:09:06 nigelhorne
+ * First draft of binhex.c
+ *
+ */
+static char const rcsid[] = "$Id: binhex.c,v 1.23 2007/02/12 20:46:08 njh Exp $";
+
+#include "clamav.h"
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef CL_DEBUG
+#define NDEBUG /* map CLAMAV debug onto standard */
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT /* for Solaris 2.8 */
+#endif
+#endif
+
+#ifdef HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#endif
+
+#include <stdio.h>
+#include <memory.h>
+#include <sys/stat.h>
+#include "others.h"
+
+#include "mbox.h"
+#include "binhex.h"
+
+/*
+ * Decode a stand-alone BinHex file.
+ *
+ * The file open on descriptor desc is mmap()ed, split into lines and fed to a
+ * message object, which is then written out under the directory dir by
+ * messageToFileblob().
+ *
+ * Returns:
+ *	CL_EOPEN	fstat() failed
+ *	CL_CLEAN	the file is empty, the decoded data was saved, or mmap
+ *			support is not compiled in
+ *	CL_EMEM		the message could not be created or mmap() failed
+ *	CL_EFORMAT	binhexBegin() found no binhex start line
+ *	CL_EIO		messageToFileblob() failed
+ */
+int
+cli_binhex(const char *dir, int desc)
+{
+#ifndef HAVE_MMAP
+	cli_warnmsg("File not decoded - binhex decoding needs mmap() (for now)\n");
+	return CL_CLEAN;
+#else
+	struct stat statb;
+	char *buf, *start, *line;
+	size_t size;
+	long bytesleft;
+	message *m;
+	fileblob *fb;
+
+	if(fstat(desc, &statb) < 0)
+		return CL_EOPEN;
+
+	size = (size_t)statb.st_size;
+
+	if(size == 0)
+		return CL_CLEAN;
+
+	m = messageCreate();
+	if(m == NULL)
+		return CL_EMEM;
+
+	start = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+	if(buf == MAP_FAILED) {
+		messageDestroy(m);
+		return CL_EMEM;
+	}
+
+	cli_dbgmsg("mmap'ed binhex file\n");
+
+	bytesleft = (long)size;
+	line = NULL;
+
+	while(bytesleft > 0) {
+		int length = 0;
+		char *ptr, *newline;
+
+		/* Find the end of the current line without running off the mapping */
+		for(ptr = buf; bytesleft && (*ptr != '\n') && (*ptr != '\r'); ptr++) {
+			length++;
+			--bytesleft;
+		}
+
+		/* The mapping is not NUL terminated, so copy the line out */
+		newline = cli_realloc(line, (size_t)(length + 1));
+		if(newline == NULL)
+			break;	/* out of memory: stop, don't loop */
+
+		line = newline;
+
+		memcpy(line, buf, length);
+		line[length] = '\0';
+
+		if(messageAddStr(m, line) < 0)
+			break;
+
+		if(bytesleft <= 0)
+			break;	/* the last line had no terminator */
+
+		/*
+		 * Step over the terminator: "\n", "\r\n" or a lone "\r".
+		 * Only swallow the byte after a '\r' when it really is a
+		 * '\n', otherwise the first character of the next line of a
+		 * CR-only file would be lost
+		 */
+		if(*ptr == '\r') {
+			ptr++;
+			bytesleft--;
+			if((bytesleft > 0) && (*ptr == '\n')) {
+				ptr++;
+				bytesleft--;
+			}
+		} else {
+			ptr++;
+			bytesleft--;
+		}
+		buf = ptr;
+	}
+	munmap(start, size);
+
+	free(line);
+
+	if(binhexBegin(m) == NULL) {
+		messageDestroy(m);
+		cli_errmsg("No binhex line found\n");
+		return CL_EFORMAT;
+	}
+
+	/* similar to binhexMessage */
+	messageSetEncoding(m, "x-binhex");
+
+	fb = messageToFileblob(m, dir, 1);
+	if(fb) {
+		cli_dbgmsg("Binhex file decoded to %s\n", fileblobGetFilename(fb));
+		fileblobDestroy(fb);
+	} else
+		cli_errmsg("Couldn't decode binhex file to %s\n", dir);
+	messageDestroy(m);
+
+	/* fb is only compared against NULL here, never dereferenced */
+	if(fb)
+		return CL_CLEAN;	/* a lie - but it gets things going */
+	return CL_EIO;	/* probably CL_EMEM, but we can't tell at this layer */
+#endif
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_blob.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,728 @@
+/*
+ * Copyright (C) 2002 Nigel Horne <njh at bandsman.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+static char const rcsid[] = "$Id: blob.c,v 1.64 2007/02/12 22:25:14 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifdef C_WINDOWS
+#include "stdafx.h"
+#include <io.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h> /* for NAME_MAX */
+#endif
+
+#ifdef C_DARWIN
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "others.h"
+#include "mbox.h"
+#include "matcher.h"
+#include "scanners.h"
+
+#ifndef CL_DEBUG
+#define NDEBUG /* map CLAMAV debug onto standard */
+#endif
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#include <assert.h>
+
+#if defined(C_MINGW) || defined(C_WINDOWS)
+#include <windows.h>
+#endif
+
+#define MAX_SCAN_SIZE (20*1024) /*
+ * The performance benefit of scanning
+ * early disappears on medium and
+ * large sized files.
+ * Parenthesised so that the macro is
+ * safe inside larger expressions
+ */
+
+static const char *blobGetFilename(const blob *b);
+
+/*
+ * Allocate a zero-filled blob with cli_calloc() and hand it back to the
+ * caller. Debug builds also stamp the class magic into the new object (when
+ * the allocation succeeded) and log the call.
+ */
+blob *
+blobCreate(void)
+{
+	blob *fresh = (blob *)cli_calloc(1, sizeof(blob));
+
+#ifdef CL_DEBUG
+	if(fresh != NULL)
+		fresh->magic = BLOBCLASS;
+	cli_dbgmsg("blobCreate\n");
+#endif
+	return fresh;
+}
+
+/*
+ * Release a blob together with its name and data buffers.
+ *
+ * b must be a valid blob. The sanity checks run before anything reads
+ * through b, so a NULL pointer trips the assertion in debug builds instead
+ * of being dereferenced by the log call.
+ */
+void
+blobDestroy(blob *b)
+{
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+#ifdef CL_DEBUG
+	cli_dbgmsg("blobDestroy %d\n", b->magic);
+#else
+	cli_dbgmsg("blobDestroy\n");
+#endif
+
+	/* free(NULL) is a no-op, so no guards are needed */
+	free(b->name);
+	free(b->data);
+#ifdef CL_DEBUG
+	/* make use of a stale pointer detectable */
+	b->magic = INVALIDCLASS;
+#endif
+	free(b);
+}
+
+/*
+ * Destroy the first n entries of blobList, working from the last entry
+ * back to the first. Each destroyed slot is reset to NULL; slots that are
+ * already NULL are skipped.
+ */
+void
+blobArrayDestroy(blob *blobList[], int n)
+{
+	int idx;
+
+	assert(blobList != NULL);
+
+	for(idx = n - 1; idx >= 0; idx--) {
+		cli_dbgmsg("blobArrayDestroy: %d\n", idx);
+		if(blobList[idx] == NULL)
+			continue;
+		blobDestroy(blobList[idx]);
+		blobList[idx] = NULL;
+	}
+}
+
+/*
+ * Give the blob a name: any previous name is freed, filename is duplicated
+ * with cli_strdup() and the copy is passed through sanitiseName().
+ * The dir argument is not used. If the duplication fails the blob is left
+ * without a name.
+ */
+/*ARGSUSED*/
+void
+blobSetFilename(blob *b, const char *dir, const char *filename)
+{
+	char *copy;
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+	assert(filename != NULL);
+
+	cli_dbgmsg("blobSetFilename: %s\n", filename);
+
+	if(b->name != NULL)
+		free(b->name);
+
+	copy = cli_strdup(filename);
+	b->name = copy;
+
+	if(copy != NULL)
+		sanitiseName(copy);
+}
+
+/*
+ * Return the name stored in the blob, which is NULL when none has been set.
+ * The string still belongs to the blob.
+ */
+static const char *
+blobGetFilename(const blob *b)
+{
+	const char *stored;
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	stored = b->name;
+	return stored;
+}
+
+/*
+ * Append len bytes from data to the end of the blob, enlarging the buffer
+ * when there is not enough room. A blob that has been closed is reopened
+ * (with a warning).
+ *
+ * Returns 0 on success (adding zero bytes always succeeds), <0 if memory
+ * could not be allocated. On failure the blob is unchanged.
+ */
+int
+blobAddData(blob *b, const unsigned char *data, size_t len)
+{
+#ifdef HAVE_GETPAGESIZE
+	static int pagesize;
+	int growth;
+#endif
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+	assert(data != NULL);
+
+	if(len == 0)
+		return 0;
+
+	if(b->isClosed) {
+		/*
+		 * Should be cli_dbgmsg, but I want to see them for now,
+		 * and cli_dbgmsg doesn't support debug levels
+		 */
+		cli_warnmsg("Reopening closed blob\n");
+		b->isClosed = 0;
+	}
+	/*
+	 * The trade-off here is between reducing the number of calls to
+	 * malloc/realloc and not overallocating memory. A lot of machines
+	 * are more tight with memory than one may imagine which is why
+	 * we don't just allocate a *huge* amount and be done with it. Closing
+	 * the blob helps because that reclaims memory. If you know the maximum
+	 * size of a blob before you start adding data, use blobGrow(); that is
+	 * the best approach
+	 */
+#ifdef HAVE_GETPAGESIZE
+	if(pagesize == 0) {
+		pagesize = getpagesize();
+		if(pagesize == 0)
+			pagesize = 4096;
+	}
+	/* Grow in whole pages, always by more than len bytes */
+	growth = pagesize;
+	if(len >= (size_t)pagesize)
+		growth = ((len / pagesize) + 1) * pagesize;
+
+	if(b->data == NULL) {
+		assert(b->len == 0);
+		assert(b->size == 0);
+
+		b->data = cli_malloc(growth);
+		if(b->data == NULL)
+			return -1;	/* size stays 0, so the blob stays consistent */
+		b->size = growth;
+	} else if(b->size < b->len + (off_t)len) {
+		unsigned char *p = cli_realloc(b->data, b->size + growth);
+
+		if(p == NULL)
+			return -1;
+
+		b->size += growth;
+		b->data = p;
+	}
+#else
+	if(b->data == NULL) {
+		assert(b->len == 0);
+		assert(b->size == 0);
+
+		b->data = cli_malloc(len * 4);
+		if(b->data == NULL)
+			return -1;	/* size stays 0, so the blob stays consistent */
+		b->size = (off_t)len * 4;
+	} else if(b->size < b->len + (off_t)len) {
+		unsigned char *p = cli_realloc(b->data, b->size + (len * 4));
+
+		if(p == NULL)
+			return -1;
+
+		b->size += (off_t)len * 4;
+		b->data = p;
+	}
+#endif
+
+	/* b->data is non-NULL and has room for len more bytes here */
+	memcpy(&b->data[b->len], data, len);
+	b->len += (off_t)len;
+
+	return 0;
+}
+
+/*
+ * Return the blob's data buffer, or NULL when the blob holds no bytes.
+ * The buffer still belongs to the blob.
+ */
+unsigned char *
+blobGetData(const blob *b)
+{
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	return (b->len == 0) ? NULL : b->data;
+}
+
+/*
+ * Return the number of bytes stored in the blob (b->len), not the size of
+ * the underlying allocation.
+ */
+size_t
+blobGetDataSize(const blob *b)
+{
+	size_t used;
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	used = b->len;
+	return used;
+}
+
+/*
+ * Mark the blob as closed: nothing more is expected to be added to it.
+ * When at least 64 bytes of the allocation are unused the buffer is shrunk
+ * to fit (or released altogether if the blob is empty). If shrinking fails
+ * the blob is left open and unchanged. Closing an already closed blob only
+ * produces a warning.
+ */
+void
+blobClose(blob *b)
+{
+	unsigned char *shrunk;
+
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	if(b->isClosed) {
+		cli_warnmsg("Attempt to close a previously closed blob\n");
+		return;
+	}
+
+	if((b->size - b->len) < 64) {
+		/* too little slack to be worth reclaiming */
+		b->isClosed = 1;
+		return;
+	}
+
+	if(b->len == 0) {	/* Not likely */
+		free(b->data);
+		b->data = NULL;
+		cli_dbgmsg("blobClose: recovered all %lu bytes\n",
+			(unsigned long)b->size);
+		b->size = 0;
+	} else {
+		shrunk = cli_realloc(b->data, b->len);
+		if(shrunk == NULL)
+			return;
+
+		cli_dbgmsg("blobClose: recovered %lu bytes from %lu\n",
+			(unsigned long)(b->size - b->len),
+			(unsigned long)b->size);
+		b->size = b->len;
+		b->data = shrunk;
+	}
+	b->isClosed = 1;
+}
+
+/*
+ * Compare the contents of two blobs.
+ * Returns 0 if they hold the same bytes (or are the same object), 1 if
+ * their lengths differ, otherwise the result of memcmp() on the data.
+ */
+int
+blobcmp(const blob *b1, const blob *b2)
+{
+	size_t len1, len2;
+
+	assert(b1 != NULL);
+	assert(b2 != NULL);
+
+	if(b1 == b2)
+		return 0;
+
+	len1 = blobGetDataSize(b1);
+	len2 = blobGetDataSize(b2);
+
+	if(len1 != len2)
+		return 1;
+
+	/* the lengths are equal from here on; two empty blobs match */
+	if(len1 == 0)
+		return 0;
+
+	return memcmp(blobGetData(b1), blobGetData(b2), len1);
+}
+
+/*
+ * Enlarge the blob's allocation by len bytes without adding any data.
+ * A closed blob is reopened (with a warning).
+ *
+ * Returns CL_SUCCESS if the extra space was obtained (or len is 0),
+ * CL_EMEM if the allocation failed, in which case the blob keeps its
+ * previous buffer and size.
+ */
+int
+blobGrow(blob *b, size_t len)
+{
+	assert(b != NULL);
+	assert(b->magic == BLOBCLASS);
+
+	if(len == 0)
+		return CL_SUCCESS;
+
+	if(b->isClosed) {
+		/*
+		 * Should be cli_dbgmsg, but I want to see them for now,
+		 * and cli_dbgmsg doesn't support debug levels
+		 */
+		cli_warnmsg("Growing closed blob\n");
+		b->isClosed = 0;
+	}
+	if(b->data == NULL) {
+		assert(b->len == 0);
+		assert(b->size == 0);
+
+		b->data = cli_malloc(len);
+		if(b->data == NULL)
+			return CL_EMEM;
+		b->size = (off_t)len;
+	} else {
+		unsigned char *ptr = cli_realloc(b->data, b->size + len);
+
+		/*
+		 * The old buffer is still valid when realloc fails, so
+		 * b->data being non-NULL says nothing about success
+		 */
+		if(ptr == NULL)
+			return CL_EMEM;
+
+		b->size += (off_t)len;
+		b->data = ptr;
+	}
+
+	return CL_SUCCESS;
+}
+
+/*
+ * Allocate a zero-filled fileblob with cli_calloc() and return it.
+ * Debug builds also stamp the class magic into the embedded blob (when the
+ * allocation succeeded) and log the call.
+ */
+fileblob *
+fileblobCreate(void)
+{
+	fileblob *fresh = (fileblob *)cli_calloc(1, sizeof(fileblob));
+
+#ifdef CL_DEBUG
+	if(fresh != NULL)
+		fresh->b.magic = BLOBCLASS;
+	cli_dbgmsg("blobCreate\n");
+#endif
+	return fresh;
+}
+
+/*
+ * Scan the fileblob, then destroy it. Returns CL_VIRUS or CL_CLEAN.
+ *
+ * When temporary files are being kept (cli_leavetemps_flag) nothing is
+ * scanned here and the file is left for the caller to scan. Otherwise the
+ * file is removed whenever fileblobScan() gave a definite answer
+ * (CL_VIRUS or CL_BREAK); for any other result it is kept unless empty.
+ */
+int
+fileblobScanAndDestroy(fileblob *fb)
+{
+	int rc, infected;
+
+	if(cli_leavetemps_flag) {
+		/* Can't remove the file, the caller must scan */
+		fileblobDestroy(fb);
+		return CL_CLEAN;
+	}
+
+	rc = fileblobScan(fb);
+	infected = (rc == CL_VIRUS);
+
+	if(infected || (rc == CL_BREAK))
+		fileblobDestructiveDestroy(fb);
+	else
+		fileblobDestroy(fb);
+
+	return infected ? CL_VIRUS : CL_CLEAN;
+}
+
+/*
+ * Destroy the fileblob and delete its file from disc, whether or not the
+ * file is empty. The stream is closed and the file unlinked only when both
+ * the stream and the full path are known; the remaining clean-up is left
+ * to fileblobDestroy().
+ */
+void
+fileblobDestructiveDestroy(fileblob *fb)
+{
+	const int haveFile = (fb->fp != NULL) && (fb->fullname != NULL);
+
+	if(haveFile) {
+		fclose(fb->fp);
+		cli_dbgmsg("fileblobDestructiveDestroy: %s\n", fb->fullname);
+		if(unlink(fb->fullname) < 0)
+			cli_warnmsg("fileblobDestructiveDestroy: Can't delete file %s\n", fb->fullname);
+		free(fb->fullname);
+		fb->fullname = NULL;
+		fb->fp = NULL;
+	}
+	if(fb->b.name != NULL) {
+		free(fb->b.name);
+		fb->b.name = NULL;
+	}
+	fileblobDestroy(fb);
+}
+
+/*
+ * Destroy the fileblob, and remove the file associated with it if that file is
+ * empty.
+ *
+ * Three situations are handled:
+ *  - a name and an open stream: the stream is closed and the file is
+ *    unlinked if nothing was ever written to it;
+ *  - no stream but data still buffered in memory: the data is discarded
+ *    and an error is logged because it never reached a file;
+ *  - neither: only the fileblob's own memory is released.
+ * The name and full path are freed on every path, so a fileblob that was
+ * named but never got a file or any data does not leak its name.
+ */
+void
+fileblobDestroy(fileblob *fb)
+{
+	assert(fb != NULL);
+	assert(fb->b.magic == BLOBCLASS);
+
+	if(fb->b.name && fb->fp) {
+		fclose(fb->fp);
+		if(fb->fullname) {
+			cli_dbgmsg("fileblobDestroy: %s\n", fb->fullname);
+			if(!fb->isNotEmpty) {
+				cli_dbgmsg("fileblobDestroy: not saving empty file\n");
+				if(unlink(fb->fullname) < 0)
+					cli_warnmsg("fileblobDestroy: Can't delete empty file %s\n", fb->fullname);
+			}
+		}
+
+		assert(fb->b.data == NULL);
+	} else if(fb->b.data) {
+		free(fb->b.data);
+		if(fb->b.name)
+			cli_errmsg("fileblobDestroy: %s not saved: report to http://bugs.clamav.net\n",
+				(fb->fullname) ? fb->fullname : fb->b.name);
+		else
+			cli_errmsg("fileblobDestroy: file not saved (%lu bytes): report to http://bugs.clamav.net\n",
+				(unsigned long)fb->b.len);
+	}
+
+	/* free(NULL) is a no-op, so these cover every path above */
+	free(fb->b.name);
+	free(fb->fullname);
+#ifdef CL_DEBUG
+	fb->b.magic = INVALIDCLASS;
+#endif
+	free(fb);
+}
+
+/*
+ * Name the fileblob and create its temporary file in directory dir.
+ *
+ * The first call sets the (sanitised) name; later calls are ignored. A
+ * unique file based on dir and the name is then created and opened for
+ * writing, and any data buffered in memory so far is written to it.
+ *
+ * Nothing is returned. On failure fb->fp stays NULL, so data added later
+ * continues to be buffered in memory.
+ */
+void
+fileblobSetFilename(fileblob *fb, const char *dir, const char *filename)
+{
+	int fd;
+	size_t dirlen;
+	char fullname[NAME_MAX + 1];
+
+	if(fb->b.name)
+		return;
+
+	assert(filename != NULL);
+	assert(dir != NULL);
+
+	blobSetFilename(&fb->b, dir, filename);
+
+	/*
+	 * Reload the filename, it may be different from the one we've
+	 * asked for, e.g. '/'s taken out
+	 */
+	filename = blobGetFilename(&fb->b);
+
+	/* blobSetFilename leaves the name unset if it ran out of memory */
+	if(filename == NULL)
+		return;
+
+	/*
+	 * "dir" + separator + "XXXXXX" + NUL must fit in fullname. Without
+	 * this check the precision computed below would go negative, which
+	 * printf treats as "no limit", and fullname would overflow
+	 */
+	dirlen = strlen(dir);
+	if(dirlen > sizeof(fullname) - 9) {
+		cli_errmsg("fileblobSetFilename: directory name %s is too long\n", dir);
+		return;
+	}
+
+#ifdef C_QNX6
+	/*
+	 * QNX6 support from mikep at kaluga.org to fix bug where mkstemp
+	 * can return ETOOLONG even when the file name isn't too long
+	 */
+	snprintf(fullname, sizeof(fullname), "%s/clamavtmpXXXXXXXXXXXXX", dir);
+#elif defined(C_WINDOWS)
+	sprintf_s(fullname, sizeof(fullname) - 1, "%s\\%.*sXXXXXX", dir,
+		(int)(sizeof(fullname) - 9 - dirlen), filename);
+#else
+	snprintf(fullname, sizeof(fullname), "%s/%.*sXXXXXX", dir,
+		(int)(sizeof(fullname) - 9 - dirlen), filename);
+#endif
+
+#if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN) || defined(C_QNX6)
+	cli_dbgmsg("fileblobSetFilename: mkstemp(%s)\n", fullname);
+	fd = mkstemp(fullname);
+	if((fd < 0) && (errno == EINVAL)) {
+		/*
+		 * This happens with some Linux flavours when (mis)handling
+		 * filenames with foreign characters
+		 */
+		snprintf(fullname, sizeof(fullname), "%s/clamavtmpXXXXXXXXXXXXX", dir);
+		cli_dbgmsg("fileblobSetFilename: retry as mkstemp(%s)\n", fullname);
+		fd = mkstemp(fullname);
+	}
+#elif defined(C_WINDOWS)
+	cli_dbgmsg("fileblobSetFilename: _mktemp_s(%s)\n", fullname);
+	if(_mktemp_s(fullname, strlen(fullname) + 1) != 0) {
+		char *name;
+
+		/* _mktemp_s only allows 26 files */
+		cli_dbgmsg("fileblobSetFilename: _mktemp_s(%s) failed: %s\n", fullname, strerror(errno));
+		name = cli_gentemp(dir);
+		if(name == NULL)
+			return;
+		fd = open(name, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+		if(fd >= 0) {
+			strncpy(fullname, name, sizeof(fullname) - 1);
+			/* strncpy doesn't terminate when it truncates */
+			fullname[sizeof(fullname) - 1] = '\0';
+		}
+		free(name);
+	} else
+		fd = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#else
+	cli_dbgmsg("fileblobSetFilename: mktemp(%s)\n", fullname);
+	(void)mktemp(fullname);
+	fd = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#endif
+
+	if(fd < 0) {
+		cli_errmsg("Can't create temporary file %s: %s\n", fullname, strerror(errno));
+		cli_dbgmsg("%lu %lu\n", (unsigned long)sizeof(fullname),
+			(unsigned long)strlen(fullname));
+		return;
+	}
+
+	cli_dbgmsg("Creating %s\n", fullname);
+
+	fb->fp = fdopen(fd, "wb");
+
+	if(fb->fp == NULL) {
+		cli_errmsg("Can't create file %s: %s\n", fullname, strerror(errno));
+		cli_dbgmsg("%lu %lu\n", (unsigned long)sizeof(fullname),
+			(unsigned long)strlen(fullname));
+		close(fd);
+
+		return;
+	}
+	/* Move anything buffered before the file existed out to the file */
+	if(fb->b.data)
+		if(fileblobAddData(fb, fb->b.data, fb->b.len) == 0) {
+			free(fb->b.data);
+			fb->b.data = NULL;
+			fb->b.len = fb->b.size = 0;
+			fb->isNotEmpty = 1;
+		}
+
+	/*
+	 * If this strdup fails, then if the file is empty it won't be removed
+	 * until later. Since this is only a trivial issue, there is no need
+	 * to error if it fails to allocate
+	 */
+	fb->fullname = cli_strdup(fullname);
+}
+
+/*
+ * Add len bytes to the fileblob. Returns 0 on success, <0 on failure.
+ *
+ * Until the temporary file exists the data is buffered in memory through
+ * blobAddData(). Once it exists the data is written to the file, and if a
+ * scan context has been set the buffer is also scanned on the way through,
+ * but only while the number of bytes scanned so far is within the context's
+ * file size limit and within MAX_SCAN_SIZE. Once something has been found,
+ * further data is accepted but silently dropped.
+ */
+int
+fileblobAddData(fileblob *fb, const unsigned char *data, size_t len)
+{
+	if(len == 0)
+		return 0;
+
+	assert(data != NULL);
+
+	/* No file yet: keep the data in memory */
+	if(fb->fp == NULL)
+		return blobAddData(&(fb->b), data, len);
+
+#if defined(MAX_SCAN_SIZE) && (MAX_SCAN_SIZE > 0)
+	if(fb->isInfected)	/* pretend all was written */
+		return 0;
+
+	if(fb->ctx != NULL) {
+		const cli_ctx *ctx = fb->ctx;
+		const int withinLimit = !ctx->limits ||
+			(fb->bytes_scanned < ctx->limits->maxfilesize);
+
+		if(withinLimit && (fb->bytes_scanned <= MAX_SCAN_SIZE)) {
+			if(ctx->scanned)
+				*ctx->scanned += (unsigned long)len / CL_COUNT_PRECISION;
+			fb->bytes_scanned += (unsigned long)len;
+
+			/* very short buffers are not passed to the scanner */
+			if((len > 5) && (cli_scanbuff(data, (unsigned int)len, ctx->virname, ctx->engine, CL_TYPE_UNKNOWN_DATA) == CL_VIRUS)) {
+				cli_dbgmsg("fileblobAddData: found %s\n", *ctx->virname);
+				fb->isInfected = 1;
+			}
+		}
+	}
+#endif
+
+	if(fwrite(data, len, 1, fb->fp) != 1) {
+		cli_errmsg("fileblobAddData: Can't write %lu bytes to temporary file %s: %s\n",
+			(unsigned long)len, fb->b.name, strerror(errno));
+		return -1;
+	}
+
+	fb->isNotEmpty = 1;
+	return 0;
+}
+
+/*
+ * Return the name of the fileblob as held by its embedded blob; NULL if no
+ * name has been set. This is the name, not the full path of the file.
+ */
+const char *
+fileblobGetFilename(const fileblob *fb)
+{
+	const blob *inner = &(fb->b);
+
+	return blobGetFilename(inner);
+}
+
+/*
+ * Attach a scan context to the fileblob. fileblobAddData() and
+ * fileblobScan() read it back from fb->ctx; the pointer is stored as is,
+ * not copied.
+ */
+void
+fileblobSetCTX(fileblob *fb, cli_ctx *ctx)
+{
+ fb->ctx = ctx;
+}
+
+/*
+ * Performs a full scan on the fileblob, returning ClamAV status:
+ * CL_BREAK means clean
+ * CL_CLEAN means unknown
+ * CL_VIRUS means infected
+ *
+ * CL_VIRUS is returned straight away if the fileblob has already been
+ * flagged as infected. CL_ENULLARG is returned if the fileblob has no full
+ * path yet. CL_CLEAN ("unknown") is returned if no scan context has been
+ * set, if the file descriptor can't be duplicated, and always on Windows
+ * where no scan is done here.
+ */
+int
+fileblobScan(const fileblob *fb)
+{
+#ifndef C_WINDOWS
+ int rc, fd;
+#endif
+
+ if(fb->isInfected)
+ return CL_VIRUS;
+ if(fb->fullname == NULL) {
+ /* shouldn't happen, scan called before fileblobSetFilename */
+ cli_warnmsg("fileblobScan, fullname == NULL\n");
+ return CL_ENULLARG; /* there is no CL_UNKNOWN */
+ }
+ if(fb->ctx == NULL) {
+ /* fileblobSetCTX hasn't been called */
+ cli_dbgmsg("fileblobScan, ctx == NULL\n");
+ return CL_CLEAN; /* there is no CL_UNKNOWN */
+ }
+#ifndef C_WINDOWS
+ /*
+ * FIXME: On Windows, cli_readn gives "bad file descriptor" when called
+ * by cli_check_mydoom_log from the call to cli_magic_scandesc here
+ * which implies that the file descriptor is getting closed somewhere,
+ * but I can't see where.
+ * One possible fix would be to duplicate cli_scanfile here.
+ */
+ /* Flush buffered writes so that the scan sees everything added so far */
+ fflush(fb->fp);
+ /* Scan through a duplicate so the descriptor closed below is not the stream's own */
+ fd = dup(fileno(fb->fp));
+ if(fd == -1) {
+ cli_warnmsg("%s: dup failed\n", fb->fullname);
+ return CL_CLEAN;
+ }
+ /* cli_scanfile is static :-( */
+ /*if(cli_scanfile(fb->fullname, fb->ctx) == CL_VIRUS) {
+ cli_dbgmsg("%s is infected\n", fb->fullname);
+ return CL_VIRUS;
+ }*/
+
+ rc = cli_magic_scandesc(fd, fb->ctx);
+ close(fd);
+
+ /* Any result other than CL_VIRUS, errors included, is reported as clean */
+ if(rc == CL_VIRUS) {
+ cli_dbgmsg("%s is infected\n", fb->fullname);
+ return CL_VIRUS;
+ }
+ cli_dbgmsg("%s is clean\n", fb->fullname);
+ return CL_BREAK;
+#else /*C_WINDOWS*/
+ /* Ensure that the file is saved and scanned */
+ return CL_CLEAN; /* there is no CL_UNKNOWN :-( */
+#endif /*C_WINDOWS*/
+}
+
+/*
+ * Cheap check that does no scanning of its own: reports the isInfected
+ * flag, which fileblobAddData() sets when the data passing through it
+ * matched something.
+ */
+int
+fileblobInfected(const fileblob *fb)
+{
+	const int flagged = fb->isInfected;
+
+	return flagged;
+}
+
+/*
+ * Rewrite name in place so that it is usable as a file name on the host
+ * operating system: every character the platform does not accept becomes
+ * an underscore. On Darwin the top bit of each character is cleared as
+ * well. Which characters are rejected is chosen at compile time; by
+ * default only '/' is replaced.
+ * FIXME: What does QNX want? There is no #ifdef C_QNX, but if there were
+ * it may be best to treat it like MSDOS
+ */
+void
+sanitiseName(char *name)
+{
+	char *cursor;
+
+	for(cursor = name; *cursor != '\0'; cursor++) {
+#ifdef C_DARWIN
+		*cursor &= '\177';
+#endif
+		/* Also check for tab - "Heinz Martin" <Martin at hemag.ch> */
+#if defined(MSDOS) || defined(C_OS2)
+		/*
+		 * Don't take it from this that ClamAV supports DOS, it doesn't
+		 * I don't know if spaces are legal in OS/2.
+		 */
+		if(strchr("%/*?<>|\\\"+=,;:\t ~", *cursor))
+#elif defined(C_CYGWIN) || defined(C_WINDOWS)
+		if(strchr("%/*?<>|\\\"+=,;:\t~", *cursor))
+#else
+		if(*cursor == '/')
+#endif
+			*cursor = '_';
+	}
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cab.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,711 @@
+/*
+ * Copyright (C) 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This code is based on the work of Stuart Caie and the official
+ * specification.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <fcntl.h>
+
+#include "cltypes.h"
+#include "others.h"
+#include "mspack.h"
+#include "cab.h"
+
+#define EC32(x) le32_to_host(x) /* Convert little endian to host */
+#define EC16(x) le16_to_host(x)
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+/* hard limits */
+#define CAB_FOLDER_LIMIT 5000
+#define CAB_FILE_LIMIT 5000
+
+/* Cabinet format data structures */
+
+/*
+ * Fixed-size cabinet header. cab_open() reads it straight from the file
+ * into this structure, so the layout must match the file byte for byte;
+ * the multi-byte fields it uses are converted with EC16()/EC32() first.
+ * cab_open() expects the signature to be 0x4643534d and tests these flag
+ * bits: 0x0001 (preceding cabinet strings follow), 0x0002 (next cabinet
+ * strings follow), 0x0004 (a struct cab_hdr_opt follows).
+ */
+struct cab_hdr {
+ uint32_t signature; /* file signature */
+ uint32_t res1; /* reserved */
+ uint32_t cbCabinet; /* size of cabinet file */
+ uint32_t res2; /* reserved */
+ uint32_t coffFiles; /* offset of the first file entry */
+ uint32_t res3; /* reserved */
+ uint8_t versionMinor; /* file format version, minor */
+ uint8_t versionMajor; /* file format version, major */
+ uint16_t cFolders; /* number of folder entries */
+ uint16_t cFiles; /* number of file entries */
+ uint16_t flags; /* option flags */
+ uint16_t setID; /* multiple cabs related */
+ uint16_t iCabinet; /* multiple cabs related */
+};
+
+/*
+ * Optional header extension, read by cab_open() only when bit 0x0004 of
+ * the header flags is set. It gives the sizes of the reserved areas:
+ * cab_open() seeks past cbCFHeader bytes after this structure and past
+ * cbCFFolder bytes after every folder header, and records cbCFData in the
+ * archive.
+ */
+struct cab_hdr_opt {
+ uint16_t cbCFHeader; /* size of reserved header area */
+ uint8_t cbCFFolder; /* size of reserved folder area */
+ uint8_t cbCFData; /* size of reserved block area */
+};
+
+/*
+ * On-disc folder entry; cab_open() reads one per folder and converts the
+ * fields with EC16()/EC32(). coffCabStart is added to the offset at which
+ * the cabinet was opened to give the folder's position in the file.
+ */
+struct cab_folder_hdr
+{
+ uint32_t coffCabStart; /* offset of the first data block */
+ uint16_t cCFData; /* number of data blocks */
+ uint16_t typeCompress; /* compression type */
+};
+
+/*
+ * On-disc file entry; cab_open() reads one per file. In the file each
+ * entry is followed by the NUL terminated file name, which cab_open()
+ * fetches separately with cab_readstr().
+ */
+struct cab_file_hdr
+{
+ uint32_t cbFile; /* uncompressed size */
+ uint32_t uoffFolderStart; /* uncompressed offset of file in folder */
+ uint16_t iFolder; /* folder index */
+ uint16_t date; /* date stamp */
+ uint16_t time; /* time stamp */
+ uint16_t attribs; /* attribute flags */
+};
+
+/*
+ * Header of a single data block: its checksum and its compressed and
+ * uncompressed sizes in bytes.
+ */
+struct cab_block_hdr
+{
+ uint32_t csum; /* data block checksum */
+ uint16_t cbData; /* number of compressed bytes */
+ uint16_t cbUncomp; /* number of uncompressed bytes */
+};
+
+/*
+ * Read a NUL terminated string from the current position of fd.
+ *
+ * Up to 256 bytes are read in one go; if a terminator is found among them
+ * the descriptor is repositioned just past it and a newly allocated copy
+ * of the string is returned, which the caller must free.
+ *
+ * *ret is always set: CL_SUCCESS with the string, otherwise NULL is
+ * returned with CL_EIO (seek failed), CL_EFORMAT (no terminator in the
+ * bytes read, which includes a failed or empty read) or CL_EMEM.
+ */
+static char *cab_readstr(int fd, int *ret)
+{
+    char buff[256], *copy;
+    int nread, idx;
+    off_t start;
+
+
+    start = lseek(fd, 0, SEEK_CUR);
+    if(start == -1) {
+        *ret = CL_EIO;
+        return NULL;
+    }
+
+    nread = read(fd, buff, sizeof(buff));
+
+    /* look for the terminator within the bytes actually read */
+    idx = 0;
+    while(idx < nread && buff[idx])
+        idx++;
+
+    if(idx >= nread) {
+        *ret = CL_EFORMAT;
+        return NULL;
+    }
+
+    /* leave the descriptor on the byte after the terminator */
+    if(lseek(fd, (off_t) (start + idx + 1), SEEK_SET) == -1) {
+        *ret = CL_EIO;
+        return NULL;
+    }
+
+    copy = cli_strdup(buff);
+    if(copy == NULL) {
+        *ret = CL_EMEM;
+        return NULL;
+    }
+
+    *ret = CL_SUCCESS;
+    return copy;
+}
+
+/*
+ * Check a name taken from a cabinet for suspicious characters.
+ * Returns 1 (and logs a debug message) at the first character that is in
+ * the disallowed set or is not ASCII, 0 if the whole name is acceptable.
+ */
+static int cab_chkname(const char *name)
+{
+    const char *p;
+
+
+    for(p = name; *p != '\0'; p++) {
+        if(strchr("%/*?|\\\"+=<>;:\t ", *p) || !isascii(*p)) {
+            cli_dbgmsg("cab_chkname: File name contains disallowed characters\n");
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Release the folder and file lists hanging off cab, including each
+ * file's name. The cab structure itself is not freed; its list heads are
+ * NULL afterwards.
+ */
+void cab_free(struct cab_archive *cab)
+{
+    struct cab_folder *folder;
+    struct cab_file *file;
+
+
+    /* pop and free the head of each list until the list is empty */
+    while((folder = cab->folders) != NULL) {
+        cab->folders = folder->next;
+        free(folder);
+    }
+
+    while((file = cab->files) != NULL) {
+        cab->files = file->next;
+        free(file->name);
+        free(file);
+    }
+}
+
+int cab_open(int fd, off_t offset, struct cab_archive *cab)
+{
+ unsigned int i, bscore = 0, badname = 0;
+ struct cab_file *file, *lfile = NULL;
+ struct cab_folder *folder, *lfolder = NULL;
+ struct cab_hdr hdr;
+ struct cab_hdr_opt hdr_opt;
+ struct cab_folder_hdr folder_hdr;
+ struct cab_file_hdr file_hdr;
+ struct stat sb;
+ uint16_t fidx;
+ char *pt;
+ int ret;
+ off_t resfold = 0, rsize;
+
+
+ if(lseek(fd, offset, SEEK_SET) == -1) {
+ cli_errmsg("cab_open: Can't lseek to %u (offset)\n", (unsigned int) offset);
+ return CL_EIO;
+ }
+
+ if(cli_readn(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
+ cli_dbgmsg("cab_open: Can't read cabinet header\n");
+ return CL_EIO;
+ }
+
+ if(EC32(hdr.signature) != 0x4643534d) {
+ cli_dbgmsg("cab_open: Incorrect CAB signature\n");
+ return CL_EFORMAT;
+ } else {
+ cli_dbgmsg("CAB: -------------- Cabinet file ----------------\n");
+ }
+
+ if(fstat(fd, &sb) == -1) {
+ cli_errmsg("cab_open: Can't fstat descriptor %d\n", fd);
+ return CL_EIO;
+ }
+ rsize = sb.st_size;
+
+ memset(cab, 0, sizeof(struct cab_archive));
+
+ cab->length = EC32(hdr.cbCabinet);
+ cli_dbgmsg("CAB: Cabinet length: %u\n", cab->length);
+ if((off_t) cab->length > rsize)
+ bscore++;
+
+ cab->nfolders = EC16(hdr.cFolders);
+ if(!cab->nfolders) {
+ cli_dbgmsg("cab_open: No folders in cabinet (fake cab?)\n");
+ return CL_EFORMAT;
+ } else {
+ cli_dbgmsg("CAB: Folders: %u\n", cab->nfolders);
+ if(cab->nfolders > CAB_FOLDER_LIMIT) {
+ cab->nfolders = CAB_FOLDER_LIMIT;
+ cli_dbgmsg("CAB: *** Number of folders limited to %u ***\n", cab->nfolders);
+ bscore++;
+ }
+ }
+
+ cab->nfiles = EC16(hdr.cFiles);
+ if(!cab->nfiles) {
+ cli_dbgmsg("cab_open: No files in cabinet (fake cab?)\n");
+ return CL_EFORMAT;
+ } else {
+ cli_dbgmsg("CAB: Files: %u\n", cab->nfiles);
+ if(cab->nfiles > CAB_FILE_LIMIT) {
+ cab->nfiles = CAB_FILE_LIMIT;
+ cli_dbgmsg("CAB: *** Number of files limited to %u ***\n", cab->nfiles);
+ bscore++;
+ }
+ }
+
+ cli_dbgmsg("CAB: File format version: %u.%u\n", hdr.versionMajor, hdr.versionMinor);
+ if(hdr.versionMajor != 1 || hdr.versionMinor != 3)
+ bscore++;
+
+ cab->flags = EC16(hdr.flags);
+ if(cab->flags & 0x0004) {
+ if(cli_readn(fd, &hdr_opt, sizeof(hdr_opt)) != sizeof(hdr_opt)) {
+ cli_dbgmsg("cab_open: Can't read file header (fake cab?)\n");
+ return CL_EIO;
+ }
+
+ cab->reshdr = EC16(hdr_opt.cbCFHeader);
+ resfold = hdr_opt.cbCFFolder;
+ cab->resdata = hdr_opt.cbCFData;
+
+ if(cab->reshdr) {
+ if(lseek(fd, cab->reshdr, SEEK_CUR) == -1) {
+ cli_dbgmsg("cab_open: Can't lseek to %u (fake cab?)\n", cab->reshdr);
+ return CL_EIO;
+ }
+ }
+ }
+
+ if(cab->flags & 0x0001) { /* preceeding cabinet */
+ /* name */
+ pt = cab_readstr(fd, &ret);
+ if(ret)
+ return ret;
+ if(cab_chkname(pt))
+ badname = 1;
+ else
+ cli_dbgmsg("CAB: Preceeding cabinet name: %s\n", pt);
+ free(pt);
+ /* info */
+ pt = cab_readstr(fd, &ret);
+ if(ret)
+ return ret;
+ if(cab_chkname(pt))
+ badname = 1;
+ else
+ cli_dbgmsg("CAB: Preceeding cabinet info: %s\n", pt);
+ free(pt);
+ }
+
+ if(cab->flags & 0x0002) { /* next cabinet */
+ /* name */
+ pt = cab_readstr(fd, &ret);
+ if(ret)
+ return ret;
+ if(cab_chkname(pt))
+ badname = 1;
+ else
+ cli_dbgmsg("CAB: Next cabinet name: %s\n", pt);
+ free(pt);
+ /* info */
+ pt = cab_readstr(fd, &ret);
+ if(ret)
+ return ret;
+ if(cab_chkname(pt))
+ badname = 1;
+ else
+ cli_dbgmsg("CAB: Next cabinet info: %s\n", pt);
+ free(pt);
+ }
+ bscore += badname;
+
+ if(bscore >= 4) {
+ cli_dbgmsg("CAB: bscore == %u, most likely a fake cabinet\n", bscore);
+ return CL_EFORMAT;
+ }
+
+ /* folders */
+ for(i = 0; i < cab->nfolders; i++) {
+ if(cli_readn(fd, &folder_hdr, sizeof(folder_hdr)) != sizeof(folder_hdr)) {
+ cli_errmsg("cab_open: Can't read header for folder %u\n", i);
+ cab_free(cab);
+ return CL_EIO;
+ }
+
+ if(resfold) {
+ if(lseek(fd, resfold, SEEK_CUR) == -1) {
+ cli_errmsg("cab_open: Can't lseek to %u (resfold)\n", (unsigned int) resfold);
+ cab_free(cab);
+ return CL_EIO;
+ }
+ }
+
+ folder = (struct cab_folder *) cli_calloc(1, sizeof(struct cab_folder));
+ if(!folder) {
+ cli_errmsg("cab_open: Can't allocate memory for folder\n");
+ cab_free(cab);
+ return CL_EMEM;
+ }
+
+ folder->cab = (struct cab_archive *) cab;
+ folder->offset = (off_t) EC32(folder_hdr.coffCabStart) + offset;
+ if(folder->offset > rsize)
+ bscore++;
+ folder->nblocks = EC16(folder_hdr.cCFData);
+ folder->cmethod = EC16(folder_hdr.typeCompress);
+
+ cli_dbgmsg("CAB: Folder record %u\n", i);
+ cli_dbgmsg("CAB: Folder offset: %u\n", (unsigned int) folder->offset);
+ cli_dbgmsg("CAB: Folder compression method: %d\n", folder->cmethod);
+ if((folder->cmethod & 0x000f) > 3)
+ bscore++;
+
+ if(!lfolder)
+ cab->folders = folder;
+ else
+ lfolder->next = folder;
+
+ lfolder = folder;
+
+ if(bscore > 10) {
+ cab_free(cab);
+ cli_dbgmsg("CAB: bscore == %u, most likely a fake cabinet\n", bscore);
+ return CL_EFORMAT;
+ }
+ }
+
+ /* files */
+ for(i = 0; i < cab->nfiles; i++) {
+ if(bscore > 10) {
+ cab_free(cab);
+ cli_dbgmsg("CAB: bscore == %u, most likely a fake cabinet\n", bscore);
+ return CL_EFORMAT;
+ }
+
+ if(cli_readn(fd, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
+ cli_errmsg("cab_open: Can't read file %u header\n", i);
+ cab_free(cab);
+ return CL_EIO;
+ }
+
+ file = (struct cab_file *) cli_calloc(1, sizeof(struct cab_file));
+ if(!file) {
+ cli_errmsg("cab_open: Can't allocate memory for file\n");
+ cab_free(cab);
+ return CL_EMEM;
+ }
+
+ file->cab = cab;
+ file->fd = fd;
+ file->length = EC32(file_hdr.cbFile);
+ file->offset = EC32(file_hdr.uoffFolderStart);
+ file->attribs = EC32(file_hdr.attribs);
+ fidx = EC32(file_hdr.iFolder);
+
+ file->name = cab_readstr(fd, &ret);
+ if(ret) {
+ free(file);
+ cab_free(cab);
+ return ret;
+ }
+
+ cli_dbgmsg("CAB: File record %u\n", i);
+ cli_dbgmsg("CAB: File name: %s\n", file->name);
+ cli_dbgmsg("CAB: File offset: %u\n", (unsigned int) file->offset);
+ cli_dbgmsg("CAB: File folder index: %u\n", fidx);
+ cli_dbgmsg("CAB: File attribs: 0x%x\n", file->attribs);
+ if(file->attribs & 0x01)
+ cli_dbgmsg("CAB: * file is read-only\n");
+ if(file->attribs & 0x02)
+ cli_dbgmsg("CAB: * file is hidden\n");
+ if(file->attribs & 0x04)
+ cli_dbgmsg("CAB: * file is a system file\n");
+ if(file->attribs & 0x20)
+ cli_dbgmsg("CAB: * file modified since last backup\n");
+ if(file->attribs & 0x40)
+ cli_dbgmsg("CAB: * file to be run after extraction\n");
+ if(file->attribs & 0x80)
+ cli_dbgmsg("CAB: * file name contains UTF\n");
+
+ /* folder index */
+ if(fidx < 0xfffd) {
+ if(fidx > cab->nfolders) {
+ if(bscore < 3)
+ cli_dbgmsg("cab_open: File %s is not associated with any folder\n", file->name);
+ bscore++;
+ free(file->name);
+ free(file);
+ continue;
+ }
+
+ file->folder = cab->folders;
+ while(file->folder && fidx--)
+ file->folder = file->folder->next;
+
+ if(!file->folder) {
+ cli_errmsg("cab_open: Folder not found for file %s\n", file->name);
+ free(file->name);
+ free(file);
+ cab_free(cab);
+ return CL_EFORMAT;
+ }
+
+ } else {
+ cli_dbgmsg("CAB: File is split *skipping*\n");
+ free(file->name);
+ free(file);
+ continue;
+ }
+
+ if(!lfile)
+ cab->files = file;
+ else
+ lfile->next = file;
+
+ lfile = file;
+
+ }
+
+ return CL_SUCCESS;
+}
+
+/*
+ * Read the next CAB data block from fd into state.
+ *
+ * The block header is read first, then resdata bytes (if non-zero) are
+ * skipped, and finally cbData bytes are read into state->block.
+ * On success state->pt points at the start of the block and state->end
+ * just past the bytes that were read.
+ *
+ * Returns CL_SUCCESS, CL_EIO on a read/seek failure, or CL_EFORMAT when
+ * the header announces sizes above CAB_INPUTMAX / CAB_BLOCKMAX.
+ */
+static int cab_read_block(int fd, struct cab_state *state, uint16_t resdata)
+{
+    struct cab_block_hdr hdr;
+
+    if(cli_readn(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
+        cli_dbgmsg("cab_read_block: Can't read block header\n");
+        return CL_EIO;
+    }
+
+    /* skip the per-block reserved area, when the cabinet has one */
+    if(resdata) {
+        if(lseek(fd, (off_t) resdata, SEEK_CUR) == -1) {
+            cli_dbgmsg("cab_read_block: lseek failed\n");
+            return CL_EIO;
+        }
+    }
+
+    state->blklen = EC16(hdr.cbData);
+    if(state->blklen > CAB_INPUTMAX) {
+        cli_dbgmsg("cab_read_block: block size > CAB_INPUTMAX\n");
+        return CL_EFORMAT;
+    }
+
+    state->outlen = EC16(hdr.cbUncomp);
+    if(state->outlen > CAB_BLOCKMAX) {
+        cli_dbgmsg("cab_read_block: output size > CAB_BLOCKMAX\n");
+        return CL_EFORMAT;
+    }
+
+    if(cli_readn(fd, state->block, state->blklen) != state->blklen) {
+        cli_dbgmsg("cab_read_block: Can't read block data\n");
+        return CL_EIO;
+    }
+
+    /* expose the freshly read bytes as the [pt, end) window */
+    state->pt = state->block;
+    state->end = state->block + state->blklen;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Copy up to `bytes` bytes of raw folder data into buffer, pulling new
+ * blocks from the cabinet with cab_read_block() whenever the current
+ * [pt, end) window is empty.
+ *
+ * Returns the number of bytes copied, which is smaller than `bytes` when
+ * the folder runs out of blocks (file->error is then CL_EFORMAT), or -1
+ * when cab_read_block() fails (file->error holds its return code).
+ *
+ * The counters are plain ints: the previous uint16_t counters silently
+ * truncated any request above 65535 bytes while still reporting the full
+ * request as satisfied.
+ */
+static int cab_read(struct cab_file *file, unsigned char *buffer, int bytes)
+{
+    int todo, left;
+
+
+    todo = bytes;
+    while(todo > 0) {
+        left = (int) (file->state->end - file->state->pt);
+
+        if(left > 0) {
+            if(left > todo)
+                left = todo;
+
+            memcpy(buffer, file->state->pt, left);
+            file->state->pt += left;
+            buffer += left;
+            todo -= left;
+
+        } else {
+            /* window exhausted: stop if the folder has no more blocks */
+            if(file->state->blknum++ >= file->folder->nblocks) {
+                file->error = CL_EFORMAT;
+                break;
+            }
+
+            file->error = cab_read_block(file->fd, file->state, file->cab->resdata);
+            if(file->error)
+                return -1;
+
+            if((file->folder->cmethod & 0x000f) == 0x0002) /* Quantum hack */
+                *file->state->end++ = 0xff;
+
+            if(file->state->blknum >= file->folder->nblocks) {
+                if((file->folder->cmethod & 0x000f) == 0x0003) { /* LZX hack */
+                    lzx_set_output_length(file->state->stream, (off_t) ((file->state->blknum - 1) * CAB_BLOCKMAX + file->state->outlen));
+                }
+            } else {
+                if(file->state->outlen != CAB_BLOCKMAX) {
+                    cli_dbgmsg("cab_read: WARNING: partial data block\n");
+                }
+            }
+        }
+    }
+
+    return bytes - todo;
+}
+
+/*
+ * Copy `bytes` bytes of stored (uncompressed) folder data through a 4 KiB
+ * buffer. When wflag is non-zero the data is written to file->ofd,
+ * otherwise it is only consumed (used to skip to a file's offset).
+ *
+ * Only the bytes cab_read() actually delivered are written, and copying
+ * stops as soon as cab_read() comes up short; previously a short read was
+ * ignored and stale buffer contents were written out.
+ *
+ * Returns CL_SUCCESS, CL_EFORMAT for a negative byte count, or CL_EIO when
+ * reading or writing fails.
+ */
+static int cab_unstore(struct cab_file *file, int bytes, uint8_t wflag)
+{
+    int todo, want, got;
+    unsigned char buff[4096];
+
+
+    if(bytes < 0) {
+        cli_warnmsg("cab_unstore: bytes < 0\n");
+        return CL_EFORMAT;
+    }
+
+    todo = bytes;
+
+    while(todo > 0) {
+        want = ((unsigned int) todo <= sizeof(buff)) ? todo : (int) sizeof(buff);
+
+        got = cab_read(file, buff, want);
+        if(got == -1) {
+            cli_dbgmsg("cab_unstore: cab_read failed for descriptor %d\n", file->fd);
+            return CL_EIO;
+        }
+
+        if(wflag && got > 0 && cli_writen(file->ofd, buff, got) == -1) {
+            cli_dbgmsg("cab_unstore: Can't write to descriptor %d\n", file->ofd);
+            return CL_EIO;
+        }
+
+        /* a short count means the folder has no more data to offer */
+        if(got < want)
+            break;
+
+        todo -= got;
+    }
+
+    return CL_SUCCESS;
+}
+
+/* Free the per-extraction state, clear the pointer and close the output. */
+static void cab_extract_release(struct cab_file *file)
+{
+    free(file->state);
+    file->state = NULL;
+    close(file->ofd);
+}
+
+/*
+ * Extract one cabinet member into the file `name`.
+ *
+ * The input descriptor is positioned at the start of the member's folder,
+ * a fresh cab_state is allocated, `name` is created/truncated for writing
+ * and the folder data is run through the decoder selected by the low
+ * nibble of folder->cmethod (0 store, 1 MSZIP, 2 Quantum, 3 LZX). Data
+ * before file->offset is decoded with writing disabled, then file->length
+ * bytes are decoded with writing enabled.
+ *
+ * Returns the decoder's result for the final pass, CL_ENULLARG for missing
+ * arguments/folder, CL_EIO when seeking or opening fails, CL_EMEM when the
+ * state cannot be allocated (matching cab_open), CL_EMSCAB when a decoder
+ * cannot be initialised, or CL_EFORMAT for an unknown method.
+ *
+ * file->state is always released and reset to NULL before returning so
+ * that no dangling pointer is left in *file.
+ */
+int cab_extract(struct cab_file *file, const char *name)
+{
+    struct cab_folder *folder;
+    int ret;
+
+
+    if(!file || !name) {
+        cli_errmsg("cab_extract: !file || !name\n");
+        return CL_ENULLARG;
+    }
+
+    if(!(folder = file->folder)) {
+        cli_errmsg("cab_extract: file->folder == NULL\n");
+        return CL_ENULLARG;
+    }
+
+    if(lseek(file->fd, file->folder->offset, SEEK_SET) == -1) {
+        cli_errmsg("cab_extract: Can't lseek to %u\n", (unsigned int) file->folder->offset);
+        return CL_EIO;
+    }
+
+    file->state = (struct cab_state *) cli_calloc(1, sizeof(struct cab_state));
+    if(!file->state) {
+        cli_errmsg("cab_extract: Can't allocate memory for internal state\n");
+        return CL_EMEM;
+    }
+
+    file->ofd = open(name, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+    if(file->ofd == -1) {
+        cli_errmsg("cab_extract: Can't open file %s in write mode\n", name);
+        free(file->state);
+        file->state = NULL;
+        return CL_EIO;
+    }
+
+    switch(file->folder->cmethod & 0x000f) {
+        case 0x0000: /* STORE */
+            /* consume everything in the folder that precedes this file */
+            if(file->offset > 0)
+                cab_unstore(file, file->offset, 0);
+
+            ret = cab_unstore(file, file->length, 1);
+            break;
+
+        case 0x0001: /* MSZIP */
+            cli_dbgmsg("CAB: Compression method: MSZIP\n");
+            file->state->stream = (struct mszip_stream *) mszip_init(file->fd, file->ofd, 4096, 1, file, &cab_read);
+            if(!file->state->stream) {
+                cab_extract_release(file);
+                return CL_EMSCAB;
+            }
+            if(file->offset > 0) {
+                ((struct mszip_stream *) file->state->stream)->wflag = 0;
+                ret = mszip_decompress(file->state->stream, file->offset);
+                ((struct mszip_stream *) file->state->stream)->wflag = 1;
+                if(ret < 0) {
+                    /* skipping failed: restart the decoder from the folder start */
+                    mszip_free(file->state->stream);
+                    memset(file->state, 0, sizeof(struct cab_state));
+                    file->state->stream = (struct mszip_stream *) mszip_init(file->fd, file->ofd, 4096, 1, file, &cab_read);
+                    if(!file->state->stream) {
+                        cab_extract_release(file);
+                        return CL_EMSCAB;
+                    }
+                    lseek(file->fd, file->folder->offset, SEEK_SET);
+                }
+            }
+            ret = mszip_decompress(file->state->stream, file->length);
+            mszip_free(file->state->stream);
+            break;
+
+        case 0x0002: /* QUANTUM */
+            cli_dbgmsg("CAB: Compression method: QUANTUM\n");
+            file->state->stream = (struct qtm_stream *) qtm_init(file->fd, file->ofd, (int) (file->folder->cmethod >> 8) & 0x1f, 4096, file, &cab_read);
+            if(!file->state->stream) {
+                cab_extract_release(file);
+                return CL_EMSCAB;
+            }
+            if(file->offset > 0) {
+                ((struct qtm_stream *) file->state->stream)->wflag = 0;
+                qtm_decompress(file->state->stream, file->offset);
+                ((struct qtm_stream *) file->state->stream)->wflag = 1;
+            }
+            ret = qtm_decompress(file->state->stream, file->length);
+            qtm_free(file->state->stream);
+            break;
+
+        case 0x0003: /* LZX */
+            cli_dbgmsg("CAB: Compression method: LZX\n");
+            file->state->stream = (struct lzx_stream *) lzx_init(file->fd, file->ofd, (int) (file->folder->cmethod >> 8) & 0x1f, 0, 4096, 0, file, &cab_read);
+            if(!file->state->stream) {
+                cab_extract_release(file);
+                return CL_EMSCAB;
+            }
+            if(file->offset > 0) {
+                ((struct lzx_stream *) file->state->stream)->wflag = 0;
+                ret = lzx_decompress(file->state->stream, file->offset);
+                ((struct lzx_stream *) file->state->stream)->wflag = 1;
+                if(ret < 0) {
+                    /* skipping failed: restart the decoder from the folder start */
+                    lzx_free(file->state->stream);
+                    memset(file->state, 0, sizeof(struct cab_state));
+                    file->state->stream = (struct lzx_stream *) lzx_init(file->fd, file->ofd, (int) (file->folder->cmethod >> 8) & 0x1f, 0, 4096, 0, file, &cab_read);
+                    if(!file->state->stream) {
+                        cab_extract_release(file);
+                        return CL_EMSCAB;
+                    }
+                    lseek(file->fd, file->folder->offset, SEEK_SET);
+                }
+            }
+            ret = lzx_decompress(file->state->stream, file->length);
+            lzx_free(file->state->stream);
+            break;
+
+        default:
+            cli_warnmsg("CAB: Not supported compression method: 0x%x\n", file->folder->cmethod & 0x000f);
+            ret = CL_EFORMAT;
+    }
+
+    cab_extract_release(file);
+
+    return ret;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_chmunpack.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1059 @@
+/*
+ * Extract component parts of MS CHM files
+ *
+ * Copyright (C) 2004-2005 trog at uncon.org
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif /* HAVE_SYS_MMAN_H */
+#endif /* HAVE_MMAP */
+#else/* PACKED */
+#undef HAVE_MMAP
+#endif
+
+#include "others.h"
+#include "mspack.h"
+#include "cltypes.h"
+#include "chmunpack.h"
+
+#ifndef HAVE_ATTRIB_PACKED
+#define __attribute__(x)
+#endif
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack(1)
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack 1
+#endif
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#define CHM_ITSF_MIN_LEN (0x60) /* number of bytes itsf_read_header() reads into itsf_header_t in the packed build */
+typedef struct itsf_header_tag /* "ITSF" header, read from offset 0 of the file */
+{
+ unsigned char signature[4] __attribute__ ((packed)); /* compared against "ITSF" */
+ int32_t version __attribute__ ((packed)); /* data_offset is only used when version > 2 */
+ int32_t header_len __attribute__ ((packed));
+ uint32_t unknown __attribute__ ((packed)); /* read but never byte-swapped or used here */
+ uint32_t last_modified __attribute__ ((packed));
+ uint32_t lang_id __attribute__ ((packed));
+ unsigned char dir_clsid[16] __attribute__ ((packed));
+ unsigned char stream_clsid[16] __attribute__ ((packed));
+ uint64_t sec0_offset __attribute__ ((packed));
+ uint64_t sec0_len __attribute__ ((packed));
+ uint64_t dir_offset __attribute__ ((packed));
+ uint64_t dir_len __attribute__ ((packed));
+ uint64_t data_offset __attribute__ ((packed)); /* base added to entry offsets by the read_sys_* helpers */
+} itsf_header_t;
+
+#define CHM_ITSP_LEN (0x54) /* bytes read by itsp_read_header(); header_len must equal this value */
+typedef struct itsp_header_tag /* "ITSP" directory header */
+{
+ unsigned char signature[4] __attribute__ ((packed)); /* compared against "ITSP" */
+ int32_t version __attribute__ ((packed)); /* itsp_read_header() accepts only version 1 */
+ int32_t header_len __attribute__ ((packed));
+ int32_t unknown1 __attribute__ ((packed)); /* read but not byte-swapped */
+ uint32_t block_len __attribute__ ((packed));
+ int32_t blockidx_intvl __attribute__ ((packed));
+ int32_t index_depth __attribute__ ((packed));
+ int32_t index_root __attribute__ ((packed));
+ int32_t index_head __attribute__ ((packed));
+ int32_t index_tail __attribute__ ((packed));
+ int32_t unknown2 __attribute__ ((packed)); /* read but not byte-swapped */
+ uint32_t num_blocks __attribute__ ((packed));
+ uint32_t lang_id __attribute__ ((packed));
+ unsigned char system_clsid[16] __attribute__ ((packed));
+ unsigned char unknown4[16] __attribute__ ((packed));
+} itsp_header_t;
+
+#define CHM_CHUNK_HDR_LEN (0x14) /* read_chunk_entries() starts parsing entries this many bytes into a chunk */
+typedef struct chunk_header_tag /* header of one directory chunk plus bookkeeping filled in by read_chunk() */
+{
+ unsigned char signature[4] __attribute__ ((packed)); /* "PMGL" or "PMGI"; anything else is rejected */
+ uint32_t free_space __attribute__ ((packed));
+ uint32_t unknown __attribute__ ((packed)); /* the fields from here to block_next are only read for "PMGL" chunks */
+ int32_t block_prev __attribute__ ((packed));
+ int32_t block_next __attribute__ ((packed));
+ unsigned char *chunk_data; /* heap copy of the whole chunk (chunk_len bytes), not part of the on-disk header */
+ uint16_t num_entries; /* decoded from the last two bytes of the chunk, low byte first */
+} chunk_header_t;
+
+typedef struct file_list_tag /* singly linked list node describing one directory entry */
+{
+ unsigned char *name; /* heap-allocated, NUL-terminated; released by free_file_list() */
+ uint64_t section; /* section, offset and length are decoded with read_enc_int() */
+ uint64_t offset;
+ uint64_t length;
+ struct file_list_tag *next; /* NULL terminates the list */
+} file_list_t;
+
+#define CHM_CONTROL_LEN (0x18) /* bytes read into lzx_control_t by read_sys_control() in the packed build */
+typedef struct lzx_control_tag { /* contents of the LZX control data entry */
+ uint32_t length __attribute__ ((packed));
+ unsigned char signature[4] __attribute__ ((packed)); /* must be "LZXC" */
+ uint32_t version __attribute__ ((packed)); /* 1 or 2; other values are rejected */
+ uint32_t reset_interval __attribute__ ((packed)); /* multiplied by LZX_FRAME_SIZE when version == 2 */
+ uint32_t window_size __attribute__ ((packed)); /* multiplied by LZX_FRAME_SIZE when version == 2 */
+ uint32_t cache_size __attribute__ ((packed));
+} lzx_control_t;
+
+/* rt_offset is not part of the on-disk data, so it is not counted in the structure length */
+#define CHM_RESET_TABLE_LEN (0x24) /* bytes read into lzx_reset_table_t by read_sys_reset_table() in the packed build */
+typedef struct lzx_reset_table_tag { /* contents of the LZX reset table entry */
+ uint32_t num_entries __attribute__ ((packed));
+ uint32_t entry_size __attribute__ ((packed)); /* only 4 and 8 are accepted */
+ uint32_t table_offset __attribute__ ((packed));
+ uint64_t uncom_len __attribute__ ((packed));
+ uint64_t com_len __attribute__ ((packed));
+ uint64_t frame_len __attribute__ ((packed)); /* must equal LZX_FRAME_SIZE */
+ off_t rt_offset __attribute__ ((packed)); /* file offset of the entry itself, saved by read_sys_reset_table() */
+} lzx_reset_table_t;
+
+typedef struct lzx_content_tag { /* location of the compressed content, filled in by read_sys_content() */
+ uint64_t offset; /* itsf data_offset plus the directory entry's offset */
+ uint64_t length; /* length taken from the directory entry */
+} lzx_content_t;
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack()
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack
+#endif
+
+#define chm_endian_convert_16(x) le16_to_host(x)
+#define chm_endian_convert_32(x) le32_to_host(x)
+#define chm_endian_convert_64(x) le64_to_host(x)
+
+/*
+ * Read len bytes found at absolute position offset into dest, either by
+ * copying from the mmap area (when m_area is not NULL) or by seeking and
+ * reading on fd.
+ *
+ * Returns TRUE on success. Returns FALSE when offset or len is negative,
+ * when offset + len cannot be represented in an off_t, when the range
+ * lies outside the m_length bytes of the mapped area, or when the
+ * seek/read on fd fails.
+ */
+static int chm_read_data(int fd, unsigned char *dest, off_t offset, off_t len,
+                         unsigned char *m_area, off_t m_length)
+{
+    /* Largest positive off_t. The range test below is written as a
+     * subtraction so that it never depends on signed overflow, which is
+     * undefined and may be optimised away by the compiler. */
+    const off_t off_max = (off_t) ((((uint64_t) 1) << (sizeof(off_t) * 8 - 1)) - 1);
+
+    if ((offset < 0) || (len < 0) || (len > off_max - offset)) {
+        return FALSE;
+    }
+    if (m_area != NULL) {
+        if ((offset + len) > m_length) {
+            return FALSE;
+        }
+        memcpy(dest, m_area + offset, len);
+    } else {
+        if (lseek(fd, offset, SEEK_SET) != offset) {
+            return FALSE;
+        }
+        if (cli_readn(fd, dest, len) != len) {
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * Copy len bytes from ifd to ofd in chunks of at most 8192 bytes.
+ *
+ * Returns len when everything was copied. On a short read or a failed
+ * write it returns the number of bytes that had been copied completely
+ * before the failing chunk. (The write-failure path used to subtract the
+ * chunk size once more, which underflowed on the first chunk.)
+ */
+static uint64_t chm_copy_file_data(int ifd, int ofd, uint64_t len)
+{
+    unsigned char data[8192];
+    uint64_t count, rem;
+    unsigned int todo;
+
+    rem = len;
+
+    while (rem > 0) {
+        todo = MIN(8192, rem);
+        count = cli_readn(ifd, data, todo);
+        if (count != todo) {
+            return len-rem;
+        }
+        if (cli_writen(ofd, data, count) != (int64_t)count) {
+            /* rem has not been reduced yet, so len-rem is what was copied */
+            return len-rem;
+        }
+        rem -= count;
+    }
+    return len;
+}
+
+/*
+ * Release every node of a file list, including each node's name buffer.
+ * Passing NULL is allowed and does nothing.
+ */
+static void free_file_list(file_list_t *file_l)
+{
+    file_list_t *node, *following;
+
+    for (node = file_l; node != NULL; node = following) {
+        /* remember the successor before the node itself is released */
+        following = node->next;
+        free(node->name);
+        free(node);
+    }
+}
+
+/*
+ * Dump the fields of an ITSF header through cli_dbgmsg(). Does nothing
+ * when itsf_hdr is NULL. The data offset is only printed for headers with
+ * version > 2.
+ *
+ * Conversion specifiers match the field types (int32_t -> %d,
+ * uint32_t -> %u) and 64-bit values are cast to unsigned long long for
+ * %llu, so the calls are well defined on every ABI.
+ */
+static void itsf_print_header(itsf_header_t *itsf_hdr)
+{
+    if (!itsf_hdr) {
+        return;
+    }
+
+    cli_dbgmsg("---- ITSF ----\n");
+    cli_dbgmsg("Signature:\t%c%c%c%c\n", itsf_hdr->signature[0],
+        itsf_hdr->signature[1],itsf_hdr->signature[2],itsf_hdr->signature[3]);
+    cli_dbgmsg("Version:\t%d\n", itsf_hdr->version);
+    cli_dbgmsg("Header len:\t%d\n", itsf_hdr->header_len);
+    cli_dbgmsg("Lang ID:\t%u\n", itsf_hdr->lang_id);
+    cli_dbgmsg("Sec0 offset:\t%llu\n", (unsigned long long) itsf_hdr->sec0_offset);
+    cli_dbgmsg("Sec0 len:\t%llu\n", (unsigned long long) itsf_hdr->sec0_len);
+    cli_dbgmsg("Dir offset:\t%llu\n", (unsigned long long) itsf_hdr->dir_offset);
+    cli_dbgmsg("Dir len:\t%llu\n", (unsigned long long) itsf_hdr->dir_len);
+    if (itsf_hdr->version > 2) {
+        cli_dbgmsg("Data offset:\t%llu\n\n", (unsigned long long) itsf_hdr->data_offset);
+    }
+}
+
+static int itsf_read_header(int fd, itsf_header_t *itsf_hdr, unsigned char *m_area, off_t m_length) /* Read the ITSF header into *itsf_hdr,
+ * check its "ITSF" signature and convert the multi-byte fields with the
+ * chm_endian_convert_* macros. Returns TRUE on success, FALSE when a read
+ * fails or the signature does not match. */
+{
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+ if (!chm_read_data(fd, (unsigned char *) itsf_hdr, 0, CHM_ITSF_MIN_LEN,
+ m_area, m_length)) { /* packed build: the whole header is fetched in one go from offset 0 */
+ return FALSE;
+ }
+#else /* unpacked build: read field by field from the current fd position; NOTE(review): no seek is done here, presumably fd is already at offset 0 - confirm with caller */
+ if (cli_readn(fd, &itsf_hdr->signature, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->version, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->header_len, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->unknown, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->last_modified, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->lang_id, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->dir_clsid, 16) != 16) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->stream_clsid, 16) != 16) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->sec0_offset, 8) != 8) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->sec0_len, 8) != 8) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->dir_offset, 8) != 8) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsf_hdr->dir_len, 8) != 8) {
+ return FALSE;
+ }
+ if (itsf_hdr->version > 2) { /* tested before version has been endian-converted below */
+ if (cli_readn(fd, &itsf_hdr->data_offset, 8) != 8) {
+ return FALSE;
+ }
+ }
+#endif
+ if (memcmp(itsf_hdr->signature, "ITSF", 4) != 0) {
+ cli_dbgmsg("ITSF signature mismatch\n");
+ return FALSE;
+ }
+ itsf_hdr->version = chm_endian_convert_32(itsf_hdr->version); /* the 'unknown' field is left unconverted */
+ itsf_hdr->header_len = chm_endian_convert_32(itsf_hdr->header_len);
+ itsf_hdr->last_modified = chm_endian_convert_32(itsf_hdr->last_modified);
+ itsf_hdr->lang_id = chm_endian_convert_32(itsf_hdr->lang_id);
+ itsf_hdr->sec0_offset = chm_endian_convert_64(itsf_hdr->sec0_offset);
+ itsf_hdr->sec0_len = chm_endian_convert_64(itsf_hdr->sec0_len);
+ itsf_hdr->dir_offset = chm_endian_convert_64(itsf_hdr->dir_offset);
+ itsf_hdr->dir_len = chm_endian_convert_64(itsf_hdr->dir_len);
+ if (itsf_hdr->version > 2) {
+ itsf_hdr->data_offset = chm_endian_convert_64(itsf_hdr->data_offset);
+ }
+ return TRUE;
+}
+
+/*
+ * Dump the fields of an ITSP header through cli_dbgmsg(). Does nothing
+ * when itsp_hdr is NULL.
+ *
+ * Conversion specifiers follow the declared field types: uint32_t values
+ * (block_len, num_blocks, lang_id) use %u and int32_t values use %d.
+ */
+static void itsp_print_header(itsp_header_t *itsp_hdr)
+{
+    if (!itsp_hdr) {
+        return;
+    }
+
+    cli_dbgmsg("---- ITSP ----\n");
+    cli_dbgmsg("Signature:\t%c%c%c%c\n", itsp_hdr->signature[0],
+        itsp_hdr->signature[1],itsp_hdr->signature[2],itsp_hdr->signature[3]);
+    cli_dbgmsg("Version:\t%d\n", itsp_hdr->version);
+    cli_dbgmsg("Block len:\t%u\n", itsp_hdr->block_len);
+    cli_dbgmsg("Block idx int:\t%d\n", itsp_hdr->blockidx_intvl);
+    cli_dbgmsg("Index depth:\t%d\n", itsp_hdr->index_depth);
+    cli_dbgmsg("Index root:\t%d\n", itsp_hdr->index_root);
+    cli_dbgmsg("Index head:\t%d\n", itsp_hdr->index_head);
+    cli_dbgmsg("Index tail:\t%d\n", itsp_hdr->index_tail);
+    cli_dbgmsg("Num Blocks:\t%u\n", itsp_hdr->num_blocks);
+    cli_dbgmsg("Lang ID:\t%u\n\n", itsp_hdr->lang_id);
+}
+
+static int itsp_read_header(int fd, itsp_header_t *itsp_hdr, off_t offset,
+ unsigned char *m_area, off_t m_length) /* Read the ITSP header located at
+ * `offset` into *itsp_hdr, check its "ITSP" signature, convert the
+ * multi-byte fields and require version 1 with header_len equal to
+ * CHM_ITSP_LEN. Returns TRUE on success, FALSE otherwise. */
+{
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+ if (!chm_read_data(fd, (unsigned char *) itsp_hdr, offset, CHM_ITSP_LEN,
+ m_area, m_length)) { /* packed build: fetch the whole header at once */
+ return FALSE;
+ }
+#else /* unpacked build: seek to the header, then read it field by field */
+ if (lseek(fd, offset, SEEK_SET) != offset) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->signature, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->version, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->header_len, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->unknown1, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->block_len, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->blockidx_intvl, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->index_depth, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->index_root, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->index_head, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->index_tail, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->unknown2, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->num_blocks, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->lang_id, 4) != 4) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->system_clsid, 16) != 16) {
+ return FALSE;
+ }
+ if (cli_readn(fd, &itsp_hdr->unknown4, 16) != 16) {
+ return FALSE;
+ }
+#endif
+ if (memcmp(itsp_hdr->signature, "ITSP", 4) != 0) {
+ cli_dbgmsg("ITSP signature mismatch\n");
+ return FALSE;
+ }
+
+ itsp_hdr->version = chm_endian_convert_32(itsp_hdr->version); /* unknown1 and unknown2 are left unconverted */
+ itsp_hdr->header_len = chm_endian_convert_32(itsp_hdr->header_len);
+ itsp_hdr->block_len = chm_endian_convert_32(itsp_hdr->block_len);
+ itsp_hdr->blockidx_intvl = chm_endian_convert_32(itsp_hdr->blockidx_intvl);
+ itsp_hdr->index_depth = chm_endian_convert_32(itsp_hdr->index_depth);
+ itsp_hdr->index_root = chm_endian_convert_32(itsp_hdr->index_root);
+ itsp_hdr->index_head = chm_endian_convert_32(itsp_hdr->index_head);
+ itsp_hdr->index_tail = chm_endian_convert_32(itsp_hdr->index_tail);
+ itsp_hdr->num_blocks = chm_endian_convert_32(itsp_hdr->num_blocks);
+ itsp_hdr->lang_id = chm_endian_convert_32(itsp_hdr->lang_id);
+
+ if ((itsp_hdr->version != 1) || (itsp_hdr->header_len != CHM_ITSP_LEN)) { /* only this exact header layout is accepted */
+ cli_dbgmsg("ITSP header mismatch\n");
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/*
+ * Decode one variable-length integer: each byte contributes its low seven
+ * bits, most significant group first, and a set top bit means another
+ * byte follows.
+ *
+ * start - in: address of the first byte to decode;
+ *         out: advanced past the decoded bytes on success only.
+ * end   - one past the last byte that may be read. The byte at `end`
+ *         itself is never dereferenced (the previous `current > end` test
+ *         allowed a one-byte read beyond the buffer).
+ *
+ * Returns the decoded value, or 0 with *start unchanged when the buffer
+ * ends before the integer is complete.
+ */
+static uint64_t read_enc_int(unsigned char **start, unsigned char *end)
+{
+    uint64_t retval = 0;
+    unsigned char *current = *start;
+    unsigned char byte;
+
+    do {
+        if (current >= end) {
+            return 0;
+        }
+        byte = *current++;
+        retval = (retval << 7) | (byte & 0x7f);
+    } while (byte & 0x80);
+
+    *start = current;
+    return retval;
+}
+
+/*
+ * Parse up to num_entries directory entries from a chunk.
+ *
+ * Entries start CHM_CHUNK_HDR_LEN bytes into the chunk. Each one is an
+ * encoded name length, the name bytes, then encoded section, offset and
+ * length values. Every entry becomes a freshly allocated file_list_t that
+ * is inserted right after the head node of sys_file_l when its name
+ * starts with "::", otherwise right after the head node of file_l; both
+ * lists therefore end up in reverse order relative to the chunk.
+ *
+ * Returns TRUE when all entries were parsed, FALSE on a truncated chunk,
+ * a name length that does not fit in the remaining bytes, or an
+ * allocation failure. Entries linked before a failure stay in the lists.
+ */
+static int read_chunk_entries(unsigned char *chunk, uint32_t chunk_len,
+                              uint16_t num_entries,
+                              file_list_t *file_l, file_list_t *sys_file_l)
+{
+    unsigned char *current, *end;
+    uint64_t name_len;
+    file_list_t *file_e;
+
+    /* a chunk shorter than its header cannot hold entries; do not form a
+     * pointer beyond the buffer in that case */
+    if (chunk_len < CHM_CHUNK_HDR_LEN) {
+        if (num_entries == 0) {
+            return TRUE;
+        }
+        cli_dbgmsg("read chunk entries failed\n");
+        return FALSE;
+    }
+
+    end = chunk + chunk_len;
+    current = chunk + CHM_CHUNK_HDR_LEN;
+
+    while (num_entries--) {
+        if (current > end) {
+            cli_dbgmsg("read chunk entries failed\n");
+            return FALSE;
+        }
+
+        file_e = (file_list_t *) cli_malloc(sizeof(file_list_t));
+        if (!file_e) {
+            return FALSE;
+        }
+        file_e->next = NULL;
+
+        name_len = read_enc_int(&current, end);
+        /* compare lengths rather than adding an untrusted 64-bit value
+         * to a pointer, which could wrap around */
+        if ((current > end) || (name_len > (uint64_t) (end - current))) {
+            cli_dbgmsg("Bad CHM name_len detected\n");
+            free(file_e);
+            return FALSE;
+        }
+        if (name_len > 0xFFFFFF) {
+            cli_dbgmsg("CHM file name too long: %llu\n", (unsigned long long) name_len);
+            file_e->name = (unsigned char *) cli_strdup("truncated");
+            if (!file_e->name) {
+                free(file_e);
+                return FALSE;
+            }
+        } else {
+            file_e->name = (unsigned char *) cli_malloc(name_len+1);
+            if (!file_e->name) {
+                free(file_e);
+                return FALSE;
+            }
+            strncpy((char *) file_e->name, (const char *) current, name_len);
+            file_e->name[name_len] = '\0';
+        }
+        current += name_len;
+        file_e->section = read_enc_int(&current, end);
+        file_e->offset = read_enc_int(&current, end);
+        file_e->length = read_enc_int(&current, end);
+        if ((name_len >= 2) && (file_e->name[0] == ':') &&
+                (file_e->name[1] == ':')) {
+            file_e->next = sys_file_l->next;
+            sys_file_l->next = file_e;
+        } else {
+            file_e->next = file_l->next;
+            file_l->next = file_e;
+        }
+        cli_dbgmsg("Section: %llu Offset: %llu Length: %llu, Name: %s\n",
+                (unsigned long long) file_e->section,
+                (unsigned long long) file_e->offset,
+                (unsigned long long) file_e->length, file_e->name);
+    }
+    return TRUE;
+}
+
+/*
+ * Dump a chunk header through cli_dbgmsg(). The block links and the entry
+ * count are only printed for chunks whose signature is "PMGL".
+ */
+static void print_chunk(chunk_header_t *chunk)
+{
+    const unsigned char *sig = chunk->signature;
+
+    cli_dbgmsg("---- Chunk ----\n");
+    cli_dbgmsg("Signature:\t%c%c%c%c\n", sig[0], sig[1], sig[2], sig[3]);
+    cli_dbgmsg("Free Space:\t%u\n", chunk->free_space);
+
+    if (memcmp(sig, "PMGL", 4) != 0) {
+        return;
+    }
+
+    cli_dbgmsg("Prev Block:\t%d\n", chunk->block_prev);
+    cli_dbgmsg("Next Block:\t%d\n", chunk->block_next);
+    cli_dbgmsg("Num entries:\t%d\n\n", chunk->num_entries);
+}
+
+/*
+ * Read the directory chunk of chunk_len bytes found at `offset`, and for
+ * "PMGL" chunks add its entries to file_l / sys_file_l.
+ *
+ * chunk_len must be between 8 and 33554432 bytes. The whole chunk is
+ * copied into a temporary buffer, the header fields are read, and for
+ * "PMGL" chunks the entry count is taken from the last two bytes of the
+ * chunk (low byte first) before read_chunk_entries() is called. The
+ * result of read_chunk_entries() is not checked, so a chunk whose entries
+ * fail to parse is still reported as read.
+ *
+ * Returns TRUE for a readable "PMGL" or "PMGI" chunk, FALSE otherwise.
+ *
+ * In the field-by-field fallback path block_prev is read before
+ * block_next, matching the struct layout the packed path fills; the two
+ * reads used to be swapped.
+ */
+static int read_chunk(int fd, off_t offset, uint32_t chunk_len,
+                      unsigned char *m_area, off_t m_length,
+                      file_list_t *file_l, file_list_t *sys_file_l)
+{
+    chunk_header_t *chunk_hdr;
+    int retval = FALSE;
+
+    if (chunk_len < 8 || chunk_len > 33554432) {
+        return FALSE;
+    }
+
+    chunk_hdr = (chunk_header_t *) cli_malloc(sizeof(chunk_header_t));
+    if (!chunk_hdr) {
+        return FALSE;
+    }
+
+    chunk_hdr->chunk_data = (unsigned char *) cli_malloc(chunk_len);
+    if (!chunk_hdr->chunk_data) {
+        free(chunk_hdr);
+        return FALSE;
+    }
+
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+    /* 8 bytes reads the signature and the free_space */
+    if (!chm_read_data(fd, chunk_hdr->signature, offset, 8,
+                       m_area, m_length)) {
+        goto abort;
+    }
+    if (!chm_read_data(fd, chunk_hdr->chunk_data, offset, chunk_len,
+                       m_area, m_length)) {
+        goto abort;
+    }
+#else
+    if (lseek(fd, offset, SEEK_SET) != offset) {
+        goto abort;
+    }
+    if (cli_readn(fd, chunk_hdr->chunk_data, chunk_len) != chunk_len) {
+        goto abort;
+    }
+    /* rewind so the header fields can be read individually */
+    if (lseek(fd, offset, SEEK_SET) != offset) {
+        goto abort;
+    }
+    if (cli_readn(fd, &chunk_hdr->signature, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &chunk_hdr->free_space, 4) != 4) {
+        goto abort;
+    }
+#endif
+    chunk_hdr->free_space = chm_endian_convert_32(chunk_hdr->free_space);
+
+    if (memcmp(chunk_hdr->signature, "PMGL", 4) == 0) {
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+        /* 12 bytes fill unknown, block_prev and block_next in that order */
+        if (!chm_read_data(fd, (unsigned char *) &chunk_hdr->unknown, offset+8, 12,
+                           m_area, m_length)) {
+            goto abort;
+        }
+#else
+        if (cli_readn(fd, &chunk_hdr->unknown, 4) != 4) {
+            goto abort;
+        }
+        if (cli_readn(fd, &chunk_hdr->block_prev, 4) != 4) {
+            goto abort;
+        }
+        if (cli_readn(fd, &chunk_hdr->block_next, 4) != 4) {
+            goto abort;
+        }
+#endif
+        chunk_hdr->block_next = chm_endian_convert_32(chunk_hdr->block_next);
+        chunk_hdr->block_prev = chm_endian_convert_32(chunk_hdr->block_prev);
+
+        chunk_hdr->num_entries = (uint16_t)((((uint8_t const *)(chunk_hdr->chunk_data))[chunk_len-2] << 0)
+                | (((uint8_t const *)(chunk_hdr->chunk_data))[chunk_len-1] << 8));
+        read_chunk_entries(chunk_hdr->chunk_data, chunk_len,
+                           chunk_hdr->num_entries, file_l, sys_file_l);
+    } else if (memcmp(chunk_hdr->signature, "PMGI", 4) != 0) {
+        goto abort;
+    }
+
+    print_chunk(chunk_hdr);
+    retval=TRUE;
+abort:
+    free(chunk_hdr->chunk_data);
+    free(chunk_hdr);
+    return retval;
+}
+
+/*
+ * Dump the LZX control data through cli_dbgmsg(). Does nothing when
+ * lzx_control is NULL. All numeric fields are uint32_t and are printed
+ * with %u.
+ */
+static void print_sys_control(lzx_control_t *lzx_control)
+{
+    if (!lzx_control) {
+        return;
+    }
+
+    cli_dbgmsg("---- Control ----\n");
+    cli_dbgmsg("Length:\t\t%u\n", lzx_control->length);
+    cli_dbgmsg("Signature:\t%c%c%c%c\n", lzx_control->signature[0],
+        lzx_control->signature[1],lzx_control->signature[2],lzx_control->signature[3]);
+    cli_dbgmsg("Version:\t%u\n", lzx_control->version);
+    cli_dbgmsg("Reset Interval:\t%u\n", lzx_control->reset_interval);
+    cli_dbgmsg("Window Size:\t%u\n", lzx_control->window_size);
+    cli_dbgmsg("Cache Size:\t%u\n\n", lzx_control->cache_size);
+}
+
+/*
+ * Load the LZX control data described by directory entry file_e.
+ *
+ * The entry must be exactly 28 bytes long; the first CHM_CONTROL_LEN
+ * bytes, located at itsf_hdr->data_offset + file_e->offset, are read and
+ * byte-swapped. The signature must be "LZXC" and the version 1 or 2; for
+ * version 2 the reset interval and window size are scaled by
+ * LZX_FRAME_SIZE.
+ *
+ * Returns a heap-allocated lzx_control_t that the caller must free, or
+ * NULL on any failure.
+ */
+static lzx_control_t *read_sys_control(int fd, itsf_header_t *itsf_hdr, file_list_t *file_e,
+                                       unsigned char *m_area, off_t m_length)
+{
+    off_t offset;
+    lzx_control_t *lzx_control;
+
+    if (file_e->length != 28) {
+        return NULL;
+    }
+    offset = itsf_hdr->data_offset + file_e->offset;
+    if (offset < 0) {
+        return NULL;
+    }
+
+    lzx_control = (lzx_control_t *) cli_malloc(sizeof(lzx_control_t));
+    if (!lzx_control) {
+        return NULL;
+    }
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+    if (!chm_read_data(fd, (unsigned char *) lzx_control, offset, CHM_CONTROL_LEN,
+                       m_area, m_length)) {
+        goto abort;
+    }
+#else
+    if (lseek(fd, offset, SEEK_SET) != offset) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_control->length, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_control->signature, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_control->version, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_control->reset_interval, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_control->window_size, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_control->cache_size, 4) != 4) {
+        goto abort;
+    }
+#endif
+    lzx_control->length = chm_endian_convert_32(lzx_control->length);
+    lzx_control->version = chm_endian_convert_32(lzx_control->version);
+    lzx_control->reset_interval = chm_endian_convert_32(lzx_control->reset_interval);
+    lzx_control->window_size = chm_endian_convert_32(lzx_control->window_size);
+    lzx_control->cache_size = chm_endian_convert_32(lzx_control->cache_size);
+
+    /* the signature is raw bytes, not a C string: compare it with memcmp
+     * like the other header readers in this file do */
+    if (memcmp(lzx_control->signature, "LZXC", 4) != 0) {
+        cli_dbgmsg("bad sys_control signature\n");
+        goto abort;
+    }
+    switch(lzx_control->version) {
+        case 1:
+            break;
+        case 2:
+            lzx_control->reset_interval *= LZX_FRAME_SIZE;
+            lzx_control->window_size *= LZX_FRAME_SIZE;
+            break;
+        default:
+            cli_dbgmsg("Unknown sys_control version:%u\n", lzx_control->version);
+            goto abort;
+    }
+
+    print_sys_control(lzx_control);
+    return lzx_control;
+abort:
+    free(lzx_control);
+    return NULL;
+}
+
+/*
+ * Dump the content location through cli_dbgmsg(). Does nothing when
+ * lzx_content is NULL. The 64-bit values are cast to unsigned long long
+ * so that they match the %llu conversion on every ABI.
+ */
+static void print_sys_content(lzx_content_t *lzx_content)
+{
+    if (!lzx_content) {
+        return;
+    }
+
+    cli_dbgmsg("---- Content ----\n");
+    cli_dbgmsg("Offset:\t%llu\n", (unsigned long long) lzx_content->offset);
+    cli_dbgmsg("Length:\t%llu\n\n", (unsigned long long) lzx_content->length);
+}
+
+/*
+ * Build an lzx_content_t describing where the compressed content lives:
+ * its offset is itsf_hdr->data_offset plus the entry's offset and its
+ * length is the entry's length. Nothing is read from fd.
+ *
+ * Returns a heap-allocated record, or NULL when allocation fails.
+ */
+static lzx_content_t *read_sys_content(int fd, itsf_header_t *itsf_hdr, file_list_t *file_e)
+{
+    lzx_content_t *content = (lzx_content_t *) cli_malloc(sizeof(lzx_content_t));
+
+    if (content == NULL) {
+        return NULL;
+    }
+
+    content->length = file_e->length;
+    content->offset = itsf_hdr->data_offset + file_e->offset;
+
+    print_sys_content(content);
+    return content;
+}
+
+/*
+ * Dump the LZX reset table header through cli_dbgmsg(). Does nothing when
+ * lzx_reset_table is NULL. uint32_t fields are printed with %u and the
+ * uint64_t fields are cast to unsigned long long for %llu.
+ */
+static void print_sys_reset_table(lzx_reset_table_t *lzx_reset_table)
+{
+    if (!lzx_reset_table) {
+        return;
+    }
+
+    cli_dbgmsg("---- Reset Table ----\n");
+    cli_dbgmsg("Num Entries:\t%u\n", lzx_reset_table->num_entries);
+    cli_dbgmsg("Entry Size:\t%u\n", lzx_reset_table->entry_size);
+    cli_dbgmsg("Table Offset:\t%u\n", lzx_reset_table->table_offset);
+    cli_dbgmsg("Uncom Len:\t%llu\n", (unsigned long long) lzx_reset_table->uncom_len);
+    cli_dbgmsg("Com Len:\t%llu\n", (unsigned long long) lzx_reset_table->com_len);
+    cli_dbgmsg("Frame Len:\t%llu\n\n", (unsigned long long) lzx_reset_table->frame_len);
+}
+
+/*
+ * Load the LZX reset table header described by directory entry file_e.
+ *
+ * The entry must be at least 40 bytes long. The first four bytes of the
+ * entry are skipped and the following CHM_RESET_TABLE_LEN bytes are read
+ * and byte-swapped. rt_offset records the file offset of the entry itself
+ * (before the skipped bytes). frame_len must equal LZX_FRAME_SIZE and
+ * entry_size must be 4 or 8.
+ *
+ * Returns a heap-allocated lzx_reset_table_t that the caller must free,
+ * or NULL on any failure.
+ */
+static lzx_reset_table_t *read_sys_reset_table(int fd, itsf_header_t *itsf_hdr, file_list_t *file_e,
+                                               unsigned char *m_area, off_t m_length)
+{
+    off_t offset;
+    lzx_reset_table_t *lzx_reset_table;
+
+    if (file_e->length < 40) {
+        return NULL;
+    }
+    /* Skip past unknown entry in offset calc */
+    offset = itsf_hdr->data_offset + file_e->offset + 4;
+
+    if (offset < 0) {
+        return NULL;
+    }
+
+    lzx_reset_table = (lzx_reset_table_t *) cli_malloc(sizeof(lzx_reset_table_t));
+    if (!lzx_reset_table) {
+        return NULL;
+    }
+
+    /* Save the entry offset for later use */
+    lzx_reset_table->rt_offset = offset-4;
+
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+    if (!chm_read_data(fd, (unsigned char *) lzx_reset_table, offset, CHM_RESET_TABLE_LEN,
+                       m_area, m_length)) {
+        goto abort;
+    }
+#else
+    if (lseek(fd, offset, SEEK_SET) != offset) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_reset_table->num_entries, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_reset_table->entry_size, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_reset_table->table_offset, 4) != 4) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_reset_table->uncom_len, 8) != 8) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_reset_table->com_len, 8) != 8) {
+        goto abort;
+    }
+    if (cli_readn(fd, &lzx_reset_table->frame_len, 8) != 8) {
+        goto abort;
+    }
+#endif
+    lzx_reset_table->num_entries = chm_endian_convert_32(lzx_reset_table->num_entries);
+    lzx_reset_table->entry_size = chm_endian_convert_32(lzx_reset_table->entry_size);
+    lzx_reset_table->table_offset = chm_endian_convert_32(lzx_reset_table->table_offset);
+    lzx_reset_table->uncom_len = chm_endian_convert_64(lzx_reset_table->uncom_len);
+    lzx_reset_table->com_len = chm_endian_convert_64(lzx_reset_table->com_len);
+    lzx_reset_table->frame_len = chm_endian_convert_64(lzx_reset_table->frame_len);
+
+    if (lzx_reset_table->frame_len != LZX_FRAME_SIZE) {
+        /* frame_len is 64 bits wide, so it needs the ll length modifier */
+        cli_dbgmsg("bad sys_reset_table frame_len: 0x%llx\n",
+                   (unsigned long long) lzx_reset_table->frame_len);
+        goto abort;
+    }
+    if ((lzx_reset_table->entry_size != 4) && (lzx_reset_table->entry_size != 8)) {
+        cli_dbgmsg("bad sys_reset_table entry_size: 0x%x\n",lzx_reset_table->entry_size);
+        goto abort;
+    }
+    print_sys_reset_table(lzx_reset_table);
+    return lzx_reset_table;
+abort:
+    free(lzx_reset_table);
+    return NULL;
+}
+
+/* *****************************************************************/
+/* This section interfaces to the mspack files. As such, this is a */
+/* little bit dirty compared to my usual code */
+
+#define CHM_SYS_CONTROL_NAME "::DataSpace/Storage/MSCompressed/ControlData"
+#define CHM_SYS_CONTENT_NAME "::DataSpace/Storage/MSCompressed/Content"
+#define CHM_SYS_RESETTABLE_NAME "::DataSpace/Storage/MSCompressed/Transform/{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable"
+
+/*
+ * Decompress the archive's LZX content stream and split it into files.
+ *
+ * The control data, content and reset table entries are looked up by name
+ * in sys_file_l. The stream is decompressed into
+ * "<dirname>/clamav-unchm.bin", which is reopened for reading and unlinked;
+ * every entry of file_l whose section is 1 is then copied out of it into
+ * "<dirname>/<count>-<offset>.chm".
+ *
+ * Returns TRUE once the copy loop has run. Returns FALSE when the temporary
+ * file cannot be created or reopened, a system entry is missing or invalid,
+ * or the decompressor cannot be set up or positioned. Problems with an
+ * individual output file only cause that file to be skipped or left short.
+ */
+static int chm_decompress_stream(int fd, const char *dirname, itsf_header_t *itsf_hdr,
+                file_list_t *file_l, file_list_t *sys_file_l,
+                unsigned char *m_area, off_t m_length)
+{
+    file_list_t *entry;
+    lzx_content_t *lzx_content=NULL;
+    lzx_reset_table_t *lzx_reset_table=NULL;
+    lzx_control_t *lzx_control=NULL;
+    int window_bits, count, length, tmpfd, ofd, retval=FALSE;
+    uint64_t com_offset;
+    struct lzx_stream * stream;
+    char filename[1024];
+
+    snprintf(filename, sizeof(filename), "%s/clamav-unchm.bin", dirname);
+    tmpfd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+    if (tmpfd<0) {
+        cli_dbgmsg("open failed for %s\n", filename);
+        return FALSE;
+    }
+
+    /* Collect the three system entries. If a name occurs more than once the
+       last occurrence wins; the earlier object is freed instead of leaked. */
+    entry = sys_file_l->next;
+    while (entry) {
+        if (strcmp(entry->name, CHM_SYS_CONTROL_NAME) == 0) {
+            free(lzx_control);
+            lzx_control = read_sys_control(fd, itsf_hdr, entry, m_area, m_length);
+        } else if (strcmp(entry->name, CHM_SYS_CONTENT_NAME) == 0) {
+            free(lzx_content);
+            lzx_content = read_sys_content(fd, itsf_hdr, entry);
+        } else if (strcmp(entry->name, CHM_SYS_RESETTABLE_NAME) == 0) {
+            free(lzx_reset_table);
+            lzx_reset_table = read_sys_reset_table(fd, itsf_hdr, entry, m_area, m_length);
+        }
+        entry = entry->next;
+    }
+
+    if (!lzx_content || !lzx_reset_table || !lzx_control) {
+        goto abort;
+    }
+
+    /* Map the window size in bytes to the number of window bits */
+    switch (lzx_control->window_size) {
+        case 0x008000:
+            window_bits = 15;
+            break;
+        case 0x010000:
+            window_bits = 16;
+            break;
+        case 0x020000:
+            window_bits = 17;
+            break;
+        case 0x040000:
+            window_bits = 18;
+            break;
+        case 0x080000:
+            window_bits = 19;
+            break;
+        case 0x100000:
+            window_bits = 20;
+            break;
+        case 0x200000:
+            window_bits = 21;
+            break;
+        default:
+            cli_dbgmsg("bad control window size: 0x%x\n", lzx_control->window_size);
+            goto abort;
+    }
+
+    if (lzx_control->reset_interval % LZX_FRAME_SIZE) {
+        /* Report the value that actually failed the check */
+        cli_dbgmsg("bad reset_interval: 0x%x\n", lzx_control->reset_interval);
+        goto abort;
+    }
+
+    /* NOTE(review): uncom_len is read as an 8-byte field but is narrowed to
+       int here; very large streams would be truncated - confirm intent. */
+    length = lzx_reset_table->uncom_len;
+    length += lzx_control->reset_interval;
+    length &= -lzx_control->reset_interval;
+
+    com_offset = lzx_content->offset;
+    cli_dbgmsg("Compressed offset: %llu\n", com_offset);
+
+    stream = lzx_init(fd, tmpfd, window_bits,
+            lzx_control->reset_interval / LZX_FRAME_SIZE,
+            4096, length, NULL, NULL);
+    if (!stream) {
+        cli_dbgmsg("lzx_init failed\n");
+        goto abort;
+    }
+
+    /* Position the input at the start of the compressed data; decompressing
+       from anywhere else would only produce garbage. */
+    if (lseek(fd, com_offset, SEEK_SET) != (off_t) com_offset) {
+        cli_dbgmsg("seek to compressed data failed\n");
+        lzx_free(stream);
+        goto abort;
+    }
+
+    /* The decompressor's result is not checked: whatever reached the
+       temporary file is still split into output files below. */
+    lzx_decompress(stream, length);
+    lzx_free(stream);
+
+    entry = file_l->next;
+    close(tmpfd);
+
+    /* Reopen the file for reading */
+    tmpfd = open(filename, O_RDONLY|O_BINARY);
+    if (tmpfd < 0) {
+        cli_dbgmsg("re-open output failed\n");
+        goto abort;
+    }
+
+    /* Delete the file; the open descriptor keeps its data reachable */
+    unlink(filename);
+
+    count=0;
+    while(entry) {
+        if (entry->section != 1) {
+            entry = entry->next;
+            continue;
+        }
+        if (lseek(tmpfd, entry->offset, SEEK_SET) != (off_t)entry->offset) {
+            cli_dbgmsg("seek in output failed\n");
+            entry = entry->next;
+            continue;
+        }
+
+        snprintf(filename, sizeof(filename), "%s/%d-%llu.chm", dirname, count,
+                (unsigned long long) entry->offset);
+        ofd = open(filename, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+        if (ofd < 0) {
+            entry = entry->next;
+            continue;
+        }
+        if (chm_copy_file_data(tmpfd, ofd, entry->length) != entry->length) {
+            cli_dbgmsg("failed to copy %llu bytes\n", (unsigned long long) entry->length);
+        }
+
+        close(ofd);
+        entry = entry->next;
+        count++;
+    }
+    close(tmpfd);
+    tmpfd=-1;
+    retval = TRUE;
+
+abort:
+    if (tmpfd>=0) {
+        close(tmpfd);
+    }
+    free(lzx_content);
+    free(lzx_reset_table);
+    free(lzx_control);
+    return retval;
+}
+
+/* ************ End dirty section ********************/
+
+/*
+ * Unpack the CHM archive open on fd into the directory dirname.
+ *
+ * Reads the ITSF and ITSP headers, walks the directory chunks to build the
+ * list of regular files and the list of system files, then hands both lists
+ * to chm_decompress_stream(). When HAVE_MMAP is defined the file is mapped
+ * read-only and the mapping is passed to the readers.
+ *
+ * Returns TRUE when both headers and every directory chunk were read; the
+ * result of the decompression step is not propagated. Returns FALSE on
+ * allocation failure, on a file shorter than CHM_ITSF_MIN_LEN (mmap builds
+ * only), or when a header or chunk cannot be read.
+ */
+int chm_unpack(int fd, const char *dirname)
+{
+    itsf_header_t itsf_hdr;
+    itsp_header_t itsp_hdr;
+    struct stat sb;
+    file_list_t *files, *sys_files;
+    unsigned char *map = NULL;
+    off_t map_len = 0, chunk_off;
+    uint32_t chunks_left;
+    int status = FALSE;
+
+    /* Each list starts with an empty head entry; the real entries hang
+       off its next pointer. */
+    files = (file_list_t *) cli_malloc(sizeof(file_list_t));
+    if (files == NULL) {
+        return FALSE;
+    }
+    sys_files = (file_list_t *) cli_malloc(sizeof(file_list_t));
+    if (sys_files == NULL) {
+        free(files);
+        return FALSE;
+    }
+    files->next = NULL;
+    files->name = NULL;
+    sys_files->next = NULL;
+    sys_files->name = NULL;
+
+#ifdef HAVE_MMAP
+    if (fstat(fd, &sb) == 0) {
+        if (sb.st_size < CHM_ITSF_MIN_LEN) {
+            goto done;
+        }
+        map_len = sb.st_size;
+        map = (unsigned char *) mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
+        if (map == MAP_FAILED) {
+            /* Carry on without a mapping */
+            map = NULL;
+        }
+    }
+#endif
+
+    if (!itsf_read_header(fd, &itsf_hdr, map, map_len)) {
+        goto done;
+    }
+    itsf_print_header(&itsf_hdr);
+
+    if (!itsp_read_header(fd, &itsp_hdr, itsf_hdr.dir_offset, map, map_len)) {
+        goto done;
+    }
+    itsp_print_header(&itsp_hdr);
+
+    /* First chunk follows the ITSP header.
+       TODO: the index_head adjustment is unverified, no sample files
+       of this type are available. */
+    chunk_off = itsf_hdr.dir_offset+CHM_ITSP_LEN;
+    if (itsp_hdr.index_head > 0) {
+        chunk_off += itsp_hdr.index_head * itsp_hdr.block_len;
+    }
+
+    /* Versions before 3 didn't have a data_offset, so derive it.
+       TODO: this calculation is unverified as well, for the same reason. */
+    if (itsf_hdr.version < 3) {
+        itsf_hdr.data_offset = itsf_hdr.dir_offset + CHM_ITSP_LEN + (itsp_hdr.block_len*itsp_hdr.num_blocks);
+    }
+
+    for (chunks_left = itsp_hdr.index_tail - itsp_hdr.index_head + 1;
+         chunks_left != 0; chunks_left--) {
+        if (!read_chunk(fd, chunk_off, itsp_hdr.block_len, map,
+                    map_len, files, sys_files)) {
+            goto done;
+        }
+        chunk_off += itsp_hdr.block_len;
+    }
+
+    chm_decompress_stream(fd, dirname, &itsf_hdr, files, sys_files, map, map_len);
+
+    /* Signal success */
+    status = TRUE;
+done:
+    free_file_list(files);
+    free_file_list(sys_files);
+
+#ifdef HAVE_MMAP
+    if (map) {
+        munmap(map, map_len);
+    }
+#endif
+    return status;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_cvd.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,427 @@
+/*
+ * Copyright (C) 2003 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * untgz() is based on public domain minitar utility by Charles G. Waldman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include "zlib.h"
+#include <time.h>
+#include <errno.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "dsig.h"
+#include "str.h"
+#include "cvd.h"
+
+#define TAR_BLOCKSIZE 512
+
+/*
+ * Extract a gzip-compressed tar stream read from fd into directory destdir.
+ *
+ * Only regular files (type flag '0' or NUL) with no '/' in their name are
+ * accepted; anything else aborts the extraction. fd itself is left open:
+ * the stream is read through a duplicate descriptor.
+ *
+ * Returns 0 on success and -1 on any error. Every resource acquired here
+ * (duplicate descriptor, gz stream, path buffer, current output file) is
+ * released on all paths.
+ */
+int cli_untgz(int fd, const char *destdir)
+{
+    char *path = NULL, osize[13], name[101], type;
+    char block[TAR_BLOCKSIZE];
+    int nbytes, nread, nwritten, in_block = 0, fdd, ret = -1;
+    unsigned int size = 0, pathlen = strlen(destdir) + 100 + 5;
+    FILE *outfile = NULL;
+    gzFile infile;    /* gzFile is already the handle type used by gzread() */
+
+
+    cli_dbgmsg("in cli_untgz()\n");
+
+    if((fdd = dup(fd)) == -1) {
+        cli_errmsg("cli_untgz: Can't duplicate descriptor %d\n", fd);
+        return -1;
+    }
+
+    if((infile = gzdopen(fdd, "rb")) == NULL) {
+        cli_errmsg("cli_untgz: Can't gzdopen() descriptor %d, errno = %d\n", fdd, errno);
+        /* no gz stream took ownership of the duplicate, so close it here */
+        close(fdd);
+        return -1;
+    }
+
+    path = (char *) cli_calloc(sizeof(char), pathlen);
+    if(!path) {
+        cli_errmsg("cli_untgz: Can't allocate memory for path\n");
+        goto done;
+    }
+
+    while(1) {
+
+        nread = gzread(infile, block, TAR_BLOCKSIZE);
+
+        if(!in_block && !nread)
+            break;
+
+        if(nread != TAR_BLOCKSIZE) {
+            cli_errmsg("cli_untgz: Incomplete block read\n");
+            goto done;
+        }
+
+        if(!in_block) {
+            if (block[0] == '\0')  /* We're done */
+                break;
+
+            /* Finish the previous file first, while path still names it */
+            if(outfile) {
+                int rc = fclose(outfile);
+
+                outfile = NULL;
+                if(rc) {
+                    cli_errmsg("cli_untgz: Cannot close file %s\n", path);
+                    goto done;
+                }
+            }
+
+            strncpy(name, block, 100);
+            name[100] = '\0';
+
+            if(strchr(name, '/')) {
+                cli_errmsg("cli_untgz: Slash separators are not allowed in CVD\n");
+                goto done;
+            }
+
+            snprintf(path, pathlen, "%s/%s", destdir, name);
+            cli_dbgmsg("cli_untgz: Unpacking %s\n", path);
+            type = block[156];
+
+            switch(type) {
+                case '0':
+                case '\0':
+                    break;
+                case '5':
+                    cli_errmsg("cli_untgz: Directories are not supported in CVD\n");
+                    goto done;
+                default:
+                    cli_errmsg("cli_untgz: Unknown type flag '%c'\n", type);
+                    goto done;
+            }
+
+            if(!(outfile = fopen(path, "wb"))) {
+                cli_errmsg("cli_untgz: Cannot create file %s\n", path);
+                goto done;
+            }
+
+            strncpy(osize, block + 124, 12);
+            osize[12] = '\0';
+
+            /* "!= 1" also rejects EOF (empty field), which would
+               otherwise leave size unset */
+            if((sscanf(osize, "%o", &size)) != 1) {
+                cli_errmsg("cli_untgz: Invalid size in header\n");
+                goto done;
+            }
+
+            /* A zero-length member has no data blocks: the next block is
+               already the following header and must not be consumed as
+               file contents. */
+            in_block = (size != 0);
+
+        } else { /* write or continue writing file contents */
+            nbytes = size > TAR_BLOCKSIZE ? TAR_BLOCKSIZE : size;
+            nwritten = fwrite(block, 1, nbytes, outfile);
+
+            if(nwritten != nbytes) {
+                cli_errmsg("cli_untgz: Wrote %d instead of %d (%s)\n", nwritten, nbytes, path);
+                goto done;
+            }
+
+            size -= nbytes;
+            if(size == 0)
+                in_block = 0;
+        }
+    }
+
+    ret = 0;
+
+done:
+    /* outfile is only ever non-NULL after path was allocated and filled in */
+    if(outfile && fclose(outfile)) {
+        cli_errmsg("cli_untgz: Cannot close file %s\n", path);
+        ret = -1;
+    }
+
+    gzclose(infile);
+    free(path);
+    return ret;
+}
+
+/*
+ * Parse a CVD header line of the form
+ *   ClamAV-VDB:time:version:sigs:flevel:md5:dsig:builder[:stime]
+ * into a newly allocated struct cl_cvd.
+ *
+ * Fields 1-7 are mandatory; field 8 (creation time in seconds) is optional
+ * and defaults to 0. Returns NULL if the prefix does not match, memory
+ * cannot be allocated, or a mandatory field is missing. The caller releases
+ * the result with cl_cvdfree().
+ */
+struct cl_cvd *cl_cvdparse(const char *head)
+{
+    struct cl_cvd *cvd;
+    char *field;
+
+
+    if(strncmp(head, "ClamAV-VDB:", 11) != 0) {
+        cli_errmsg("cli_cvdparse: Not a CVD file\n");
+        return NULL;
+    }
+
+    cvd = (struct cl_cvd *) cli_malloc(sizeof(struct cl_cvd));
+    if(cvd == NULL) {
+        cli_errmsg("cl_cvdparse: Can't allocate memory for cvd\n");
+        return NULL;
+    }
+
+    /* Start with no strings owned so the shared failure path can free
+       unconditionally */
+    cvd->time = NULL;
+    cvd->md5 = NULL;
+    cvd->dsig = NULL;
+
+    cvd->time = cli_strtok(head, 1, ":");
+    if(cvd->time == NULL) {
+        cli_errmsg("cli_cvdparse: Can't parse the creation time\n");
+        goto fail;
+    }
+
+    field = cli_strtok(head, 2, ":");
+    if(field == NULL) {
+        cli_errmsg("cli_cvdparse: Can't parse the version number\n");
+        goto fail;
+    }
+    cvd->version = atoi(field);
+    free(field);
+
+    field = cli_strtok(head, 3, ":");
+    if(field == NULL) {
+        cli_errmsg("cli_cvdparse: Can't parse the number of signatures\n");
+        goto fail;
+    }
+    cvd->sigs = atoi(field);
+    free(field);
+
+    field = cli_strtok(head, 4, ":");
+    if(field == NULL) {
+        cli_errmsg("cli_cvdparse: Can't parse the functionality level\n");
+        goto fail;
+    }
+    cvd->fl = atoi(field);
+    free(field);
+
+    cvd->md5 = cli_strtok(head, 5, ":");
+    if(cvd->md5 == NULL) {
+        cli_errmsg("cli_cvdparse: Can't parse the MD5 checksum\n");
+        goto fail;
+    }
+
+    cvd->dsig = cli_strtok(head, 6, ":");
+    if(cvd->dsig == NULL) {
+        cli_errmsg("cli_cvdparse: Can't parse the digital signature\n");
+        goto fail;
+    }
+
+    cvd->builder = cli_strtok(head, 7, ":");
+    if(cvd->builder == NULL) {
+        cli_errmsg("cli_cvdparse: Can't parse the builder name\n");
+        goto fail;
+    }
+
+    field = cli_strtok(head, 8, ":");
+    if(field != NULL) {
+        cvd->stime = atoi(field);
+        free(field);
+    } else {
+        cli_dbgmsg("cli_cvdparse: No creation time in seconds (old file format)\n");
+        cvd->stime = 0;
+    }
+
+    return cvd;
+
+fail:
+    free(cvd->time);
+    free(cvd->md5);
+    free(cvd->dsig);
+    free(cvd);
+    return NULL;
+}
+
+/*
+ * Read up to the first 512 bytes of file, cut them at the first line break,
+ * blank out trailing spaces and line breaks, and parse the result with
+ * cl_cvdparse(). Returns NULL if the file cannot be opened or read, or
+ * whatever cl_cvdparse() returns otherwise.
+ */
+struct cl_cvd *cl_cvdhead(const char *file)
+{
+    FILE *fs;
+    char head[513], *eol;
+    int last;
+    unsigned int got;
+
+
+    fs = fopen(file, "rb");
+    if(fs == NULL) {
+        cli_errmsg("cl_cvdhead: Can't open file %s\n", file);
+        return NULL;
+    }
+
+    got = fread(head, 1, 512, fs);
+    if(got == 0) {
+        cli_errmsg("cl_cvdhead: Can't read CVD header in %s\n", file);
+        fclose(fs);
+        return NULL;
+    }
+
+    fclose(fs);
+
+    /* head has room for 512 bytes plus the terminator */
+    head[got] = 0;
+    eol = strpbrk(head, "\n\r");
+    if(eol != NULL)
+        *eol = 0;
+
+    last = got - 1;
+    while(last > 0 && (head[last] == ' ' || head[last] == '\n' || head[last] == '\r')) {
+        head[last] = 0;
+        last--;
+    }
+
+    return cl_cvdparse(head);
+}
+
+/*
+ * Release a struct cl_cvd together with the four strings it owns
+ * (time, md5, dsig, builder). Like free(), accepts NULL and does nothing.
+ */
+void cl_cvdfree(struct cl_cvd *cvd)
+{
+    if(!cvd)
+        return;
+
+    free(cvd->time);
+    free(cvd->md5);
+    free(cvd->dsig);
+    free(cvd->builder);
+    free(cvd);
+}
+
+/*
+ * Verify the CVD container open on fs.
+ *
+ * Rewinds fs, parses the 512-byte header, computes the MD5 of the rest of
+ * the stream with cli_md5stream() and compares it with the header's
+ * checksum; when HAVE_GMP is defined the digital signature is checked too.
+ *
+ * If cvdpt is non-NULL it receives a copy of the parsed header. Only the
+ * non-pointer members of that copy are usable afterwards: the strings are
+ * owned by the temporary header, which is freed before returning, so the
+ * string pointers in *cvdpt are set to NULL.
+ *
+ * Returns 0 on success, CL_ECVD for an unreadable or malformed header,
+ * CL_EMD5 when the checksum cannot be computed or does not match, and
+ * CL_EDSIG when the signature check fails.
+ */
+static int cli_cvdverify(FILE *fs, struct cl_cvd *cvdpt)
+{
+    struct cl_cvd *cvd;
+    char *md5, head[513];
+    int i, ret = 0;
+
+
+    fseek(fs, 0, SEEK_SET);
+    if(fread(head, 1, 512, fs) != 512) {
+        cli_errmsg("cli_cvdverify: Can't read CVD header\n");
+        return CL_ECVD;
+    }
+
+    head[512] = 0;
+    for(i = 511; i > 0 && (head[i] == ' ' || head[i] == 10); head[i] = 0, i--);
+
+    if((cvd = cl_cvdparse(head)) == NULL)
+        return CL_ECVD;
+
+    if(cvdpt) {
+        memcpy(cvdpt, cvd, sizeof(struct cl_cvd));
+        /* don't hand out pointers to strings that are freed below */
+        cvdpt->time = NULL;
+        cvdpt->md5 = NULL;
+        cvdpt->dsig = NULL;
+        cvdpt->builder = NULL;
+    }
+
+    md5 = cli_md5stream(fs, NULL);
+    if(!md5) {
+        /* presumably an allocation or read failure inside cli_md5stream() */
+        cli_errmsg("cli_cvdverify: Can't compute the MD5 checksum\n");
+        cl_cvdfree(cvd);
+        return CL_EMD5;
+    }
+    cli_dbgmsg("MD5(.tar.gz) = %s\n", md5);
+
+    if(strncmp(md5, cvd->md5, 32)) {
+        cli_dbgmsg("cli_cvdverify: MD5 verification error\n");
+        ret = CL_EMD5;
+    }
+
+#ifdef HAVE_GMP
+    if(!ret && cli_versig(md5, cvd->dsig)) {
+        cli_dbgmsg("cli_cvdverify: Digital signature verification error\n");
+        ret = CL_EDSIG;
+    }
+#endif
+
+    free(md5);
+    cl_cvdfree(cvd);
+    return ret;
+}
+
+/*
+ * Open the named CVD file and verify it with cli_cvdverify().
+ * Returns CL_EOPEN if the file cannot be opened, otherwise the result of
+ * the verification.
+ */
+int cl_cvdverify(const char *file)
+{
+    int status;
+    FILE *stream = fopen(file, "rb");
+
+
+    if(stream == NULL) {
+        cli_errmsg("cl_cvdverify: Can't open file %s\n", file);
+        return CL_EOPEN;
+    }
+
+    status = cli_cvdverify(stream, NULL);
+    fclose(stream);
+
+    return status;
+}
+
+/*
+ * Verify the CVD container open on fs, unpack its tarball into a fresh
+ * temporary directory, load that directory with cl_load() and remove it
+ * again.
+ *
+ * When warn is non-zero and the header carries a creation time, a warning
+ * is printed for databases older than 7 days; another warning is printed
+ * when the database's functionality level exceeds cl_retflevel().
+ *
+ * Returns the cli_cvdverify() error if verification fails, CL_ETMPDIR if
+ * the temporary directory cannot be set up, CL_EIO if the descriptor cannot
+ * be positioned past the header, CL_ECVDEXTR if unpacking fails, and the
+ * result of cl_load() otherwise. The temporary directory is removed on
+ * every path that created it.
+ */
+int cli_cvdload(FILE *fs, struct cl_engine **engine, unsigned int *signo, short warn, unsigned int options)
+{
+    char *dir;
+    struct cl_cvd cvd;
+    int ret;
+    time_t s_time;
+    int cfd;
+
+    cli_dbgmsg("in cli_cvdload()\n");
+
+    /* verify */
+
+    if((ret = cli_cvdverify(fs, &cvd)))
+        return ret;
+
+    if(cvd.stime && warn) {
+        time(&s_time);
+        if((int) s_time - cvd.stime > 604800) {
+            cli_warnmsg("**************************************************\n");
+            cli_warnmsg("***  The virus database is older than 7 days!  ***\n");
+            cli_warnmsg("***   Please update it as soon as possible.    ***\n");
+            cli_warnmsg("**************************************************\n");
+        }
+    }
+
+    if(cvd.fl > cl_retflevel()) {
+        cli_warnmsg("***********************************************************\n");
+        cli_warnmsg("***  This version of the ClamAV engine is outdated.     ***\n");
+        cli_warnmsg("*** DON'T PANIC! Read http://www.clamav.net/support/faq ***\n");
+        cli_warnmsg("***********************************************************\n");
+    }
+
+    dir = cli_gentemp(NULL);
+    if(!dir) {
+        /* presumably an allocation failure; never pass NULL to mkdir() */
+        cli_errmsg("cli_cvdload(): Can't generate temporary directory name\n");
+        return CL_ETMPDIR;
+    }
+
+    if(mkdir(dir, 0700)) {
+        cli_errmsg("cli_cvdload(): Can't create temporary directory %s\n", dir);
+        free(dir);
+        return CL_ETMPDIR;
+    }
+
+    cfd = fileno(fs);
+
+    /* use only operations on file descriptors, and not on the FILE* from here on
+     * if we seek the FILE*, the underlying descriptor may not seek as expected
+     * (for example on OpenBSD, cygwin, etc.).
+     * So seek the descriptor directly.
+     */
+
+    if(lseek(cfd, 512, SEEK_SET) == -1) {
+        cli_errmsg("cli_cvdload(): lseek(fs, 512, SEEK_SET) failed\n");
+        ret = CL_EIO;
+        goto cleanup;
+    }
+
+    if(cli_untgz(cfd, dir)) {
+        cli_errmsg("cli_cvdload(): Can't unpack CVD file.\n");
+        ret = CL_ECVDEXTR;
+        goto cleanup;
+    }
+
+    /* load extracted directory */
+    ret = cl_load(dir, engine, signo, options);
+
+cleanup:
+    /* also removes anything a failed extraction left behind */
+    cli_rmdirs(dir);
+    free(dir);
+
+    return ret;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dconf.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,353 @@
+/*
+ * Copyright (C) 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "cltypes.h"
+#include "dconf.h"
+#include "readdb.h"
+#include "str.h"
+#include "others.h"
+
+struct dconf_module { /* one row of the default-configuration table below */
+ const char *mname; /* module name; selects which cli_dconf flag word the row belongs to */
+ const char *sname; /* submodule name; NULL for a module without submodules (the ELF row) */
+ uint32_t bflag; /* bit flag OR-ed into the module's flag word when the row is enabled */
+ uint8_t state; /* default state: non-zero = on, zero = off */
+};
+
+/* Default state of submodules that are only switched on in experimental
+   builds */
+#ifdef CL_EXPERIMENTAL
+#define DCONF_ENABLE_EXPERIMENTAL 1
+#else
+#define DCONF_ENABLE_EXPERIMENTAL 0
+#endif
+
+/*
+ * Built-in defaults: one row per (module, submodule) pair with its flag bit
+ * and default state. The table is terminated by a row whose mname is NULL
+ * and is only ever read, hence const.
+ */
+static const struct dconf_module modules[] = {
+
+    { "PE",       "PARITE",     PE_CONF_PARITE,         1 },
+    { "PE",       "KRIZ",       PE_CONF_KRIZ,           1 },
+    { "PE",       "MAGISTR",    PE_CONF_MAGISTR,        1 },
+    { "PE",       "POLIPOS",    PE_CONF_POLIPOS,        1 },
+    { "PE",       "MD5SECT",    PE_CONF_MD5SECT,        1 },
+    { "PE",       "UPX",        PE_CONF_UPX,            1 },
+    { "PE",       "FSG",        PE_CONF_FSG,            1 },
+
+    { "PE",       "PETITE",     PE_CONF_PETITE,         1 },
+    { "PE",       "PESPIN",     PE_CONF_PESPIN,         1 },
+    { "PE",       "YC",         PE_CONF_YC,             1 },
+    { "PE",       "WWPACK",     PE_CONF_WWPACK,         1 },
+
+    { "PE",       "NSPACK",     PE_CONF_NSPACK,         1 },
+    { "PE",       "MEW",        PE_CONF_MEW,            1 },
+    { "PE",       "UPACK",      PE_CONF_UPACK,          1 },
+    { "PE",       "ASPACK",     PE_CONF_ASPACK,         1 },
+
+    { "ELF",      NULL,         0x1,                    1 },
+
+    { "ARCHIVE",  "RAR",        ARCH_CONF_RAR,          1 },
+    { "ARCHIVE",  "ZIP",        ARCH_CONF_ZIP,          1 },
+    { "ARCHIVE",  "GZIP",       ARCH_CONF_GZ,           1 },
+    { "ARCHIVE",  "BZIP",       ARCH_CONF_BZ,           1 },
+    { "ARCHIVE",  "ARJ",        ARCH_CONF_ARJ,          1 },
+    { "ARCHIVE",  "SZDD",       ARCH_CONF_SZDD,         1 },
+    { "ARCHIVE",  "CAB",        ARCH_CONF_CAB,          1 },
+    { "ARCHIVE",  "CHM",        ARCH_CONF_CHM,          1 },
+    { "ARCHIVE",  "OLE2",       ARCH_CONF_OLE2,         1 },
+    { "ARCHIVE",  "TAR",        ARCH_CONF_TAR,          1 },
+    { "ARCHIVE",  "BINHEX",     ARCH_CONF_BINHEX,       1 },
+    { "ARCHIVE",  "SIS",        ARCH_CONF_SIS,          1 },
+    { "ARCHIVE",  "NSIS",       ARCH_CONF_NSIS,         1 },
+    { "ARCHIVE",  "AUTOIT",     ARCH_CONF_AUTOIT,       1 },
+
+    { "DOCUMENT", "HTML",       DOC_CONF_HTML,          1 },
+    { "DOCUMENT", "RTF",        DOC_CONF_RTF,           1 },
+    { "DOCUMENT", "PDF",        DOC_CONF_PDF,           1 },
+
+    { "MAIL",     "MBOX",       MAIL_CONF_MBOX,         1 },
+    { "MAIL",     "TNEF",       MAIL_CONF_TNEF,         1 },
+    { "MAIL",     "PST",        MAIL_CONF_PST,          1 },
+
+    { "OTHER",    "UUENCODED",  OTHER_CONF_UUENC,       1 },
+    { "OTHER",    "SCRENC",     OTHER_CONF_SCRENC,      1 },
+    { "OTHER",    "RIFF",       OTHER_CONF_RIFF,        1 },
+    { "OTHER",    "JPEG",       OTHER_CONF_JPEG,        1 },
+    { "OTHER",    "CRYPTFF",    OTHER_CONF_CRYPTFF,     1 },
+
+    { "PHISHING", "ENGINE",     PHISHING_CONF_ENGINE,   1 },
+    { "PHISHING", "ENTCONV",    PHISHING_CONF_ENTCONV,  DCONF_ENABLE_EXPERIMENTAL }, /* exp */
+
+    { NULL,       NULL,         0,                      0 }
+};
+
+/*
+ * Allocate a zeroed struct cli_dconf and switch on, in the flag word of the
+ * matching module, the bit of every table row whose default state is on.
+ * Returns NULL if the allocation fails.
+ */
+struct cli_dconf *cli_dconf_init(void)
+{
+    struct cli_dconf *dconf = (struct cli_dconf *) cli_calloc(sizeof(struct cli_dconf), 1);
+    unsigned int idx;
+
+
+    if(dconf == NULL)
+        return NULL;
+
+    for(idx = 0; modules[idx].mname != NULL; idx++) {
+        const char *mname = modules[idx].mname;
+        uint32_t bit = modules[idx].bflag;
+
+        /* rows that default to off contribute nothing */
+        if(!modules[idx].state)
+            continue;
+
+        if(strcmp(mname, "PE") == 0)
+            dconf->pe |= bit;
+        else if(strcmp(mname, "ELF") == 0)
+            dconf->elf |= bit;
+        else if(strcmp(mname, "ARCHIVE") == 0)
+            dconf->archive |= bit;
+        else if(strcmp(mname, "DOCUMENT") == 0)
+            dconf->doc |= bit;
+        else if(strcmp(mname, "MAIL") == 0)
+            dconf->mail |= bit;
+        else if(strcmp(mname, "OTHER") == 0)
+            dconf->other |= bit;
+        else if(strcmp(mname, "PHISHING") == 0)
+            dconf->phishing |= bit;
+    }
+
+    return dconf;
+}
+
+/*
+ * Write the current dynamic configuration to the debug log: one "Module"
+ * line per module (printed when its first table row is met) followed, for
+ * modules that are on, by one line per submodule. The ELF module has no
+ * submodule lines.
+ */
+void cli_dconf_print(struct cli_dconf *dconf)
+{
+    int seen_pe = 0, seen_elf = 0, seen_arch = 0, seen_doc = 0;
+    int seen_mail = 0, seen_other = 0, seen_phishing = 0;
+    unsigned int idx;
+
+
+    cli_dbgmsg("Dynamic engine configuration settings:\n");
+    cli_dbgmsg("--------------------------------------\n");
+
+    for(idx = 0; modules[idx].mname != NULL; idx++) {
+        const char *mname = modules[idx].mname;
+        const char *sname = modules[idx].sname;
+        uint32_t bit = modules[idx].bflag;
+
+        if(strcmp(mname, "PE") == 0) {
+            if(!seen_pe) {
+                seen_pe = 1;
+                cli_dbgmsg("Module PE: %s\n", dconf->pe ? "On" : "Off");
+            }
+            if(dconf->pe)
+                cli_dbgmsg(" * Submodule %10s:\t%s\n", sname, (dconf->pe & bit) ? "On" : "** Off **");
+
+        } else if(strcmp(mname, "ELF") == 0) {
+            if(!seen_elf) {
+                seen_elf = 1;
+                cli_dbgmsg("Module ELF: %s\n", dconf->elf ? "On" : "Off");
+            }
+
+        } else if(strcmp(mname, "ARCHIVE") == 0) {
+            if(!seen_arch) {
+                seen_arch = 1;
+                cli_dbgmsg("Module ARCHIVE: %s\n", dconf->archive ? "On" : "Off");
+            }
+            if(dconf->archive)
+                cli_dbgmsg(" * Submodule %10s:\t%s\n", sname, (dconf->archive & bit) ? "On" : "** Off **");
+
+        } else if(strcmp(mname, "DOCUMENT") == 0) {
+            if(!seen_doc) {
+                seen_doc = 1;
+                cli_dbgmsg("Module DOCUMENT: %s\n", dconf->doc ? "On" : "Off");
+            }
+            if(dconf->doc)
+                cli_dbgmsg(" * Submodule %10s:\t%s\n", sname, (dconf->doc & bit) ? "On" : "** Off **");
+
+        } else if(strcmp(mname, "MAIL") == 0) {
+            if(!seen_mail) {
+                seen_mail = 1;
+                cli_dbgmsg("Module MAIL: %s\n", dconf->mail ? "On" : "Off");
+            }
+            if(dconf->mail)
+                cli_dbgmsg(" * Submodule %10s:\t%s\n", sname, (dconf->mail & bit) ? "On" : "** Off **");
+
+        } else if(strcmp(mname, "OTHER") == 0) {
+            if(!seen_other) {
+                seen_other = 1;
+                cli_dbgmsg("Module OTHER: %s\n", dconf->other ? "On" : "Off");
+            }
+            if(dconf->other)
+                cli_dbgmsg(" * Submodule %10s:\t%s\n", sname, (dconf->other & bit) ? "On" : "** Off **");
+
+        } else if(strcmp(mname, "PHISHING") == 0) {
+            if(!seen_phishing) {
+                seen_phishing = 1;
+                cli_dbgmsg("Module PHISHING %s\n", dconf->phishing ? "On" : "Off");
+            }
+            if(dconf->phishing)
+                cli_dbgmsg(" * Submodule %10s:\t%s\n", sname, (dconf->phishing & bit) ? "On" : "** Off **");
+        }
+    }
+}
+
+/*
+ * Check the optional functionality-level range of a configuration line.
+ *
+ * Token number `field` of the ':'-separated entry is the minimum level and
+ * token `field + 1` the maximum. Returns 1 when the engine's level
+ * (cl_retflevel()) lies inside the range or the tokens are absent, and 0
+ * when a token does not start with a digit or the level is out of range.
+ */
+static int chkflevel(const char *entry, int field)
+{
+    char *pt;
+
+
+    if(!(pt = cli_strtok(entry, field, ":")))
+        return 1;
+
+    /* min version; the cast keeps isdigit() defined for bytes >= 0x80 */
+    if(!isdigit((unsigned char) *pt) || (unsigned int) atoi(pt) > cl_retflevel()) {
+        free(pt);
+        return 0;
+    }
+    free(pt);
+
+    if(!(pt = cli_strtok(entry, field + 1, ":")))
+        return 1;
+
+    /* max version */
+    if(!isdigit((unsigned char) *pt) || (unsigned int) atoi(pt) < cl_retflevel()) {
+        free(pt);
+        return 0;
+    }
+    free(pt);
+
+    return 1;
+}
+
+/*
+ * Load dynamic configuration overrides from fd into the engine.
+ *
+ * The engine is initialised first. Each line of the form "<MODULE>:0x<hex>"
+ * whose functionality-level range (checked by chkflevel()) admits this
+ * engine replaces the flag word of that module. A matching line whose value
+ * cannot be parsed stops processing with CL_EMALFDB.
+ *
+ * Returns CL_SUCCESS, the error from cli_initengine(), or CL_EMALFDB; on
+ * either error the engine is released with cl_free().
+ */
+int cli_dconf_load(FILE *fd, struct cl_engine **engine, unsigned int options)
+{
+    /* line prefixes, in the order they are tried, with their lengths */
+    static const struct {
+        const char *tag;
+        unsigned int len;
+    } sections[] = {
+        { "PE:", 3 },
+        { "ELF:", 4 },
+        { "ARCHIVE:", 8 },
+        { "DOCUMENT:", 9 },
+        { "MAIL:", 5 },
+        { "OTHER:", 6 },
+        { "PHISHING:", 9 }
+    };
+    char buffer[FILEBUFF];
+    unsigned int line = 0, s;
+    int ret;
+    struct cli_dconf *dconf;
+    uint32_t val;
+
+
+    ret = cli_initengine(engine, options);
+    if(ret) {
+        cl_free(*engine);
+        return ret;
+    }
+
+    dconf = (struct cli_dconf *) (*engine)->dconf;
+
+    /* !ret is tested first so no further line is read after an error */
+    while(!ret && fgets(buffer, FILEBUFF, fd)) {
+        line++;
+        cli_chomp(buffer);
+
+        for(s = 0; s < sizeof(sections) / sizeof(sections[0]); s++) {
+            if(strncmp(buffer, sections[s].tag, sections[s].len) || !chkflevel(buffer, 2))
+                continue;
+
+            if(sscanf(buffer + sections[s].len, "0x%x", &val) != 1) {
+                ret = CL_EMALFDB;
+                break;
+            }
+
+            switch(s) {
+                case 0:
+                    dconf->pe = val;
+                    break;
+                case 1:
+                    dconf->elf = val;
+                    break;
+                case 2:
+                    dconf->archive = val;
+                    break;
+                case 3:
+                    dconf->doc = val;
+                    break;
+                case 4:
+                    dconf->mail = val;
+                    break;
+                case 5:
+                    dconf->other = val;
+                    break;
+                case 6:
+                    dconf->phishing = val;
+                    break;
+            }
+        }
+    }
+
+    if(ret) {
+        cli_errmsg("Problem parsing configuration file at line %u\n", line);
+        cl_free(*engine);
+        return ret;
+    }
+
+    return CL_SUCCESS;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_dsig.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2003 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * Number encoding routines are based on yyyRSA by Erik Thiele
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifdef HAVE_GMP
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <gmp.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "dsig.h"
+#include "str.h"
+
+#define CLI_NSTR "118640995551645342603070001658453189751527774412027743746599405743243142607464144767361060640655844749760788890022283424922762488917565551002467771109669598189410434699034532232228621591089508178591428456220796841621637175567590476666928698770143328137383952820383197532047771780196576957695822641224262693037"
+
+#define CLI_ESTR "100001027"
+
+/*
+ * Map one character of an encoded signature to its 6-bit value: the
+ * position of the character in the alphabet a-z, A-Z, 0-9, '+', '/'.
+ *
+ * Returns 0..63, or -1 (after logging an error) for a character outside the
+ * alphabet. The return type is int so that -1 stays distinguishable from a
+ * valid value; returned through an unsigned char it would turn into 255 and
+ * a "< 0" test in the caller could never fire.
+ */
+static int cli_ndecode(unsigned char value)
+{
+    unsigned int i;
+    static const char ncodec[] = {
+        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
+        'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+        'y', 'z',
+        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
+        'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+        'Y', 'Z',
+        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+        '+', '/'
+    };
+
+
+    for(i = 0; i < sizeof(ncodec); i++)
+        if((unsigned char) ncodec[i] == value)
+            return (int) i;
+
+    cli_errmsg("cli_ndecode: value out of range\n");
+    return -1;
+}
+
+/*
+ * Decode an encoded signature and apply the public-key operation to it.
+ *
+ * Character i of sig contributes its 6-bit value shifted left by 6*i bits
+ * to the cipher number c; the result c^e mod n is then written big-endian
+ * into a buffer of plen bytes (plus a terminating zero byte).
+ *
+ * Returns the buffer, which the caller releases with free(), or NULL if sig
+ * contains a character outside the alphabet or memory cannot be allocated.
+ */
+unsigned char *cli_decodesig(const char *sig, unsigned int plen, mpz_t e, mpz_t n)
+{
+    int i, slen = strlen(sig), dec;
+    unsigned char *plain;
+    mpz_t r, p, c;
+
+
+    mpz_init(r);
+    mpz_init(c);
+
+    for(i = 0; i < slen; i++) {
+        dec = cli_ndecode((unsigned char) sig[i]);
+        /* Valid values are 0..63. The upper bound also catches an error
+           indicator that was narrowed to an unsigned value on its way
+           here, which a plain "< 0" test would miss. */
+        if(dec < 0 || dec > 63) {
+            mpz_clear(r);
+            mpz_clear(c);
+            return NULL;
+        }
+
+        mpz_set_ui(r, dec);
+        mpz_mul_2exp(r, r, 6 * i);
+        mpz_add(c, c, r);
+    }
+
+    plain = (unsigned char *) cli_calloc(plen + 1, sizeof(unsigned char));
+    if(!plain) {
+        cli_errmsg("cli_decodesig: Can't allocate memory for 'plain'\n");
+        mpz_clear(r);
+        mpz_clear(c);
+        return NULL;
+    }
+
+    mpz_init(p);
+    mpz_powm(p, c, e, n); /* plain = cipher^e mod n */
+    mpz_clear(c);
+
+    for(i = plen - 1; i >= 0; i--) { /* reverse */
+        mpz_tdiv_qr_ui(p, r, p, 256);
+        plain[i] = mpz_get_ui(r);
+    }
+
+    mpz_clear(p);
+    mpz_clear(r);
+
+    return plain;
+}
+
+/*
+ * Check that the digital signature dsig matches the hex MD5 string md5.
+ *
+ * The signature is decoded with the built-in public key (CLI_NSTR,
+ * CLI_ESTR) into 16 bytes, converted to hex and compared with md5.
+ *
+ * Returns CL_SUCCESS on a match, CL_EMD5 if md5 is not a plausible 32
+ * character digest, and CL_EDSIG if the signature cannot be decoded or does
+ * not match.
+ */
+int cli_versig(const char *md5, const char *dsig)
+{
+    mpz_t n, e;
+    char *pt, *pt2;
+    int ret = CL_EDSIG;
+
+
+    /* the cast keeps isalnum() defined for bytes >= 0x80 */
+    if(strlen(md5) != 32 || !isalnum((unsigned char) md5[0])) {
+        /* someone is trying to fool us with empty/malformed MD5 ? */
+        cli_errmsg("SECURITY WARNING: MD5 basic test failure.\n");
+        return CL_EMD5;
+    }
+
+    mpz_init_set_str(n, CLI_NSTR, 10);
+    mpz_init_set_str(e, CLI_ESTR, 10);
+
+    pt = (char *) cli_decodesig(dsig, 16, e, n);
+    if(!pt)
+        goto done;
+
+    pt2 = cli_str2hex(pt, 16);
+    free(pt);
+    if(!pt2) {
+        /* presumably an allocation failure; never compare against NULL */
+        cli_errmsg("cli_versig: Can't convert the decoded signature to hex\n");
+        goto done;
+    }
+
+    cli_dbgmsg("cli_versig: Decoded signature: %s\n", pt2);
+
+    if(strncmp(md5, pt2, 32)) {
+        cli_dbgmsg("cli_versig: Signature doesn't match.\n");
+    } else {
+        cli_dbgmsg("cli_versig: Digital signature is correct.\n");
+        ret = CL_SUCCESS;
+    }
+
+    free(pt2);
+
+done:
+    mpz_clear(n);
+    mpz_clear(e);
+    return ret;
+}
+#endif
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_elf.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,536 @@
+/*
+ * Copyright (C) 2005 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <time.h>
+
+#include "cltypes.h"
+#include "elf.h"
+#include "clamav.h"
+#include "execs.h"
+
+/* Return v as is when c is zero, otherwise v with its two bytes swapped. */
+static inline uint16_t EC16(uint16_t v, uint8_t c)
+{
+    return c ? (uint16_t) ((v >> 8) + (v << 8)) : v;
+}
+
+/* Return v as is when c is zero, otherwise v with its four bytes reversed. */
+static inline uint32_t EC32(uint32_t v, uint8_t c)
+{
+    if(c) {
+        const uint32_t top = v << 24;
+        const uint32_t upper = (v & 0x0000FF00) << 8;
+        const uint32_t lower = (v & 0x00FF0000) >> 8;
+        const uint32_t bottom = v >> 24;
+
+        return (bottom | lower | upper | top);
+    }
+    return v;
+}
+
+/*
+ * Translate a virtual address into a file offset using the program headers.
+ *
+ * The first header whose [p_vaddr, p_vaddr + p_memsz) range contains vaddr
+ * is used. On success *err is set to 0 and the offset is returned; when no
+ * header matches, *err is set to 1 and 0 is returned.
+ */
+static uint32_t cli_rawaddr(uint32_t vaddr, struct elf_program_hdr32 *ph, uint16_t phnum, uint8_t conv, uint8_t *err)
+{
+    uint16_t idx;
+
+
+    for(idx = 0; idx < phnum; idx++) {
+        if(EC32(ph[idx].p_vaddr, conv) > vaddr)
+            continue;
+        if(EC32(ph[idx].p_vaddr, conv) + EC32(ph[idx].p_memsz, conv) <= vaddr)
+            continue;
+
+        /* vaddr lies inside this segment */
+        *err = 0;
+        return vaddr - EC32(ph[idx].p_vaddr, conv) + EC32(ph[idx].p_offset, conv);
+    }
+
+    *err = 1;
+    return 0;
+}
+
+/*
+ * Result for a malformed ELF file: when DETECT_BROKEN is set the file is
+ * reported as Broken.Executable (CL_VIRUS), otherwise `fallback` is returned.
+ */
+static int cli_elf_broken(cli_ctx *ctx, int fallback)
+{
+    if(DETECT_BROKEN) {
+        if(ctx->virname)
+            *ctx->virname = "Broken.Executable";
+        return CL_VIRUS;
+    }
+    return fallback;
+}
+
+/*
+ * Parse the headers of a 32-bit ELF file read from descriptor `desc` and
+ * log what is found through cli_dbgmsg().
+ *
+ * Returns CL_CLEAN for non-ELF, 64-bit, truncated or well-formed files,
+ * CL_EFORMAT for suspicious header values, CL_EMEM when a header table
+ * cannot be allocated, and CL_VIRUS (virus name "Broken.Executable") for
+ * malformed files when DETECT_BROKEN is set.
+ */
+int cli_scanelf(int desc, cli_ctx *ctx)
+{
+    struct elf_file_hdr32 file_hdr;
+    struct elf_section_hdr32 *section_hdr;
+    struct elf_program_hdr32 *program_hdr;
+    uint16_t shnum, phnum, shentsize, phentsize;
+    uint32_t entry, fentry, shoff, phoff, i;
+    uint8_t conv = 0, err;
+
+
+    cli_dbgmsg("in cli_scanelf\n");
+
+    if(read(desc, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
+        /* Not an ELF file? */
+        cli_dbgmsg("ELF: Can't read file header\n");
+        return CL_CLEAN;
+    }
+
+    if(memcmp(file_hdr.e_ident, "\x7f\x45\x4c\x46", 4)) {
+        cli_dbgmsg("ELF: Not an ELF file\n");
+        return CL_CLEAN;
+    }
+
+    if(file_hdr.e_ident[4] != 1) {
+        cli_dbgmsg("ELF: 64-bit binaries are not supported (yet)\n");
+        return CL_CLEAN;
+    }
+
+    /* conv is set when the file's byte order differs from the host's */
+    if(file_hdr.e_ident[5] == 1) {
+#if WORDS_BIGENDIAN == 0
+        cli_dbgmsg("ELF: File is little-endian - conversion not required\n");
+#else
+        cli_dbgmsg("ELF: File is little-endian - data conversion enabled\n");
+        conv = 1;
+#endif
+    } else {
+#if WORDS_BIGENDIAN == 0
+        cli_dbgmsg("ELF: File is big-endian - data conversion enabled\n");
+        conv = 1;
+#else
+        cli_dbgmsg("ELF: File is big-endian - conversion not required\n");
+#endif
+    }
+
+    switch(EC16(file_hdr.e_type, conv)) {
+        case 0x0: /* ET_NONE */
+            cli_dbgmsg("ELF: File type: None\n");
+            break;
+        case 0x1: /* ET_REL */
+            cli_dbgmsg("ELF: File type: Relocatable\n");
+            break;
+        case 0x2: /* ET_EXEC */
+            cli_dbgmsg("ELF: File type: Executable\n");
+            break;
+        case 0x3: /* ET_DYN */
+            /* was mislabelled "Core"; ET_DYN is a shared object */
+            cli_dbgmsg("ELF: File type: Shared object\n");
+            break;
+        case 0x4: /* ET_CORE */
+            cli_dbgmsg("ELF: File type: Core\n");
+            break;
+        default:
+            cli_dbgmsg("ELF: File type: Unknown (%d)\n", EC16(file_hdr.e_type, conv));
+    }
+
+    switch(EC16(file_hdr.e_machine, conv)) {
+        /* Due to a huge list, we only include the most popular machines here */
+        case 0x0: /* EM_NONE */
+            cli_dbgmsg("ELF: Machine type: None\n");
+            break;
+        case 0x2: /* EM_SPARC */
+            cli_dbgmsg("ELF: Machine type: SPARC\n");
+            break;
+        case 0x3: /* EM_386 */
+            cli_dbgmsg("ELF: Machine type: Intel 80386\n");
+            break;
+        case 0x4: /* EM_68K */
+            cli_dbgmsg("ELF: Machine type: Motorola 68000\n");
+            break;
+        case 0x8: /* EM_MIPS */
+            cli_dbgmsg("ELF: Machine type: MIPS RS3000\n");
+            break;
+        case 0x15: /* EM_PARISC */
+            cli_dbgmsg("ELF: Machine type: HPPA\n");
+            break;
+        case 0x20: /* EM_PPC */
+            cli_dbgmsg("ELF: Machine type: PowerPC\n");
+            break;
+        case 0x21: /* EM_PPC64 */
+            cli_dbgmsg("ELF: Machine type: PowerPC 64-bit\n");
+            break;
+        case 0x22: /* EM_S390 */
+            cli_dbgmsg("ELF: Machine type: IBM S390\n");
+            break;
+        case 0x40: /* EM_ARM */
+            cli_dbgmsg("ELF: Machine type: ARM\n");
+            break;
+        case 0x41: /* EM_FAKE_ALPHA */
+            cli_dbgmsg("ELF: Machine type: Digital Alpha\n");
+            break;
+        case 0x43: /* EM_SPARCV9 */
+            cli_dbgmsg("ELF: Machine type: SPARC v9 64-bit\n");
+            break;
+        case 0x50: /* EM_IA_64 */
+            cli_dbgmsg("ELF: Machine type: IA64\n");
+            break;
+        default:
+            cli_dbgmsg("ELF: Machine type: Unknown (%d)\n", EC16(file_hdr.e_machine, conv));
+    }
+
+    entry = EC32(file_hdr.e_entry, conv);
+
+    /* Program headers */
+
+    phnum = EC16(file_hdr.e_phnum, conv);
+    cli_dbgmsg("ELF: Number of program headers: %d\n", phnum);
+    if(phnum > 128) {
+        cli_dbgmsg("ELF: Suspicious number of program headers\n");
+        return cli_elf_broken(ctx, CL_EFORMAT);
+    }
+
+    if(phnum && entry) {
+
+        phentsize = EC16(file_hdr.e_phentsize, conv);
+        if(phentsize != sizeof(struct elf_program_hdr32)) {
+            cli_dbgmsg("ELF: phentsize != sizeof(struct elf_program_hdr32)\n");
+            return cli_elf_broken(ctx, CL_EFORMAT);
+        }
+
+        phoff = EC32(file_hdr.e_phoff, conv);
+        cli_dbgmsg("ELF: Program header table offset: %d\n", phoff);
+        if((uint32_t) lseek(desc, phoff, SEEK_SET) != phoff)
+            return cli_elf_broken(ctx, CL_CLEAN);
+
+        program_hdr = (struct elf_program_hdr32 *) cli_calloc(phnum, phentsize);
+        if(!program_hdr) {
+            cli_errmsg("ELF: Can't allocate memory for program headers\n");
+            return CL_EMEM;
+        }
+
+        cli_dbgmsg("------------------------------------\n");
+
+        for(i = 0; i < phnum; i++) {
+
+            if(read(desc, &program_hdr[i], sizeof(struct elf_program_hdr32)) != sizeof(struct elf_program_hdr32)) {
+                cli_dbgmsg("ELF: Can't read segment #%d\n", i);
+                cli_dbgmsg("ELF: Possibly broken ELF file\n");
+                free(program_hdr);
+                return cli_elf_broken(ctx, CL_CLEAN);
+            }
+
+            cli_dbgmsg("ELF: Segment #%d\n", i);
+            cli_dbgmsg("ELF: Segment type: 0x%x\n", EC32(program_hdr[i].p_type, conv));
+            cli_dbgmsg("ELF: Segment offset: 0x%x\n", EC32(program_hdr[i].p_offset, conv));
+            cli_dbgmsg("ELF: Segment virtual address: 0x%x\n", EC32(program_hdr[i].p_vaddr, conv));
+            cli_dbgmsg("ELF: Segment real size: 0x%x\n", EC32(program_hdr[i].p_filesz, conv));
+            cli_dbgmsg("ELF: Segment virtual size: 0x%x\n", EC32(program_hdr[i].p_memsz, conv));
+            cli_dbgmsg("------------------------------------\n");
+        }
+
+        fentry = cli_rawaddr(entry, program_hdr, phnum, conv, &err);
+        free(program_hdr);
+        if(err) {
+            cli_dbgmsg("ELF: Can't calculate file offset of entry point\n");
+            return cli_elf_broken(ctx, CL_EFORMAT);
+        }
+        cli_dbgmsg("ELF: Entry point address: 0x%.8x\n", entry);
+        cli_dbgmsg("ELF: Entry point offset: 0x%.8x (%d)\n", fentry, fentry);
+    }
+
+    /* Sections */
+
+    shnum = EC16(file_hdr.e_shnum, conv);
+    cli_dbgmsg("ELF: Number of sections: %d\n", shnum);
+    if(shnum > 256) {
+        cli_dbgmsg("ELF: Suspicious number of sections\n");
+        return cli_elf_broken(ctx, CL_EFORMAT);
+    }
+
+    shentsize = EC16(file_hdr.e_shentsize, conv);
+    if(shentsize != sizeof(struct elf_section_hdr32)) {
+        cli_dbgmsg("ELF: shentsize != sizeof(struct elf_section_hdr32)\n");
+        return cli_elf_broken(ctx, CL_EFORMAT);
+    }
+
+    shoff = EC32(file_hdr.e_shoff, conv);
+    cli_dbgmsg("ELF: Section header table offset: %d\n", shoff);
+    if((uint32_t) lseek(desc, shoff, SEEK_SET) != shoff) {
+        /* Possibly broken end of file */
+        return cli_elf_broken(ctx, CL_CLEAN);
+    }
+
+    section_hdr = (struct elf_section_hdr32 *) cli_calloc(shnum, shentsize);
+    if(!section_hdr) {
+        cli_errmsg("ELF: Can't allocate memory for section headers\n");
+        return CL_EMEM;
+    }
+
+    cli_dbgmsg("------------------------------------\n");
+
+    for(i = 0; i < shnum; i++) {
+
+        if(read(desc, &section_hdr[i], sizeof(struct elf_section_hdr32)) != sizeof(struct elf_section_hdr32)) {
+            cli_dbgmsg("ELF: Can't read section header\n");
+            cli_dbgmsg("ELF: Possibly broken ELF file\n");
+            free(section_hdr);
+            return cli_elf_broken(ctx, CL_CLEAN);
+        }
+
+        cli_dbgmsg("ELF: Section %d\n", i);
+        cli_dbgmsg("ELF: Section offset: %d\n", EC32(section_hdr[i].sh_offset, conv));
+        cli_dbgmsg("ELF: Section size: %d\n", EC32(section_hdr[i].sh_size, conv));
+
+        switch(EC32(section_hdr[i].sh_type, conv)) {
+            case 0x6: /* SHT_DYNAMIC */
+                cli_dbgmsg("ELF: Section type: Dynamic linking information\n");
+                break;
+            case 0xb: /* SHT_DYNSYM */
+                cli_dbgmsg("ELF: Section type: Symbols for dynamic linking\n");
+                break;
+            case 0xf: /* SHT_FINI_ARRAY */
+                cli_dbgmsg("ELF: Section type: Array of pointers to termination functions\n");
+                break;
+            case 0x5: /* SHT_HASH */
+                cli_dbgmsg("ELF: Section type: Symbol hash table\n");
+                break;
+            case 0xe: /* SHT_INIT_ARRAY */
+                cli_dbgmsg("ELF: Section type: Array of pointers to initialization functions\n");
+                break;
+            case 0x8: /* SHT_NOBITS */
+                cli_dbgmsg("ELF: Section type: Empty section (NOBITS)\n");
+                break;
+            case 0x7: /* SHT_NOTE */
+                cli_dbgmsg("ELF: Section type: Note section\n");
+                break;
+            case 0x0: /* SHT_NULL */
+                cli_dbgmsg("ELF: Section type: Null (no associated section)\n");
+                break;
+            case 0x10: /* SHT_PREINIT_ARRAY */
+                cli_dbgmsg("ELF: Section type: Array of pointers to preinit functions\n");
+                break;
+            case 0x1: /* SHT_PROGBITS */
+                cli_dbgmsg("ELF: Section type: Program information\n");
+                break;
+            case 0x9: /* SHT_REL */
+                cli_dbgmsg("ELF: Section type: Relocation entries w/o explicit addends\n");
+                break;
+            case 0x4: /* SHT_RELA */
+                cli_dbgmsg("ELF: Section type: Relocation entries with explicit addends\n");
+                break;
+            case 0x3: /* SHT_STRTAB */
+                cli_dbgmsg("ELF: Section type: String table\n");
+                break;
+            case 0x2: /* SHT_SYMTAB */
+                cli_dbgmsg("ELF: Section type: Symbol table\n");
+                break;
+            case 0x6ffffffd: /* SHT_GNU_verdef */
+                cli_dbgmsg("ELF: Section type: Provided symbol versions\n");
+                break;
+            case 0x6ffffffe: /* SHT_GNU_verneed */
+                cli_dbgmsg("ELF: Section type: Required symbol versions\n");
+                break;
+            case 0x6fffffff: /* SHT_GNU_versym */
+                cli_dbgmsg("ELF: Section type: Symbol Version Table\n");
+                break;
+            default :
+                cli_dbgmsg("ELF: Section type: Unknown\n");
+        }
+
+        if(EC32(section_hdr[i].sh_flags, conv) & 0x1) /* SHF_WRITE */
+            cli_dbgmsg("ELF: Section contains writable data\n");
+
+        if(EC32(section_hdr[i].sh_flags, conv) & 0x2) /* SHF_ALLOC */
+            cli_dbgmsg("ELF: Section occupies memory\n");
+
+        if(EC32(section_hdr[i].sh_flags, conv) & 0x4) /* SHF_EXECINSTR */
+            cli_dbgmsg("ELF: Section contains executable code\n");
+
+        cli_dbgmsg("------------------------------------\n");
+    }
+
+    free(section_hdr);
+    return CL_CLEAN;
+}
+
+/*
+ * Read the headers of a 32-bit ELF file from descriptor `desc` and fill
+ * `elfinfo` with the entry point file offset (ep), the section count
+ * (nsections) and a newly allocated array (section) holding each section's
+ * rva, raw offset and raw size.
+ *
+ * Returns 0 on success and -1 on any failure. elfinfo->section is set to
+ * NULL on the failure paths that follow its allocation, so the caller owns
+ * the array only after a successful return.
+ */
+int cli_elfheader(int desc, struct cli_exe_info *elfinfo)
+{
+    struct elf_file_hdr32 file_hdr;
+    struct elf_section_hdr32 *section_hdr;
+    struct elf_program_hdr32 *program_hdr;
+    uint16_t shnum, phnum, shentsize, phentsize, i;
+    uint32_t entry, fentry = 0, shoff, phoff;
+    uint8_t conv = 0, err;
+
+
+    cli_dbgmsg("in cli_elfheader\n");
+
+    if(read(desc, &file_hdr, sizeof(file_hdr)) != sizeof(file_hdr)) {
+        /* Not an ELF file? */
+        cli_dbgmsg("ELF: Can't read file header\n");
+        return -1;
+    }
+
+    if(memcmp(file_hdr.e_ident, "\x7f\x45\x4c\x46", 4)) {
+        cli_dbgmsg("ELF: Not an ELF file\n");
+        return -1;
+    }
+
+    if(file_hdr.e_ident[4] != 1) {
+        cli_dbgmsg("ELF: 64-bit binaries are not supported (yet)\n");
+        return -1;
+    }
+
+    /* conv is set when the file's byte order differs from the host's */
+    if(file_hdr.e_ident[5] == 1) {
+#if WORDS_BIGENDIAN == 1
+        conv = 1;
+#endif
+    } else {
+#if WORDS_BIGENDIAN == 0
+        conv = 1;
+#endif
+    }
+
+    phnum = EC16(file_hdr.e_phnum, conv);
+    if(phnum > 128) {
+        cli_dbgmsg("ELF: Suspicious number of program headers\n");
+        return -1;
+    }
+    entry = EC32(file_hdr.e_entry, conv);
+
+    /* the entry point offset stays 0 when there is nothing to map it with */
+    if(phnum && entry) {
+        phentsize = EC16(file_hdr.e_phentsize, conv);
+        if(phentsize != sizeof(struct elf_program_hdr32)) {
+            cli_dbgmsg("ELF: phentsize != sizeof(struct elf_program_hdr32)\n");
+            return -1;
+        }
+
+        phoff = EC32(file_hdr.e_phoff, conv);
+        if((uint32_t) lseek(desc, phoff, SEEK_SET) != phoff) {
+            return -1;
+        }
+
+        program_hdr = (struct elf_program_hdr32 *) cli_calloc(phnum, phentsize);
+        if(!program_hdr) {
+            cli_errmsg("ELF: Can't allocate memory for program headers\n");
+            return -1;
+        }
+
+        for(i = 0; i < phnum; i++) {
+            if(read(desc, &program_hdr[i], sizeof(struct elf_program_hdr32)) != sizeof(struct elf_program_hdr32)) {
+                cli_dbgmsg("ELF: Can't read segment #%d\n", i);
+                free(program_hdr);
+                return -1;
+            }
+        }
+
+        fentry = cli_rawaddr(entry, program_hdr, phnum, conv, &err);
+        free(program_hdr);
+        if(err) {
+            cli_dbgmsg("ELF: Can't calculate file offset of entry point\n");
+            return -1;
+        }
+    }
+
+    elfinfo->ep = fentry;
+
+    shnum = EC16(file_hdr.e_shnum, conv);
+    if(shnum > 256) {
+        cli_dbgmsg("ELF: Suspicious number of sections\n");
+        return -1;
+    }
+    elfinfo->nsections = shnum;
+
+    shentsize = EC16(file_hdr.e_shentsize, conv);
+    if(shentsize != sizeof(struct elf_section_hdr32)) {
+        cli_dbgmsg("ELF: shentsize != sizeof(struct elf_section_hdr32)\n");
+        return -1;
+    }
+
+    shoff = EC32(file_hdr.e_shoff, conv);
+    if((uint32_t) lseek(desc, shoff, SEEK_SET) != shoff) {
+        /* Possibly broken end of file */
+        return -1;
+    }
+
+    elfinfo->section = (struct cli_exe_section *) cli_calloc(elfinfo->nsections, sizeof(struct cli_exe_section));
+    if(!elfinfo->section) {
+        cli_dbgmsg("ELF: Can't allocate memory for section headers\n");
+        return -1;
+    }
+
+    section_hdr = (struct elf_section_hdr32 *) cli_calloc(shnum, shentsize);
+    if(!section_hdr) {
+        cli_errmsg("ELF: Can't allocate memory for section headers\n");
+        free(elfinfo->section);
+        elfinfo->section = NULL;
+        return -1;
+    }
+
+    for(i = 0; i < shnum; i++) {
+
+        if(read(desc, &section_hdr[i], sizeof(struct elf_section_hdr32)) != sizeof(struct elf_section_hdr32)) {
+            free(section_hdr);
+            free(elfinfo->section);
+            elfinfo->section = NULL;
+            return -1;
+        }
+
+        elfinfo->section[i].rva = EC32(section_hdr[i].sh_addr, conv);
+        elfinfo->section[i].raw = EC32(section_hdr[i].sh_offset, conv);
+        elfinfo->section[i].rsz = EC32(section_hdr[i].sh_size, conv);
+    }
+
+    free(section_hdr);
+    return 0;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_entconv.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,981 @@
+/*
+ * HTML Entity & Encoding normalization.
+ *
+ * Copyright (C) 2006 Török Edvin <edwin at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ */
+#include "clamav-config.h"
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+
+#ifdef CL_THREAD_SAFE
+#include <pthread.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "htmlnorm.h"
+#include "hashtab.h"
+#include "entconv.h"
+#include "entitylist.h"
+#include "cltypes.h"
+
+#ifdef HAVE_ICONV_H
+#include <iconv.h>
+#endif
+#include "encoding_aliases.h"
+
+
+#define MAX_LINE 1024
+
+#ifndef EILSEQ
+#define EILSEQ 84
+#endif
+
+/*
+ * Look up an HTML entity name in conv->ht and return its normalized form
+ * as a newly allocated, NUL-terminated string that the caller must free.
+ *
+ *   '<' and '>'       -> kept escaped ("&lt;" / "&gt;")
+ *   other values <127 -> the single character itself
+ *   160               -> "&nbsp;"
+ *   anything else     -> numeric reference "&#NNN;"
+ *
+ * Returns NULL when the entity is unknown or memory cannot be allocated.
+ */
+unsigned char* entity_norm(const struct entity_conv* conv,const unsigned char* entity)
+{
+    struct element* e = hashtab_find(conv->ht,entity,strlen((const char*)entity));
+    unsigned char *out;
+    int val;
+
+    if(!e || !e->key)
+        return NULL;
+
+    val = e->data;
+    if(val == '<') /* this was an escaped <, so output it escaped */
+        return (unsigned char*)cli_strdup("&lt;");
+    if(val == '>') /* see above */
+        return (unsigned char*)cli_strdup("&gt;");
+
+    if(val < 127) {
+        out = cli_malloc(2);
+        if(!out)
+            return NULL;
+
+        out[0] = (unsigned char)val;
+        out[1] = '\0';
+        return out;
+    }
+
+    if(val == 160)
+        return (unsigned char*)cli_strdup("&nbsp;");
+
+    out = cli_malloc(10);
+    if(!out)
+        return NULL;
+
+    /* use the whole 10-byte buffer; snprintf() always NUL-terminates it */
+    snprintf((char*)out, 10, "&#%d;", val);
+    out[9] = '\0';
+    return out;
+}
+
+/* Sane default: must be larger than the longest possible return string,
+ * which has the form
+ * &#xxx; */
+#define MIN_BUFFER_SIZE 32
+
+/*
+ * Initialize an entity converter.
+ *
+ * conv        - converter to set up
+ * encoding    - currently unused; the initial encoding is always "ISO-8859-1"
+ * buffer_size - size of each of the three work buffers, at least
+ *               MIN_BUFFER_SIZE
+ *
+ * Returns 0 on success, CL_ENULLARG when buffer_size is too small or conv is
+ * NULL, and CL_EMEM when an allocation fails. On CL_EMEM nothing stays
+ * allocated and every owned pointer in conv is NULL, so a later
+ * entity_norm_done() cannot free anything twice.
+ */
+int init_entity_converter(struct entity_conv* conv,const unsigned char* encoding,size_t buffer_size)
+{
+    (void) encoding;
+
+    if(buffer_size < MIN_BUFFER_SIZE) {
+        cli_warnmsg("Entity converter: Supplied buffer size:%lu, smaller than minimum required: %d\n",(unsigned long)buffer_size,MIN_BUFFER_SIZE);
+        return CL_ENULLARG;
+    }
+    if(!conv)
+        return CL_ENULLARG;
+
+    conv->autodetected = OTHER;
+    conv->bom_cnt = 0;
+    conv->buffer_cnt = 0;
+    conv->bytes_read = 0;
+    conv->partial = 0;
+    conv->entity_buffcnt = 0;
+    conv->buffer_size = buffer_size;
+    conv->priority = NOPRIO;
+
+    conv->tmp_area.offset = 0;
+    conv->tmp_area.length = 0;
+    conv->out_area.offset = 0;
+    conv->out_area.length = 0;
+    conv->norm_area.offset = 0;
+    conv->norm_area.length = 0;
+
+    conv->ht = &entities_htable;
+    conv->msg_zero_shown = 0;
+
+    conv->encoding = (unsigned char*) cli_strdup("ISO-8859-1");
+    conv->tmp_area.buffer = cli_malloc(buffer_size);
+    conv->out_area.buffer = cli_malloc(buffer_size);
+    conv->norm_area.buffer = cli_malloc(buffer_size);
+
+    if(!conv->encoding || !conv->tmp_area.buffer || !conv->out_area.buffer || !conv->norm_area.buffer) {
+        /* release whatever was obtained and leave no dangling pointers */
+        free(conv->encoding);
+        free(conv->tmp_area.buffer);
+        free(conv->out_area.buffer);
+        free(conv->norm_area.buffer);
+        conv->encoding = NULL;
+        conv->tmp_area.buffer = NULL;
+        conv->out_area.buffer = NULL;
+        conv->norm_area.buffer = NULL;
+        return CL_EMEM;
+    }
+
+    return 0;
+}
+
+/*
+ * Map an encoding name to an enum encodings value (stored in *encoding)
+ * and return the size in bytes of one unit of that encoding: 4 for the
+ * UCS-4 variants, 2 for the UTF-16 variants, 1 for everything else.
+ *
+ * UCS4_2143 and UCS4_3412 are recognized by pointer identity; other names
+ * are looked up in aliases_htable, falling back to E_OTHER.
+ */
+static size_t encoding_bytes(const unsigned char* fromcode, enum encodings* encoding)
+{
+    const unsigned char* name = (const unsigned char*) fromcode;
+
+    /* special case for these unusual byteorders */
+    if(name == UCS4_2143) {
+        *encoding = E_UCS4_2134;
+    } else if(name == UCS4_3412) {
+        *encoding = E_UCS4_3412;
+    } else {
+        struct element * hit = hashtab_find(&aliases_htable,name,strlen((const char*)fromcode));
+
+        *encoding = E_OTHER;
+        if(hit && hit->key)
+            *encoding = hit->data;
+    }
+
+    switch(*encoding) {
+        case E_UTF16:
+        case E_UTF16_BE:
+        case E_UTF16_LE:
+            return 2;
+        case E_UCS4:
+        case E_UCS4_1234:
+        case E_UCS4_4321:
+        case E_UCS4_2134:
+        case E_UCS4_3412:
+            return 4;
+        case E_UTF8:
+        case E_UNKNOWN:
+        case E_OTHER:
+        default:
+            break;
+    }
+    return 1;
+}
+
+#ifndef HAVE_ICONV_H
+/* Minimal stand-in for iconv's descriptor, used when <iconv.h> is missing. */
+typedef struct {
+ enum encodings encoding; /* source encoding, as resolved by encoding_bytes() */
+ size_t size; /* value returned by encoding_bytes() for that encoding */
+} * iconv_t;
+
+/*
+ * Fallback iconv_open(): allocate a descriptor for converting from
+ * `fromcode`. `tocode` is ignored. Returns NULL when allocation fails.
+ */
+static iconv_t iconv_open(const char *tocode, const char* fromcode)
+{
+    iconv_t cd = cli_malloc(sizeof(*cd));
+
+    /* TODO: check that tocode is UTF16BE */
+    if(cd)
+        cd->size = encoding_bytes(fromcode,&cd->encoding);
+
+    return cd;
+}
+
+/* Fallback iconv_close(): release the descriptor (NULL is accepted); always returns 0. */
+static int iconv_close(iconv_t cd)
+{
+    free(cd); /* free(NULL) is a no-op */
+    return 0;
+}
+
+
+/*
+ * Fallback iconv(): convert bytes from *inbuf (encoding taken from
+ * iconv_struct->encoding) to big-endian UTF-16 in *outbuf, then advance
+ * both pointers and decrease both byte counters.
+ *
+ * Returns 0 when all input was consumed. Most paths return -1 with errno
+ * set to E2BIG (input left over) or EILSEQ (UTF-8 path, early stop with
+ * output space left).
+ * NOTE(review): the E_UNKNOWN/E_OTHER path returns E2BIG itself instead of
+ * -1 with errno set - confirm which convention callers expect.
+ */
+static int iconv(iconv_t iconv_struct,char **inbuf, size_t *inbytesleft,
+ char** outbuf, size_t *outbytesleft)
+{
+ /* smaller of the two counters, rounded down to a multiple of the unit
+  * size (the mask assumes size is a power of two) */
+ const size_t maxcopy = (*inbytesleft > *outbytesleft ? *outbytesleft : *inbytesleft) & ~(iconv_struct->size - 1);
+ const uint8_t* input = (const uint8_t*)*inbuf;
+ uint8_t* output = (uint8_t*)*outbuf;
+ size_t i;
+
+ /* maxcopy is aligned to data size */
+ /* output is always utf16be !*/
+ switch(iconv_struct->encoding) {
+ case E_UCS4:
+ case E_UCS4_1234:
+ {
+ /* NOTE(review): bytes 2 and 3 are required to be zero and bytes 1,0
+  * are emitted, i.e. the unit is read least-significant-byte first -
+  * confirm this matches the intended 1234 byte order */
+ for(i=0;i < maxcopy; i += 4) {
+ if(!input[i+2] && !input[i+3]) {
+ output[i/2] = input[i+1]; /* is compiler smart enough to replace /2, with >>1 ? */
+ output[i/2+1] = input[i];
+ }
+ else {
+ cli_dbgmsg("Warning: unicode character out of utf16 range!\n");
+ output[i/2] = 0xff;
+ output[i/2+1] = 0xff;
+ }
+ }
+ break;
+ }
+ case E_UCS4_4321:
+ {
+ const uint16_t *in = (const uint16_t*)input;/*UCS4_4321, and UTF16_BE have same endianness, no need for byteswap here*/
+ uint16_t *out = (uint16_t*)output;
+ /* i indexes 16-bit halves here: two per input unit, one per output unit */
+ for(i=0;i<maxcopy/2; i+=2) {
+ if(!in[i]) {
+ out[i/2] = in[i+1];
+ }
+ else {
+ out[i/2] = 0xffff;
+ }
+ }
+ break;
+ }
+ case E_UCS4_2134:
+ {
+ const uint16_t *in = (const uint16_t*)input;
+ uint16_t* out = (uint16_t*)output;
+ for(i=0;i<maxcopy/2;i+=2) {
+ if(!in[i+1])
+ out[i/2] = in[i];
+ else
+ out[i/2] = 0xffff;
+ }
+ break;
+ }
+ case E_UCS4_3412:
+ {
+ for(i=0;i < maxcopy;i += 4) {
+ if(!input[i] && !input[i+1]) {
+ output[i/2] = input[i+3];
+ output[i/2+1] = input[i+2];
+ }
+ else {
+ output[i/2] = 0xff;
+ output[i/2+1] = 0xff;
+ }
+ }
+ break;
+ }
+ case E_UTF16:
+ case E_UTF16_LE:
+ {
+ /* swap each byte pair: little-endian in, big-endian out */
+ for(i=0;i < maxcopy;i += 2) {
+ output[i] = input[i+1];
+ output[i+1] = input[i];
+ }
+ break;
+ }
+ case E_UTF16_BE:
+ memcpy(output,input,maxcopy);
+ break;
+ case E_UNKNOWN:
+ case E_OTHER:
+ {
+ /* every input byte becomes one 16-bit unit with a zero high byte */
+ const size_t max_copy = *inbytesleft > (*outbytesleft/2) ? (*outbytesleft/2) : *inbytesleft;
+ for(i=0;i<max_copy;i++) {
+ output[i*2] = 0;
+ output[i*2+1] = input[i];
+ }
+ *outbytesleft -= max_copy*2;
+ *inbytesleft -= max_copy;
+ *inbuf += max_copy;
+ *outbuf += max_copy*2;
+ if(*inbytesleft)
+ return E2BIG;
+ return 0;
+ }
+ case E_UTF8:
+ {
+ const size_t maxread = *inbytesleft;
+ const size_t maxwrite = *outbytesleft;
+ size_t j;
+ /* NOTE(review): continuation bytes input[i+1..i+3] are read without
+  * checking them against maxread, and two bytes are written when
+  * j == maxwrite - 1 - confirm callers always leave enough slack */
+ for(i=0,j=0 ; i < maxread && j < maxwrite;) {
+ /* NOTE(review): 0x7F itself fails this test and ends up in the
+  * "invalid" branch below */
+ if(input[i] < 0x7F) {
+ output[j++] = 0;
+ output[j++] = input[i++];
+ }
+ else if( (input[i]&0xE0) == 0xC0 ) {
+ if ((input[i+1]&0xC0) == 0x80) {
+ /* 2 bytes long 110yyyyy zzzzzzzz -> 00000yyy yyzzzzzz*/
+ output[j++] = ((input[i] & 0x1F) >> 2) & 0x07;
+ output[j++] = ((input[i] & 0x1F) << 6) | (input[i+1] & 0x3F);
+ }
+ else {
+ cli_dbgmsg("invalid UTF8 character encountered\n");
+ break;
+ }
+ i+=2;
+ }
+ /* NOTE(review): this mask also matches 0xF0-0xFF lead bytes, so the
+  * 4-byte branch further down looks unreachable */
+ else if( (input[i]&0xE0) == 0xE0) {
+ if( (input[i+1]&0xC0) == 0x80 && (input[i+2]&0xC0) == 0x80) {
+ /* 3 bytes long 1110xxxx 10yyyyyy 10zzzzzzzz -> xxxxyyyy yyzzzzzz*/
+ output[j++] = (input[i] << 4) | ((input[i+1] >> 2) & 0x0F);
+ output[j++] = (input[i+1] << 6) | (input[i+2] & 0x3F);
+ }
+ else {
+ cli_dbgmsg("invalid UTF8 character encountered\n");
+ break;
+ }
+ i+=3;
+ }
+ else if( (input[i]&0xF8) == 0xF0) {
+ if((input[i+1]&0xC0) == 0x80 && (input[i+2]&0xC0) == 0x80 && (input[i+3]&0xC0) == 0x80) {
+ /* 4 bytes long 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz*/
+ cli_dbgmsg("UTF8 character out of UTF16 range encountered");
+ output[j++] = 0xff;
+ output[j++] = 0xff;
+
+ /*out[j++] = ((input[i] & 0x07) << 2) | ((input[i+1] >> 4) & 0x3);
+ out[j++] = (input[i+1] << 4) | ((input[i+2] >> 2) & 0x0F);
+ out[j++] = (input[i+2] << 6) | (input[i+2] & 0x3F);*/
+ }
+ else {
+ cli_dbgmsg("invalid UTF8 character encountered\n");
+ break;
+ }
+ i+=4;
+ }
+ else {
+ cli_dbgmsg("invalid UTF8 character encountered\n");
+ break;
+ }
+ }
+ *inbytesleft -= i;
+ *outbytesleft -= j;
+ *inbuf += i;
+ *outbuf += j;
+ if(*inbytesleft && *outbytesleft) {
+ errno = EILSEQ;/* we had an early exit */
+ return -1;
+ }
+ if(*inbytesleft) {
+ errno = E2BIG;
+ return -1;
+ }
+ return 0;
+ }
+ }
+
+ /* shared tail for the fixed-width cases that leave the switch via break.
+  * NOTE(review): the UCS-4 cases write only maxcopy/2 output bytes, yet
+  * the output pointer and counter are moved by maxcopy here - confirm */
+ *outbytesleft -= maxcopy;
+ *inbytesleft -= maxcopy;
+ *inbuf += maxcopy;
+ *outbuf += maxcopy;
+ if(*inbytesleft) {
+ errno = E2BIG;
+ return -1;
+ }
+ return 0;
+}
+
+#else
+
+
+
+#endif
+
+/* new iconv() version */
+/*
+ * Inspect the leading bytes stored in conv->bom (up to four are read) and
+ * record the result in conv: the guessed encoding in autodetected, the
+ * unit width in enc_bytes, and in has_bom whether a real byte-order mark
+ * (as opposed to a recognizable "<" / "<?xm" prefix) was seen.
+ */
+static inline void process_bom(struct entity_conv* conv)
+{
+    const unsigned char* sig = conv->bom;
+    const unsigned char* guess = OTHER;
+    int bom_seen = 0;
+    uint8_t width = 4;/* default is UTF8, which has a maximum of 4 bytes*/
+
+    if(sig[0] == 0x00) {
+        if(sig[1] == 0x00) {
+            if(sig[2] == 0xFE && sig[3] == 0xFF) {
+                guess = UCS4_1234;/* UCS-4 big-endian*/
+                bom_seen = 1;
+            } else if(sig[2] == 0xFF && sig[3] == 0xFE) {
+                guess = UCS4_2143;/* UCS-4 unusual order 2143 */
+                bom_seen = 1;
+            } else if(sig[2] == 0x00 && sig[3] == 0x3C) {
+                guess = UNDECIDED_32_1234;
+            } else if(sig[2] == 0x3C && sig[3] == 0x00) {
+                guess = UNDECIDED_32_2143;
+            }
+        } else if(sig[1] == 0x3C && sig[2] == 0x00) {
+            if(sig[3] == 0x00) {
+                guess = UNDECIDED_32_3412;
+            } else if(sig[3] == 0x3F) {
+                guess = UNDECIDED_16_BE;
+                width = 2;
+            }
+        }
+    } else if(sig[0] == 0xFF) {
+        if(sig[1] == 0xFE) {
+            bom_seen = 1;
+            if(sig[2] == 0x00 && sig[3] == 0x00) {
+                guess = UCS4_4321;
+            } else {
+                guess = UTF16_LE;
+                width = 2;
+            }
+        }
+    } else if(sig[0] == 0xFE) {
+        if(sig[1] == 0xFF) {
+            bom_seen = 1;
+            if(sig[2] == 0x00 && sig[3] == 0x00) {
+                guess = UCS4_3412;
+            } else {
+                guess = UTF16_BE;
+                width = 2;
+            }
+        }
+    } else if(sig[0] == 0xEF) {
+        if(sig[1] == 0xBB && sig[2] == 0xBF) {
+            guess = UTF8;
+            bom_seen = 1;
+            /* width stays at the default of 4 */
+        }
+    } else if(sig[0] == 0x3C) {
+        if(sig[1] == 0x00) {
+            if(sig[2] == 0x00 && sig[3] == 0x00) {
+                guess = UNDECIDED_32_4321;
+            } else if(sig[2] == 0x3F && sig[3] == 0x00) {
+                guess = UNDECIDED_16_LE;
+                width = 2;
+            }
+        } else if(sig[1] == 0x3F && sig[2] == 0x78 && sig[3]==0x6D) {
+            /* 3C 3F 78 6D */
+            guess = UNDECIDED_8;
+            width = 1;
+        }
+    } else if(sig[0] == 0x4C) {
+        if(sig[1] == 0x6F && sig[2] == 0xA7 && sig[3] == 0x94) {
+            /* 4C 6F A7 94 */
+            guess = EBCDIC;
+            width = 1;
+        }
+    }
+
+    conv->autodetected = guess;
+    conv->enc_bytes = width;
+    conv->has_bom = bom_seen;
+}
+
+/*
+ * Return a newly allocated upper-case copy of the encoding name `enc`,
+ * substituting "ISO-8859-1" when `enc` is the OTHER marker.
+ * The caller frees the result; NULL is returned when allocation fails.
+ */
+static unsigned char* normalize_encoding(const unsigned char* enc)
+{
+    unsigned char* norm;
+    size_t i, len;
+
+    /* substitute first, so the buffer is sized for the string actually copied */
+    if(enc == OTHER)
+        enc = (const unsigned char*)"ISO-8859-1";
+
+    len = strlen((const char*)enc);
+    norm = cli_malloc(len + 1);
+    if(!norm)
+        return NULL;
+
+    for(i = 0; i < len; i++)
+        norm[i] = (unsigned char) toupper(enc[i]);
+    norm[len] = '\0';
+    return norm;
+}
+
+/* Return `encoding`, or the default name "ISO-8859-1" when it is NULL. */
+static const unsigned char* encoding_name(unsigned char* encoding)
+{
+    return encoding ? encoding : (const unsigned char*)"ISO-8859-1";
+}
+
+/*
+ * Replace conv->encoding with a normalized copy of `encoding`.
+ *
+ * The request is ignored when `encoding` is the OTHER marker, when the
+ * converter's priority is CONTENT_TYPE, or when it is BOM and `prio` is
+ * NOBOM_AUTODETECT. With prio == META the change is also refused when the
+ * new encoding's unit size differs from the current one. If the name
+ * cannot be normalized (out of memory) the current encoding is kept.
+ */
+void process_encoding_set(struct entity_conv* conv,const unsigned char* encoding,enum encoding_priority prio)
+{
+    unsigned char *tmp_encoding;
+    enum encodings tmp;
+    size_t new_size,old_size;
+
+    cli_dbgmsg("Setting encoding for %p to %s, priority: %d\n",(void*)conv, encoding, prio);
+    if(encoding == OTHER)
+        return;
+    if(conv->priority == CONTENT_TYPE)
+        return;/* Content-type in header is highest priority, no overrides possible*/
+    if(conv->priority == BOM && prio == NOBOM_AUTODETECT)
+        return;
+
+    tmp_encoding = normalize_encoding(encoding);/* FIXME: better obey priorities*/
+    if(!tmp_encoding) {
+        /* do not throw away a valid encoding in exchange for NULL */
+        cli_dbgmsg("process_encoding_set: unable to normalize encoding, keeping the current one\n");
+        return;
+    }
+
+    if(prio == META) {
+        /* encoding_name() guards against a NULL conv->encoding */
+        old_size = encoding_bytes(encoding_name(conv->encoding),&tmp);
+        new_size = encoding_bytes(tmp_encoding,&tmp);
+        if(old_size != new_size) {
+            /* on x86 gcc wants %u for size_t, on x86_64 it wants %lu for size_t. So just cast to unsigned long to make warnings go away. */
+            cli_dbgmsg("process_encoding_set: refusing to override encoding - new encoding size differs: %s(%lu) != %s(%lu)\n",encoding_name(conv->encoding),(unsigned long)old_size,tmp_encoding,(unsigned long)new_size);
+            free(tmp_encoding);
+            return;
+        }
+    }
+    free(conv->encoding);
+    conv->encoding = tmp_encoding;
+    cli_dbgmsg("New encoding for %p:%s\n",(void*)conv,conv->encoding);
+    /* reset stream */
+}
+
+/*
+ * Release the encoding name and the three work buffers owned by conv,
+ * reset the pointers to NULL and buffer_size to 0. Always returns 0.
+ */
+static int encoding_norm_done(struct entity_conv* conv)
+{
+    /* free(NULL) is a no-op, so no per-pointer guard is needed */
+    free(conv->encoding);
+    conv->encoding = NULL;
+
+    conv->buffer_size = 0;
+
+    free(conv->tmp_area.buffer);
+    conv->tmp_area.buffer = NULL;
+
+    free(conv->out_area.buffer);
+    conv->out_area.buffer = NULL;
+
+    free(conv->norm_area.buffer);
+    conv->norm_area.buffer = NULL;
+
+    return 0;
+}
+
+/* Public teardown entry point: forwards to encoding_norm_done(). */
+int entity_norm_done(struct entity_conv* conv)
+{
+    const int rc = encoding_norm_done(conv);
+
+    return rc;
+}
+
+/*
+ * Copy up to max_len raw bytes into outbuff, preferring the memory area
+ * m_area when it is non-NULL and falling back to `stream` otherwise.
+ * Returns the number of bytes reported to the caller; 0 when the memory
+ * area is exhausted or no stream was given.
+ */
+static size_t read_raw(FILE *stream, m_area_t *m_area, int max_len, unsigned char* outbuff)
+{
+
+ /* Try and use the memory buffer first */
+ if (m_area) {
+ size_t area_maxcopy;
+ const unsigned char* src;
+ size_t copied;
+ if(m_area->offset >= m_area->length)
+ return 0;
+ /* window size: max_len, or whatever is left in the area if that is less */
+ area_maxcopy = (m_area->length > m_area->offset + max_len) ? max_len : m_area->length - m_area->offset;
+ src = m_area->buffer + m_area->offset;
+ /* advance by the whole window for now; the unused part is given back below */
+ m_area->offset += area_maxcopy;
+ copied = area_maxcopy;
+ /* copy up to, but not including, the first newline */
+ while(area_maxcopy && *src != '\n') {
+ *outbuff++ = *src++;
+ area_maxcopy--;
+ }
+ if(area_maxcopy > 3) {
+ /*copy 3 more bytes, just in case its ucs4 */
+ /* the first of these three is the newline itself */
+ *outbuff++ = *src++;
+ *outbuff++ = *src++;
+ *outbuff++ = *src++;
+ area_maxcopy -= 3;
+ }
+ /* area_maxcopy now holds the uncopied remainder of the window */
+ m_area->offset -= area_maxcopy;
+ copied -= area_maxcopy;
+ return copied;
+ } else {
+ if (!stream) {
+ cli_dbgmsg("No HTML stream\n");
+ return 0;
+ }
+ else {
+ const size_t iread = fread(outbuff, 1, max_len, stream);
+ size_t i;
+ if(ferror(stream)) {
+ cli_errmsg("Error while reading HTML stream\n");
+ }
+ /* report the data up to the first newline plus two following bytes.
+  * NOTE(review): fread() has already consumed iread bytes, so anything
+  * after the reported count is not re-read - confirm with the caller */
+ for(i=0; i < iread; i++)
+ if(outbuff[i] == '\n') {
+ return i+3 > iread ? iread : i+3;
+ }
+ return iread;
+ }
+ }
+}
+
+/*
+ * Skip the byte-order mark at the start of the input, if process_bom()
+ * found one (conv->has_bom): *in is advanced and *inleft reduced by the
+ * BOM length. `out` is not used.
+ *
+ * The UTF-8 BOM is three bytes long (EF BB BF) although process_bom()
+ * leaves enc_bytes at 4 for it, so UTF-8 is checked before enc_bytes.
+ * The skip is clamped to *inleft so the counter cannot wrap around.
+ */
+static void output_first(struct entity_conv* conv,unsigned char** out, unsigned char** in,size_t* inleft)
+{
+    size_t skip = 0;
+
+    (void) out;
+
+    if(!conv->has_bom)
+        return;
+
+    if(conv->autodetected == UTF8) {
+        skip = 3;
+    } else {
+        switch(conv->enc_bytes) {
+            case 2:
+                skip = 2;
+                break;
+            case 4:
+                skip = 4;
+                break;
+            default:
+                break;
+        }
+    }
+
+    if(skip > *inleft)
+        skip = *inleft;
+
+    *in += skip;
+    *inleft -= skip;
+}
+
+/* sarge leaks on iconv_open/iconv_close, so lets not open/close so many times,
+ * just keep on each thread its own pool of iconvs*/
+
+/* Pool of iconv descriptors, so they are opened once and reused. */
+struct iconv_cache {
+ iconv_t* tab; /* descriptor array; freed by iconv_cache_destroy() */
+ size_t len; /* presumably the allocated capacity of tab - TODO confirm */
+ size_t last; /* iconv_cache_destroy() closes tab[0] .. tab[last-1] */
+ struct hashtable hashtab; /* searched by encoding name in iconv_open_cached() */
+};
+
+/*
+ * Prepare an iconv pool for use by initializing its hash table with an
+ * initial size of 32. The remaining fields are not touched here; they are
+ * expected to be zero already.
+ */
+static void iconv_cache_init(struct iconv_cache* cache)
+{
+    cli_dbgmsg("Initializing iconv pool:%p\n",(void*)cache);
+    hashtab_init(&cache->hashtab, 32);
+}
+
+/*
+ * Tear down an iconv pool: close every descriptor in tab[0..last-1],
+ * clear and free the hash table, free the descriptor array and finally
+ * the pool object itself.
+ */
+static void iconv_cache_destroy(struct iconv_cache* cache)
+{
+    size_t n = 0;
+
+    cli_dbgmsg("Destroying iconv pool:%p\n",(void*)cache);
+    while(n < cache->last) {
+        cli_dbgmsg("closing iconv:%p\n",cache->tab[n]);
+        iconv_close(cache->tab[n]);
+        n++;
+    }
+
+    hashtab_clear(&cache->hashtab);
+    free(cache->hashtab.htable);
+    free(cache->tab);
+    free(cache);
+}
+
+
+#ifdef CL_THREAD_SAFE
+/* Key under which each thread stores its own iconv pool. */
+static pthread_key_t iconv_pool_tls_key;
+/* Ensures the key is created only once (see init_iconv_pool_ifneeded()). */
+static pthread_once_t iconv_pool_tls_key_once = PTHREAD_ONCE_INIT;
+
+/* destructor called for all threads that exit via pthread_exit, or cancellation. Unfortunately that doesn't include
+ * the main thread, so we have to call this manually for the main thread.*/
+
+/* Non-zero once the atexit() handler has been registered. */
+static int cache_atexit_registered = 0;
+
+/* Thread-specific-data destructor: destroy the pool stored in ptr, if any. */
+static void iconv_pool_tls_instance_destroy(void* ptr)
+{
+    if(!ptr)
+        return;
+
+    iconv_cache_destroy(ptr);
+}
+
+/*
+ * atexit() handler: destroy the calling thread's pool (if it has one),
+ * clear its slot, then delete the key.
+ */
+static void iconv_cache_cleanup_main(void)
+{
+    struct iconv_cache* pool = pthread_getspecific(iconv_pool_tls_key);
+
+    if(pool != NULL) {
+        iconv_pool_tls_instance_destroy(pool);
+        pthread_setspecific(iconv_pool_tls_key,NULL);
+    }
+
+    pthread_key_delete(iconv_pool_tls_key);
+}
+
+/*
+ * One-time initializer: creates the TLS key that holds each thread's iconv
+ * pool (with iconv_pool_tls_instance_destroy as its destructor) and, unless
+ * already done, registers iconv_cache_cleanup_main with atexit.
+ */
+static void iconv_pool_tls_key_alloc(void)
+{
+    pthread_key_create(&iconv_pool_tls_key, iconv_pool_tls_instance_destroy);
+
+    if(cache_atexit_registered)
+        return;
+
+    cli_dbgmsg("iconv:registering atexit\n");
+    if(atexit(iconv_cache_cleanup_main) != 0)
+        cli_dbgmsg("failed to register atexit\n");
+
+    /* the flag is set even when registration failed, so it is attempted once only */
+    cache_atexit_registered = 1;
+}
+
+/*
+ * Make sure the TLS key for the iconv pools exists: runs
+ * iconv_pool_tls_key_alloc through pthread_once.
+ */
+static void init_iconv_pool_ifneeded(void)
+{
+ pthread_once(&iconv_pool_tls_key_once, iconv_pool_tls_key_alloc);
+}
+
+/*
+ * Return the calling thread's iconv pool, creating and registering it under
+ * the TLS key on first use.
+ *
+ * Returns NULL when the pool cannot be allocated or cannot be stored in the
+ * thread-specific slot. In the latter case the new pool is destroyed again:
+ * a pool that is not reachable through the key would never be found by a
+ * later call or by the thread destructor, so every call would leak one.
+ */
+static inline struct iconv_cache* cache_get_tls_instance(void)
+{
+    struct iconv_cache* cache = pthread_getspecific(iconv_pool_tls_key);
+
+    if(cache)
+        return cache;
+
+    cache = cli_calloc(1,sizeof(*cache));
+    if(!cache) {
+        cli_dbgmsg("!Out of memory allocating TLS iconv instance\n");
+        return NULL;
+    }
+    iconv_cache_init(cache);
+
+    if(pthread_setspecific(iconv_pool_tls_key, cache) != 0) {
+        cli_dbgmsg("!Unable to store TLS iconv instance\n");
+        iconv_cache_destroy(cache);
+        return NULL;
+    }
+    return cache;
+}
+
+#else
+
+static struct iconv_cache* global_iconv_cache = NULL;
+static int iconv_global_inited = 0;
+
+
+/* atexit handler for the non-threaded build: destroys the single global iconv pool. */
+static void iconv_cache_cleanup_main(void)
+{
+ iconv_cache_destroy(global_iconv_cache);
+}
+
+/*
+ * Non-threaded build: allocate and initialize the single global iconv pool on
+ * first use and register its cleanup with atexit.
+ *
+ * When the allocation fails nothing is recorded, so the next call tries
+ * again. A failed atexit registration is only logged, matching the threaded
+ * variant of this initializer.
+ */
+static inline void init_iconv_pool_ifneeded(void)
+{
+    if(iconv_global_inited)
+        return;
+
+    global_iconv_cache = cli_calloc(1,sizeof(*global_iconv_cache));
+    if(!global_iconv_cache)
+        return;
+
+    iconv_cache_init(global_iconv_cache);
+    if(atexit(iconv_cache_cleanup_main))
+        cli_dbgmsg("failed to register atexit\n");
+    iconv_global_inited = 1;
+}
+
+
+/*
+ * Non-threaded counterpart of the TLS lookup: returns the global iconv pool,
+ * which is NULL until init_iconv_pool_ifneeded() has allocated it.
+ */
+static inline struct iconv_cache* cache_get_tls_instance(void)
+{
+ return global_iconv_cache;
+}
+
+#endif
+
+/*
+ * Return an iconv descriptor converting from "fromcode" to UTF-16BE, reusing
+ * a descriptor from the current pool when one was opened for the same
+ * encoding name before.
+ *
+ * Returns (iconv_t)-1 on failure: errno is EINVAL when no pool is available,
+ * ENOMEM when the descriptor table cannot grow, and whatever iconv_open() set
+ * when the conversion itself cannot be opened. Returned descriptors belong to
+ * the pool and must not be closed by the caller.
+ */
+static iconv_t iconv_open_cached(const unsigned char* fromcode)
+{
+    struct iconv_cache * cache;
+    size_t idx;
+    const size_t fromcode_len = strlen((const char*)fromcode);
+    struct element * e;
+    iconv_t iconv_struct;
+
+    init_iconv_pool_ifneeded();
+    cache = cache_get_tls_instance();/* gets TLS iconv pool */
+    if(!cache) {
+        cli_dbgmsg("!Unable to get TLS iconv cache!\n");
+        errno = EINVAL;
+        return (iconv_t)-1;
+    }
+
+    e = hashtab_find(&cache->hashtab, fromcode, fromcode_len);
+    /* only slots below cache->last hold an opened descriptor; anything else
+     * (including index == len) must not be used to index the table */
+    if(e && (e->data < 0 || (size_t)e->data >= cache->last)) {
+        e = NULL;
+    }
+    if(e) {
+        return cache->tab[e->data];
+    }
+
+    cli_dbgmsg("iconv not found in cache, for encoding:%s\n",fromcode);
+    iconv_struct = iconv_open("UTF-16BE",(const char*)fromcode);
+    if(iconv_struct == (iconv_t)-1)
+        return (iconv_t)-1;
+
+    idx = cache->last;
+    if(idx >= cache->len) {
+        /* grow into a fresh array so that the existing table (and the
+         * descriptors it holds) stays valid when the allocation fails */
+        const size_t new_len = cache->len + 16;
+        iconv_t* new_tab = cli_calloc(new_len, sizeof(*new_tab));
+
+        if(!new_tab) {
+            cli_dbgmsg("!Out of mem in iconv-pool\n");
+            /* the new descriptor was never stored anywhere: close it here */
+            iconv_close(iconv_struct);
+            errno = ENOMEM;
+            return (iconv_t)-1;
+        }
+        if(cache->tab)
+            memcpy(new_tab, cache->tab, cache->last * sizeof(*new_tab));
+        free(cache->tab);
+        cache->tab = new_tab;
+        cache->len = new_len;
+    }
+
+    /* publish the slot only once it really contains the descriptor */
+    cache->tab[idx] = iconv_struct;
+    cache->last = idx + 1;
+    hashtab_insert(&cache->hashtab, fromcode, fromcode_len, idx);
+    cli_dbgmsg("iconv_open(),for:%s -> %p\n",fromcode,(void*)cache->tab[idx]);
+    return cache->tab[idx];
+}
+
+
+/*
+ * Produce the next normalized line of an HTML stream.
+ *
+ * Data moves through the converter's buffers in this order:
+ *   stream_in|in_m_area -> (read_raw) conv->tmp_area -> (iconv) conv->out_area
+ *   -> (normalize) conv->norm_area -> (cli_readline) return value
+ *
+ * tmp_m_area and conv->out_area are of size maxlen.
+ *
+ * Returns NULL when conv or its tmp/out buffers are missing or maxlen < 2.
+ * Otherwise returns what cli_readline() yields for conv->norm_area, or for
+ * conv->tmp_area when no iconv descriptor can be obtained even for the
+ * ISO-8859-1 fallback.
+ */
+unsigned char* encoding_norm_readline(struct entity_conv* conv, FILE* stream_in, m_area_t* in_m_area, const size_t maxlen)
+{
+    if(!conv || !conv->out_area.buffer || !conv->tmp_area.buffer || maxlen<2 )
+        return NULL;
+    else {
+        const size_t tmp_move = conv->tmp_area.length - conv->tmp_area.offset;
+        const size_t tmp_available = conv->buffer_size - tmp_move;
+        const size_t max_read = maxlen < tmp_available ? maxlen : tmp_available;
+        unsigned char* tmpbuff = &conv->tmp_area.buffer[tmp_move];
+
+        const size_t out_move = conv->out_area.length < conv->out_area.offset ? 0 : conv->out_area.length - conv->out_area.offset;
+        size_t outleft = conv->buffer_size - out_move;
+        unsigned char* out = &conv->out_area.buffer[out_move];
+
+        const size_t norm_move = conv->norm_area.length - conv->norm_area.offset;
+
+        unsigned char* norm;
+        const unsigned char* norm_end;
+        iconv_t iconv_struct;
+
+        size_t rc, inleft;
+        ssize_t i;
+
+        signed char alignfix;
+
+        /* move whatever left in conv->tmp_area to beginning */
+        if(tmp_move)
+            memmove(conv->tmp_area.buffer, conv->tmp_area.buffer + conv->tmp_area.offset, tmp_move);
+        conv->tmp_area.offset = 0;
+
+        /* read raw data from stream, or in_m_area into conv->tmp_area*/
+        conv->tmp_area.length = tmp_move + read_raw(stream_in, in_m_area, max_read, tmpbuff);
+
+        /* move whatever left in conv->out_area to beginning */
+        if(out_move)
+            memmove(conv->out_area.buffer, conv->out_area.buffer + conv->out_area.offset, out_move);
+        conv->out_area.offset = 0;
+
+        tmpbuff = conv->tmp_area.buffer;
+        inleft = conv->tmp_area.length;
+        if(!conv->bom_cnt && conv->tmp_area.length >= 4) {/* detect Byte Order Mark */
+            memcpy( conv->bom, tmpbuff, 4);
+            process_bom(conv);
+            process_encoding_set(conv,conv->autodetected,conv->has_bom ? BOM : NOBOM_AUTODETECT);
+            output_first(conv,&out,&tmpbuff,&inleft);
+            conv->bom_cnt++;
+        }
+
+        /* convert encoding conv->tmp_area. conv->out_area */
+        alignfix = inleft%4;/* iconv gives an error if we give him 3 bytes to convert,
+                               and we are using ucs4, ditto for utf16, and 1 byte*/
+        inleft -= alignfix;
+
+        if(!inleft && alignfix) {
+            /* fewer than 4 bytes in total: pad them with zeros up to 4 bytes and
+             * remember (as a negative alignfix) that the zeros are padding */
+            size_t k;
+            for(k=0;k+alignfix < 4;k++)
+                tmpbuff[alignfix+k] = '\0';
+            inleft = 4;
+            alignfix = -inleft;
+        }
+
+        iconv_struct = iconv_open_cached(encoding_name(conv->encoding));
+
+        if(iconv_struct == (iconv_t)-1) {
+            cli_dbgmsg("Iconv init problem for encoding:%s, falling back to iso encoding!\n",encoding_name(conv->encoding));
+            /* message shown only once/file */
+            /* what can we do? just fall back for it being an ISO-8859-1 */
+            free(conv->encoding);
+            conv->encoding = (unsigned char*) cli_strdup("ISO-8859-1");
+            /* cli_strdup may fail; a NULL name must never reach
+             * iconv_open_cached(), which takes strlen() of its argument */
+            iconv_struct = conv->encoding ? iconv_open_cached(conv->encoding) : (iconv_t)-1;
+            if(iconv_struct == (iconv_t)-1) {
+                cli_dbgmsg("fallback failed... bail out\n");
+                return cli_readline(NULL,&conv->tmp_area,maxlen);
+            }
+        }
+
+        if(inleft && outleft > conv->buffer_size/2 ) /* iconv doesn't like inleft to be 0 */ {
+            rc = iconv(iconv_struct, (char**) &tmpbuff, &inleft, (char**) &out, &outleft);
+        }
+        else
+            rc = 0;
+
+        /* iconv_struct is a cached instance owned by the iconv pool: it is
+         * intentionally not closed here */
+
+        if(rc==(size_t)-1 && errno != E2BIG) {
+            cli_dbgmsg("iconv error:%s, silently resuming (%ld,%ld,%lu,%lu)\n",strerror(errno),(long)(out-conv->out_area.buffer),(long)(tmpbuff-conv->tmp_area.buffer),(unsigned long)inleft,(unsigned long)outleft);
+            /* output raw byte, and resume at next byte - but only when there is
+             * an input byte to take and room for the two output bytes;
+             * otherwise the byte stays in tmp_area for the next call */
+            if(inleft > 0 && outleft >= 2) {
+                *out++ = 0;
+                *out++ = *tmpbuff++;
+                inleft--;
+            }
+        }
+
+        conv->tmp_area.length = inleft + (alignfix > 0 ? alignfix : 0);
+        conv->out_area.length = out - conv->out_area.buffer - out_move;
+
+        conv->tmp_area.offset = tmpbuff - conv->tmp_area.buffer;
+        conv->tmp_area.length += conv->tmp_area.offset;
+
+
+        /* move whatever left in conv->norm_area to beginning */
+        if(norm_move) {
+            if(norm_move < conv->buffer_size/2) {
+                memmove(conv->norm_area.buffer, conv->norm_area.buffer + conv->norm_area.offset, norm_move);
+                conv->norm_area.offset = 0;
+                norm = conv->norm_area.buffer + norm_move;
+            }
+            else {
+                /* don't modify offset here */
+                norm = conv->norm_area.buffer + conv->norm_area.length;
+            }
+        }
+        else {
+            conv->norm_area.offset = 0;
+            norm = conv->norm_area.buffer;
+        }
+
+        /* now do the real normalization */
+        out = conv->out_area.buffer;/* skip over utf16 bom, FIXME: check if iconv really outputted a BOM */
+        norm_end = conv->norm_area.buffer + conv->buffer_size;
+        if(conv->out_area.length>0 && out[0] == 0xFF && out[1] == 0xFE)
+            i = 2;
+        else
+            i = 0;
+        /* each iteration consumes one 16-bit big-endian unit from out_area */
+        for(; i < conv->out_area.length; i += 2) {
+            uint16_t u16 = ( ((uint16_t)out[i]) << 8 ) | out[i+1];
+            if(!u16) {
+                if(alignfix >= 0 && !conv->msg_zero_shown) /* if alignfix is negative, this 0 byte is on-purpose, its padding */ {
+                    conv->msg_zero_shown = 1;
+                    cli_dbgmsg("Skipping null character in html stream\n");
+                }
+            }
+            else if(u16 < 0x80) {
+                /* plain ASCII is copied through unchanged */
+                if(norm >= norm_end)
+                    break;
+                if((unsigned char)u16 ==0)
+                    cli_dbgmsg("Impossible\n");
+                *norm++ = (unsigned char)u16;
+            }
+            else if (u16 == 160) {/*nbsp*/
+                if(norm >= norm_end)
+                    break;
+                *norm++ = 0x20;
+            }
+            else {
+                /* everything else becomes a decimal character reference */
+                char buff[10];
+                int len;
+
+                snprintf(buff,9,"&#%d;",u16);
+                buff[9] = '\0';
+                len = strlen(buff);
+                if((norm_end - norm) <= len)
+                    /* prevent buffer overflow */
+                    break;
+                memcpy((char*)norm, buff, len);
+                norm += len;
+            }
+        }
+        conv->out_area.offset = i; /* so that we can resume next time from here */
+
+        conv->norm_area.length = norm - conv->norm_area.buffer;
+        /* NOTE: the original author explicitly warns against NUL-terminating
+         * conv->norm_area.buffer at buffer_size-1 here ("DONT DO THIS") */
+
+        /* final cli_readline from conv->norm_area */
+        return cli_readline(NULL, &conv->norm_area, maxlen);
+    }
+}
+
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_filetypes.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,405 @@
+/*
+ * Copyright (C) 2002 - 2005 Tomasz Kojm <tkojm at clamav.net>
+ * With enhancements from Thomas Lamy <Thomas.Lamy at in-online.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <sys/types.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "clamav.h"
+#include "filetypes.h"
+#include "others.h"
+#include "readdb.h"
+#include "matcher-ac.h"
+#include "str.h"
+
+#include "htmlnorm.h"
+#include "entconv.h"
+
+/* One fixed-position magic entry, as consumed by cli_filetype(). */
+struct cli_magic_s {
+ size_t offset; /* byte offset in the inspected buffer where the comparison starts */
+ const char *magic; /* bytes to compare against; NULL marks the end of the table */
+ size_t length; /* number of bytes of magic that are compared */
+ const char *descr; /* name printed in the "Recognized ... file" debug message */
+ cli_file_t type; /* file type reported on a match */
+};
+
+/* One type signature handed to the pattern matcher by cli_addtypesigs(). */
+struct cli_smagic_s {
+ const char *sig; /* signature string passed to cli_parse_add(); NULL marks the end of the table */
+ const char *descr; /* description passed along with the signature and used in error messages */
+ cli_file_t type; /* file type associated with the signature */
+};
+
+/*
+ * Fixed-position magic table scanned in order by cli_filetype(); the first
+ * entry whose bytes match at its offset decides the type. The table ends with
+ * an entry whose magic pointer is NULL.
+ */
+static const struct cli_magic_s cli_magic[] = {
+
+ /* Executables */
+
+ {0, "MZ", 2, "DOS/W32 executable/library/driver", CL_TYPE_MSEXE},
+ {0, "\177ELF", 4, "ELF", CL_TYPE_ELF},
+
+ /* Archives */
+
+ {0, "Rar!", 4, "RAR", CL_TYPE_RAR},
+ {0, "PK\003\004", 4, "ZIP", CL_TYPE_ZIP},
+ {0, "PK00PK\003\004", 8, "ZIP", CL_TYPE_ZIP},
+ {0, "\037\213", 2, "GZip", CL_TYPE_GZ},
+ {0, "BZh", 3, "BZip", CL_TYPE_BZ},
+ {0, "\x60\xea", 2, "ARJ", CL_TYPE_ARJ},
+ {0, "SZDD", 4, "compress.exe'd", CL_TYPE_MSSZDD},
+ {0, "MSCF", 4, "MS CAB", CL_TYPE_MSCAB},
+ {0, "ITSF", 4, "MS CHM", CL_TYPE_MSCHM},
+ {8, "\x19\x04\x00\x10", 4, "SIS", CL_TYPE_SIS},
+ {0, "#@~^", 4, "SCRENC", CL_TYPE_SCRENC},
+ {0, "(This file must be converted with BinHex 4.0)",
+ 45, "BinHex", CL_TYPE_BINHEX},
+
+ /* Mail */
+
+ {0, "From ", 5, "MBox", CL_TYPE_MAIL},
+ {0, "Received: ", 10, "Raw mail", CL_TYPE_MAIL},
+ {0, "Return-Path: ", 13, "Maildir", CL_TYPE_MAIL},
+ {0, "Return-path: ", 13, "Maildir", CL_TYPE_MAIL},
+ {0, "Delivered-To: ", 14, "Mail", CL_TYPE_MAIL},
+ {0, "X-UIDL: ", 8, "Mail", CL_TYPE_MAIL},
+ {0, "X-Apparently-To: ", 17, "Mail", CL_TYPE_MAIL},
+ {0, "X-Envelope-From: ", 17, "Mail", CL_TYPE_MAIL},
+ {0, "X-Original-To: ", 15, "Mail", CL_TYPE_MAIL},
+ {0, "X-Symantec-", 11, "Symantec", CL_TYPE_MAIL},
+ {0, "X-EVS", 5, "EVS mail", CL_TYPE_MAIL},
+ {0, "X-Real-To: ", 11, "Mail", CL_TYPE_MAIL},
+ {0, "X-Sieve: ", 9, "Mail", CL_TYPE_MAIL},
+ {0, ">From ", 6, "Mail", CL_TYPE_MAIL},
+ {0, "Date: ", 6, "Mail", CL_TYPE_MAIL},
+ {0, "Message-Id: ", 12, "Mail", CL_TYPE_MAIL},
+ {0, "Message-ID: ", 12, "Mail", CL_TYPE_MAIL},
+ {0, "Envelope-to: ", 13, "Mail", CL_TYPE_MAIL},
+ {0, "Delivery-date: ", 15, "Mail", CL_TYPE_MAIL},
+ {0, "To: ", 4, "Mail", CL_TYPE_MAIL},
+ {0, "Subject: ", 9, "Mail", CL_TYPE_MAIL},
+ {0, "For: ", 5, "Eserv mail", CL_TYPE_MAIL},
+ {0, "From: ", 6, "Exim mail", CL_TYPE_MAIL},
+ {0, "v:\015\012Received: ", 14, "VPOP3 Mail (DOS)", CL_TYPE_MAIL},
+ {0, "v:\012Received: ", 13, "VPOP3 Mail (UNIX)", CL_TYPE_MAIL},
+ {0, "Hi. This is the qmail-send", 26, "Qmail bounce", CL_TYPE_MAIL},
+ {0, "\170\237\076\042", 4, "TNEF", CL_TYPE_TNEF},
+
+ {0, "begin ", 6, "UUencoded", CL_TYPE_UUENCODED},
+ {0, "\041\102\104\116", 4, "PST", CL_TYPE_PST},
+
+ /* Graphics (may contain exploits against MS systems) */
+
+ {0, "GIF", 3, "GIF", CL_TYPE_GRAPHICS},
+ {0, "BM", 2, "BMP", CL_TYPE_GRAPHICS},
+ {0, "\377\330\377", 3, "JPEG", CL_TYPE_GRAPHICS},
+ {6, "JFIF", 4, "JPEG", CL_TYPE_GRAPHICS},
+ {6, "Exif", 4, "JPEG", CL_TYPE_GRAPHICS},
+ {0, "\x89PNG", 4, "PNG", CL_TYPE_GRAPHICS},
+ {0, "RIFF", 4, "RIFF", CL_TYPE_RIFF},
+ {0, "RIFX", 4, "RIFX", CL_TYPE_RIFF},
+
+ /* Others */
+
+ {0, "\320\317\021\340\241\261\032\341", 8, "OLE2 container", CL_TYPE_MSOLE2},
+ {0, "%PDF-", 5, "PDF document", CL_TYPE_PDF},
+ {0, "\266\271\254\256\376\377\377\377", 8, "CryptFF", CL_TYPE_CRYPTFF},
+ {0, "{\\rtf", 5, "RTF", CL_TYPE_RTF},
+
+ /* Ignored types */
+
+ {0, "\000\000\001\263", 4, "MPEG video stream", CL_TYPE_DATA},
+ {0, "\000\000\001\272", 4, "MPEG sys stream", CL_TYPE_DATA},
+ {0, "OggS", 4, "Ogg Stream", CL_TYPE_DATA},
+ {0, "ID3", 3, "MP3", CL_TYPE_DATA},
+ {0, "\377\373\220", 3, "MP3", CL_TYPE_DATA},
+ {0, "%!PS-Adobe-", 11, "PostScript", CL_TYPE_DATA},
+ {0, "\060\046\262\165\216\146\317", 7, "WMA/WMV/ASF", CL_TYPE_DATA},
+ {0, ".RMF" , 4, "Real Media File", CL_TYPE_DATA},
+
+ {0, NULL, 0, NULL, CL_TYPE_UNKNOWN_DATA}
+};
+
+/*
+ * Type signatures registered with the pattern matcher by cli_addtypesigs().
+ * Each sig is a hex-encoded pattern (the trailing comments give the text it
+ * spells); the table ends with an entry whose sig pointer is NULL.
+ */
+static const struct cli_smagic_s cli_smagic[] = {
+
+ /* "\nFrom: " * "\nContent-Type: " */
+ {"0a46726f6d3a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
+
+ /* "\nReceived: " * "\nContent-Type: " */
+ {"0a52656365697665643a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
+
+ /* "\nReceived: " * "\nContent-type: " */
+ {"0a52656365697665643a20{-2048}0a436f6e74656e742d747970653a20", "Mail file", CL_TYPE_MAIL},
+
+ /* "MIME-Version: " * "\nContent-Type: " */
+ {"4d494d452d56657273696f6e3a20{-2048}0a436f6e74656e742d547970653a20", "Mail file", CL_TYPE_MAIL},
+
+ /* remember the matcher is case sensitive */
+ {"3c62723e", "HTML data", CL_TYPE_HTML}, /* <br> */
+ {"3c42723e", "HTML data", CL_TYPE_HTML}, /* <Br> */
+ {"3c42523e", "HTML data", CL_TYPE_HTML}, /* <BR> */
+ {"3c703e", "HTML data", CL_TYPE_HTML}, /* <p> */
+ {"3c503e", "HTML data", CL_TYPE_HTML}, /* <P> */
+ {"68726566", "HTML data", CL_TYPE_HTML}, /* href */
+ {"48726566", "HTML data", CL_TYPE_HTML}, /* Href */
+ {"48524546", "HTML data", CL_TYPE_HTML}, /* HREF */
+ {"3c68746d6c3e", "HTML data", CL_TYPE_HTML}, /* <html> */
+ {"3c48544d4c3e", "HTML data", CL_TYPE_HTML}, /* <HTML> */
+ {"3c48746d6c3e", "HTML data", CL_TYPE_HTML}, /* <Html> */
+ {"3c686561643e", "HTML data", CL_TYPE_HTML}, /* <head> */
+ {"3c484541443e", "HTML data", CL_TYPE_HTML}, /* <HEAD> */
+ {"3c486561643e", "HTML data", CL_TYPE_HTML}, /* <Head> */
+ {"3c666f6e74", "HTML data", CL_TYPE_HTML}, /* <font */
+ {"3c466f6e74", "HTML data", CL_TYPE_HTML}, /* <Font */
+ {"3c464f4e54", "HTML data", CL_TYPE_HTML}, /* <FONT */
+ {"3c696d67", "HTML data", CL_TYPE_HTML}, /* <img */
+ {"3c494d47", "HTML data", CL_TYPE_HTML}, /* <IMG */
+ {"3c496d67", "HTML data", CL_TYPE_HTML}, /* <Img */
+ {"3c736372697074", "HTML data", CL_TYPE_HTML}, /* <script */
+ {"3c536372697074", "HTML data", CL_TYPE_HTML}, /* <Script */
+ {"3c534352495054", "HTML data", CL_TYPE_HTML}, /* <SCRIPT */
+ {"3c6f626a656374", "HTML data", CL_TYPE_HTML}, /* <object */
+ {"3c4f626a656374", "HTML data", CL_TYPE_HTML}, /* <Object */
+ {"3c4f424a454354", "HTML data", CL_TYPE_HTML}, /* <OBJECT */
+ {"3c696672616d65", "HTML data", CL_TYPE_HTML}, /* <iframe */
+ {"3c494652414d45", "HTML data", CL_TYPE_HTML}, /* <IFRAME */
+ {"3c7461626c65", "HTML data", CL_TYPE_HTML}, /* <table */
+ {"3c5441424c45", "HTML data", CL_TYPE_HTML}, /* <TABLE */
+
+ {"526172211a0700", "RAR-SFX", CL_TYPE_RARSFX},
+ {"504b0304", "ZIP-SFX", CL_TYPE_ZIPSFX},
+ {"4d534346", "CAB-SFX", CL_TYPE_CABSFX},
+ {"60ea{7}0002", "ARJ-SFX", CL_TYPE_ARJSFX},
+ {"60ea{7}0102", "ARJ-SFX", CL_TYPE_ARJSFX},
+ {"60ea{7}0202", "ARJ-SFX", CL_TYPE_ARJSFX},
+ {"efbeadde4e756c6c736f6674496e7374", "NSIS", CL_TYPE_NULSFT},
+ {"a3484bbe986c4aa9994c530a86d6487d41553321454130(35|36)", "AUTOIT", CL_TYPE_AUTOIT},
+
+ {"4d5a{60-300}50450000", "PE", CL_TYPE_MSEXE},
+
+ {NULL, NULL, CL_TYPE_UNKNOWN_DATA}
+};
+
+/*
+ * Per-byte lookup table with one 0/1 flag for each of the 256 byte values.
+ * Its only reference in this file is inside the disabled text heuristic of
+ * cli_filetype(), where a byte whose flag is 0 (and which is neither a
+ * control nor a printable character) marks the buffer as non-text.
+ */
+static char internat[256] = {
+ /* TODO: Remember to buy a beer to Joerg Wunsch <joerg at FreeBSD.ORG> */
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, /* 0x0X */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, /* 0x1X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x2X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x3X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x4X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x5X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x6X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 0x7X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x8X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x9X */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xaX */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xbX */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xcX */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xdX */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xeX */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 /* 0xfX */
+};
+
+/*
+ * Classify a buffer by comparing it against the fixed-position entries of
+ * cli_magic, in table order. The first entry that fits inside buflen and
+ * whose bytes match decides the result.
+ *
+ * When nothing matches the buffer is reported as CL_TYPE_UNKNOWN_TEXT: the
+ * heuristic that could clear the "text" flag is disabled.
+ */
+cli_file_t cli_filetype(const unsigned char *buf, size_t buflen)
+{
+    const struct cli_magic_s *entry;
+    int text = 1;
+
+    for(entry = cli_magic; entry->magic != NULL; entry++) {
+        /* entry would reach past the end of the buffer */
+        if(buflen < entry->offset + entry->length)
+            continue;
+        if(memcmp(buf + entry->offset, entry->magic, entry->length) != 0)
+            continue;
+
+        cli_dbgmsg("Recognized %s file\n", entry->descr);
+        return entry->type;
+    }
+
+/* improve or drop this code
+ * https://wwws.clamav.net/bugzilla/show_bug.cgi?id=373
+ *
+ * The disabled heuristic looked at the first min(buflen, 25) bytes and
+ * cleared "text" at the first byte that was neither a control character, nor
+ * printable, nor flagged in internat[].
+ */
+    return text ? CL_TYPE_UNKNOWN_TEXT : CL_TYPE_UNKNOWN_DATA;
+}
+
+int is_tar(unsigned char *buf, unsigned int nbytes);
+
+/*
+ * Determine the type of the file open on descriptor "desc".
+ *
+ * Steps, each one only taken while the type is still unknown:
+ *  1. read up to MAGIC_BUFFER_SIZE bytes and classify them with cli_filetype();
+ *  2. for unknown text and a usable engine, scan the bytes with the matcher
+ *     in engine->root[0] - first as they are, then after cli_utf16toascii()
+ *     (an HTML hit there yields CL_TYPE_HTML_UTF16), then, if entity
+ *     conversion is enabled in the engine's dconf, line by line through
+ *     encoding_norm_readline();
+ *  3. re-read up to 37638 bytes from the start of the file and check for
+ *     tar archives and ISO 9660 / High Sierra CD-ROM images.
+ *
+ * Returns the detected type; on internal failures the best type found so far
+ * is returned.
+ */
+cli_file_t cli_filetype2(int desc, const struct cl_engine *engine)
+{
+    unsigned char smallbuff[MAGIC_BUFFER_SIZE + 1], *decoded, *bigbuff;
+    int bread, sret;
+    cli_file_t ret = CL_TYPE_UNKNOWN_DATA;
+    struct cli_matcher *root;
+    struct cli_ac_data mdata;
+
+
+    memset(smallbuff, 0, sizeof(smallbuff));
+    if((bread = read(desc, smallbuff, MAGIC_BUFFER_SIZE)) > 0)
+        ret = cli_filetype(smallbuff, bread);
+
+    if(engine && ret == CL_TYPE_UNKNOWN_TEXT) {
+        root = engine->root[0];
+        if(!root)
+            return ret;
+
+        if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN))
+            return ret;
+
+        sret = cli_ac_scanbuff(smallbuff, bread, NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
+
+        cli_ac_freedata(&mdata);
+
+        if(sret >= CL_TYPENO) {
+            ret = sret;
+        } else {
+            if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN))
+                return ret;
+
+            decoded = (unsigned char *) cli_utf16toascii((char *) smallbuff, bread);
+            if(decoded) {
+                sret = cli_ac_scanbuff(decoded, strlen((char *) decoded), NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
+                free(decoded);
+                if(sret == CL_TYPE_HTML)
+                    ret = CL_TYPE_HTML_UTF16;
+            }
+            cli_ac_freedata(&mdata);
+
+            if((((struct cli_dconf*) engine->dconf)->phishing & PHISHING_CONF_ENTCONV) && ret != CL_TYPE_HTML_UTF16) {
+                struct entity_conv conv;
+                const size_t conv_size = 2*bread < 256 ? 256 : 2*bread;
+
+                if(init_entity_converter(&conv,UNKNOWN,conv_size) == 0) {
+                    int end = 0;
+                    m_area_t area;
+                    area.buffer = (unsigned char *) smallbuff;
+                    area.length = bread;
+                    area.offset = 0;
+
+                    while(!end) {
+                        if(cli_ac_initdata(&mdata, root->ac_partsigs, AC_DEFAULT_TRACKLEN)) {
+                            /* the converter was initialized above: release
+                             * it before leaving, or its buffers are leaked */
+                            entity_norm_done(&conv);
+                            return ret;
+                        }
+
+                        decoded = encoding_norm_readline(&conv, NULL, &area, bread);
+
+                        if(decoded) {
+                            sret = cli_ac_scanbuff(decoded, strlen((const char *) decoded), NULL, engine->root[0], &mdata, 1, 0, 0, -1, NULL);
+                            free(decoded);
+                            if(sret == CL_TYPE_HTML) {
+                                ret = CL_TYPE_HTML;
+                                end = 1;
+                            }
+                        } else
+                            end = 1;
+
+                        cli_ac_freedata(&mdata);
+                    }
+
+                    entity_norm_done(&conv);
+
+                } else {
+                    cli_warnmsg("cli_filetype2: Error initializing entity converter\n");
+                }
+            }
+        }
+    }
+
+    if(ret == CL_TYPE_UNKNOWN_DATA || ret == CL_TYPE_UNKNOWN_TEXT) {
+
+        /* zero-filled, so the fixed-offset comparisons below stay inside the
+         * buffer and see zeros wherever the file was shorter */
+        if(!(bigbuff = (unsigned char *) cli_calloc(37638 + 1, sizeof(unsigned char))))
+            return ret;
+
+        lseek(desc, 0, SEEK_SET);
+        if((bread = read(desc, bigbuff, 37638)) > 0) {
+
+            bigbuff[bread] = 0;
+
+            switch(is_tar(bigbuff, bread)) {
+                case 1:
+                    ret = CL_TYPE_OLD_TAR;
+                    cli_dbgmsg("Recognized old fashioned tar file\n");
+                    break;
+                case 2:
+                    ret = CL_TYPE_POSIX_TAR;
+                    cli_dbgmsg("Recognized POSIX tar file\n");
+                    break;
+            }
+        }
+
+        if(ret == CL_TYPE_UNKNOWN_DATA || ret == CL_TYPE_UNKNOWN_TEXT) {
+
+            if(!memcmp(bigbuff + 32769, "CD001" , 5) || !memcmp(bigbuff + 37633, "CD001" , 5)) {
+                cli_dbgmsg("Recognized ISO 9660 CD-ROM data\n");
+                ret = CL_TYPE_DATA;
+            } else if(!memcmp(bigbuff + 32776, "CDROM" , 5)) {
+                cli_dbgmsg("Recognized High Sierra CD-ROM data\n");
+                ret = CL_TYPE_DATA;
+            }
+        }
+
+        free(bigbuff);
+    }
+
+    return ret;
+}
+
+/*
+ * Register every entry of cli_smagic with the matcher in engine->root[0],
+ * allocating and initializing that matcher first when it does not exist yet.
+ *
+ * Returns 0 on success, CL_EMEM when the matcher cannot be allocated, or the
+ * error code reported by cli_ac_init() / cli_parse_add().
+ */
+int cli_addtypesigs(struct cl_engine *engine)
+{
+    const struct cli_smagic_s *sm;
+    struct cli_matcher *root = engine->root[0];
+    int ret;
+
+
+    if(root == NULL) {
+        cli_dbgmsg("cli_addtypesigs: Need to allocate AC trie in engine->root[0]\n");
+        root = (struct cli_matcher *) cli_calloc(1, sizeof(struct cli_matcher));
+        engine->root[0] = root;
+        if(root == NULL) {
+            cli_errmsg("cli_addtypesigs: Can't initialise AC pattern matcher\n");
+            return CL_EMEM;
+        }
+
+        ret = cli_ac_init(root, cli_ac_mindepth, cli_ac_maxdepth);
+        if(ret != 0) {
+            /* No need to free previously allocated memory here - all engine
+             * elements will be properly freed by cl_free()
+             */
+            cli_errmsg("cli_addtypesigs: Can't initialise AC pattern matcher\n");
+            return ret;
+        }
+    }
+
+    for(sm = cli_smagic; sm->sig != NULL; sm++) {
+        ret = cli_parse_add(root, sm->descr, sm->sig, sm->type, NULL, 0);
+        if(ret != 0) {
+            cli_errmsg("cli_addtypesigs: Problem adding signature for %s\n", sm->descr);
+            return ret;
+        }
+    }
+
+    return 0;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_fsg.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2004 aCaB <acab at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+/*
+** defsg.c
+**
+** 02/08/2k4 - Dumped and reversed
+** 02/08/2k4 - Done coding
+** 03/08/2k4 - Cleaning and securing
+** 04/08/2k4 - Done porting
+** 07/08/2k4 - Started adding support for 1.33
+*/
+
+/*
+** Unpacks an FSG compressed section.
+**
+** Czesc bart, good asm, nice piece of code ;)
+*/
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdlib.h>
+
+#include "cltypes.h"
+#include "rebuildpe.h"
+#include "others.h"
+#include "packlibs.h"
+#include "fsg.h"
+
+/*
+ * Unpack an FSG 2.00 compressed section and rebuild a PE file from it.
+ *
+ * source/ssize describe the packed data, dest/dsize the output buffer; the
+ * unpacked data is described as a single section located at "rva" and written
+ * out through cli_rebuildpe() using base, ep and the descriptor "file".
+ *
+ * Returns -1 when unpacking fails, 0 when the PE rebuild fails, 1 on success.
+ */
+int unfsg_200(char *source, char *dest, int ssize, int dsize, uint32_t rva, uint32_t base, uint32_t ep, int file) {
+    struct cli_exe_section section; /* Yup, just one ;) */
+
+    if ( cli_unfsg(source, dest, ssize, dsize, NULL, NULL) ) return -1;
+
+    section.raw=0;
+    section.rsz = dsize;
+    section.vsz = dsize;
+    section.rva = rva;
+
+    /* pass the address of the single section descriptor */
+    if (!cli_rebuildpe(dest, &section, 1, base, ep, 0, 0, file)) {
+        cli_dbgmsg("FSG: Rebuilding failed\n");
+        return 0;
+    }
+    return 1;
+}
+
+
+/*
+ * Unpack an FSG 1.33 compressed image made of sectcount+1 consecutive packed
+ * sections and rebuild a PE file from the result.
+ *
+ * Each section is unpacked in turn into dest, and its raw offset and raw size
+ * are recorded in sections[]. The descriptors are then ordered by ascending
+ * RVA, their virtual sizes are derived from the gaps between successive RVAs
+ * (the last one receives what remains of dsize), and cli_rebuildpe() writes
+ * the file.
+ *
+ * Returns -1 when unpacking fails, 0 when the PE rebuild fails, 1 on success.
+ */
+int unfsg_133(char *source, char *dest, int ssize, int dsize, struct cli_exe_section *sections, int sectcount, uint32_t base, uint32_t ep, int file) {
+    char *src_pos = source, *dst_pos = dest;
+    int idx, swapped, raw_off = 0, remaining = dsize;
+
+    for (idx = 0; idx <= sectcount; idx++) {
+        char *sect_start = dst_pos;
+
+        if (cli_unfsg(src_pos, dst_pos, ssize - (src_pos - source), dsize - (dst_pos - dest), &src_pos, &dst_pos) == -1)
+            return -1;
+
+        /* RVA has been filled already in pe.c */
+        sections[idx].raw = raw_off;
+        sections[idx].rsz = dst_pos - sect_start;
+        raw_off += dst_pos - sect_start;
+    }
+
+    /* Sort out the sections: repeat adjacent swaps until a pass changes nothing */
+    do {
+        swapped = 0;
+        for (idx = 0; idx < sectcount; idx++) {
+            uint32_t saved_rva, saved_rsz, saved_raw;
+
+            if (sections[idx].rva <= sections[idx + 1].rva)
+                continue;
+
+            /* only rva, raw and rsz trade places; vsz is computed afterwards */
+            saved_rva = sections[idx].rva;
+            saved_raw = sections[idx].raw;
+            saved_rsz = sections[idx].rsz;
+            sections[idx].rva = sections[idx + 1].rva;
+            sections[idx].rsz = sections[idx + 1].rsz;
+            sections[idx].raw = sections[idx + 1].raw;
+            sections[idx + 1].rva = saved_rva;
+            sections[idx + 1].raw = saved_raw;
+            sections[idx + 1].rsz = saved_rsz;
+            swapped = 1;
+        }
+    } while (swapped);
+
+    /* Cure Vsizes and debugspam */
+    for (idx = 0; idx <= sectcount; idx++) {
+        if (idx == sectcount) {
+            sections[idx].vsz = remaining;
+        } else {
+            sections[idx].vsz = sections[idx + 1].rva - sections[idx].rva;
+            remaining -= sections[idx + 1].rva - sections[idx].rva;
+        }
+
+        cli_dbgmsg("FSG: .SECT%d RVA:%x VSize:%x ROffset: %x, RSize:%x\n", idx, sections[idx].rva, sections[idx].vsz, sections[idx].raw, sections[idx].rsz);
+    }
+
+    if (!cli_rebuildpe(dest, sections, sectcount+1, base, ep, 0, 0, file)) {
+        cli_dbgmsg("FSG: Rebuilding failed\n");
+        return 0;
+    }
+    return 1;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_hashtab.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,413 @@
+/*
+ * HTML Entity & Encoding normalization.
+ *
+ * Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ */
+#include <clamav-config.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "cltypes.h"
+#include "clamav.h"
+#include "others.h"
+#include "hashtab.h"
+
+
+/* Candidate table capacities in ascending order; get_nearest_capacity()
+ * returns the first entry that is strictly greater than the requested size. */
+static const size_t prime_list[] =
+{
+ 53ul, 97ul, 193ul, 389ul, 769ul,
+ 1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
+ 49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
+ 1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
+ 50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
+ 1610612741ul, 3221225473ul
+};
+
+
+/* Number of entries in prime_list */
+static const size_t prime_n = sizeof(prime_list)/sizeof(prime_list[0]);
+
+/* Tombstone for removed entries: a slot whose key pointer equals the address
+ * of this array is treated as deleted (compared by pointer, never by content). */
+static unsigned char DELETED_KEY[] = "";
+
+/* Returns the smallest entry of prime_list that is strictly greater than
+ * capacity; if there is none, logs an error and returns the largest entry. */
+static size_t get_nearest_capacity(const size_t capacity)
+{
+    size_t pos = 0;
+
+    while (pos < prime_n) {
+        const size_t candidate = prime_list[pos];
+        if (candidate > capacity)
+            return candidate;
+        pos++;
+    }
+    cli_errmsg("Requested hashtable size is too big!");
+    return prime_list[prime_n-1];
+}
+
+/* Optional usage statistics. With PROFILE_HASHTABLE defined, each PROFILE_*
+ * hook updates a counter in s->PROFILE_STRUCT; otherwise every hook is an
+ * empty macro. */
+#ifdef PROFILE_HASHTABLE
+/* I know, this is ugly, most of these functions get a const s, that gets its const-ness discarded,
+ * and then these functions modify something the compiler assumes is readonly.
+ * Please, never use PROFILE_HASHTABLE in production code, and in releases. Use it for development only!*/
+
+/* Zero all counters */
+static inline void PROFILE_INIT(struct hashtable *s)
+{
+ memset(&s->PROFILE_STRUCT,0,sizeof(s->PROFILE_STRUCT));
+}
+
+/* One hash computation */
+static inline void PROFILE_CALC_HASH(struct hashtable *s)
+{
+ s->PROFILE_STRUCT.calc_hash++;
+}
+
+/* One lookup request */
+static inline void PROFILE_FIND_ELEMENT(struct hashtable *s)
+{
+ s->PROFILE_STRUCT.find_req++;
+}
+
+/* Lookup ended without a match after `tries` probes */
+static inline void PROFILE_FIND_NOTFOUND(struct hashtable *s, size_t tries)
+{
+ s->PROFILE_STRUCT.not_found++;
+ s->PROFILE_STRUCT.not_found_tries += tries;
+}
+
+/* Lookup succeeded after `tries` probes */
+static inline void PROFILE_FIND_FOUND(struct hashtable *s, size_t tries)
+{
+ s->PROFILE_STRUCT.found++;
+ s->PROFILE_STRUCT.found_tries += tries;
+}
+
+/* A probe sequence ran through the whole table */
+static inline void PROFILE_HASH_EXHAUSTED(struct hashtable *s)
+{
+ s->PROFILE_STRUCT.hash_exhausted++;
+}
+
+/* A grow operation started */
+static inline void PROFILE_GROW_START(struct hashtable *s)
+{
+ s->PROFILE_STRUCT.grow++;
+}
+
+/* During a grow, a free slot was found after `tries` probes */
+static inline void PROFILE_GROW_FOUND(struct hashtable *s, size_t tries)
+{
+ s->PROFILE_STRUCT.grow_found++;
+ s->PROFILE_STRUCT.grow_found_tries += tries;
+}
+
+/* A grow operation finished (nothing is recorded) */
+static inline void PROFILE_GROW_DONE(struct hashtable *s)
+{
+}
+
+/* An insert reused a deleted slot */
+static inline void PROFILE_DELETED_REUSE(struct hashtable *s, size_t tries)
+{
+ s->PROFILE_STRUCT.deleted_reuse++;
+ s->PROFILE_STRUCT.deleted_tries += tries;
+}
+
+/* An insert stored a new key in an empty slot */
+static inline void PROFILE_INSERT(struct hashtable *s, size_t tries)
+{
+ s->PROFILE_STRUCT.inserts++;
+ s->PROFILE_STRUCT.insert_tries += tries;
+}
+
+/* An insert found the key already present and replaced its data */
+static inline void PROFILE_DATA_UPDATE(struct hashtable *s, size_t tries)
+{
+ s->PROFILE_STRUCT.update++;
+ s->PROFILE_STRUCT.update_tries += tries;
+}
+
+/* One element deleted */
+static inline void PROFILE_HASH_DELETE(struct hashtable *s)
+{
+ s->PROFILE_STRUCT.deletes++;
+}
+
+/* The table was cleared */
+static inline void PROFILE_HASH_CLEAR(struct hashtable *s)
+{
+ s->PROFILE_STRUCT.clear++;
+}
+
+/* Dump all counters, plus two derived probes-per-operation ratios, through
+ * cli_dbgmsg(). NOTE(review): the ratios divide by `queries` / `inserts`,
+ * which are zero for an unused table. */
+static inline void PROFILE_REPORT(const struct hashtable *s)
+{
+ size_t lookups, queries, insert_tries, inserts;
+ cli_dbgmsg("--------Hashtable usage report for %p--------------\n",(const void*)s);
+ cli_dbgmsg("hash function calculations:%ld\n",s->PROFILE_STRUCT.calc_hash);
+ cli_dbgmsg("successfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.found, s->PROFILE_STRUCT.find_req, s->PROFILE_STRUCT.found_tries);
+ cli_dbgmsg("unsuccessfull finds/total searches: %ld/%ld; lookups: %ld\n", s->PROFILE_STRUCT.not_found, s->PROFILE_STRUCT.find_req , s->PROFILE_STRUCT.not_found_tries);
+ cli_dbgmsg("successfull finds during grow:%ld; lookups: %ld\n",s->PROFILE_STRUCT.grow_found, s->PROFILE_STRUCT.grow_found_tries);
+ /* total probes and total queries over finds and grow re-inserts */
+ lookups = s->PROFILE_STRUCT.found_tries + s->PROFILE_STRUCT.not_found_tries + s->PROFILE_STRUCT.grow_found_tries;
+ queries = s->PROFILE_STRUCT.find_req + s->PROFILE_STRUCT.grow_found;
+ cli_dbgmsg("Find Lookups/total queries: %ld/%ld = %3f\n", lookups, queries, lookups*1.0/queries);
+ /* total probes over new inserts, updates and deleted-slot reuse */
+ insert_tries = s->PROFILE_STRUCT.insert_tries + s->PROFILE_STRUCT.update_tries + s->PROFILE_STRUCT.deleted_tries;
+
+ cli_dbgmsg("new item insert tries/new items: %ld/%ld\n", s->PROFILE_STRUCT.insert_tries, s->PROFILE_STRUCT.inserts);
+ cli_dbgmsg("update tries/updates: %ld/%ld\n", s->PROFILE_STRUCT.update_tries, s->PROFILE_STRUCT.update);
+ cli_dbgmsg("deleted item reuse tries/deleted&reused items: %ld/%ld\n", s->PROFILE_STRUCT.deleted_tries, s->PROFILE_STRUCT.deleted_reuse);
+ inserts = s->PROFILE_STRUCT.inserts + s->PROFILE_STRUCT.update + s->PROFILE_STRUCT.deleted_reuse;
+ cli_dbgmsg("Insert tries/total inserts: %ld/%ld = %3f\n", insert_tries, inserts, insert_tries*1.0/inserts);
+
+ cli_dbgmsg("Grows: %ld, Deletes : %ld, hashtable clears: %ld\n",s->PROFILE_STRUCT.grow,s->PROFILE_STRUCT.deletes, s->PROFILE_STRUCT.clear);
+ cli_dbgmsg("--------Report end-------------\n");
+}
+
+#else
+/* Profiling disabled: every hook expands to nothing */
+#define PROFILE_INIT(s)
+#define PROFILE_CALC_HASH(s)
+#define PROFILE_FIND_ELEMENT(s)
+#define PROFILE_FIND_NOTFOUND(s, tries)
+#define PROFILE_FIND_FOUND(s, tries)
+#define PROFILE_HASH_EXHAUSTED(s)
+#define PROFILE_GROW_START(s)
+#define PROFILE_GROW_FOUND(s, tries)
+#define PROFILE_GROW_DONE(s)
+#define PROFILE_DELETED_REUSE(s, tries)
+#define PROFILE_INSERT(s, tries)
+#define PROFILE_DATA_UPDATE(s, tries)
+#define PROFILE_HASH_DELETE(s)
+#define PROFILE_HASH_CLEAR(s)
+#define PROFILE_REPORT(s)
+#endif
+
+/*
+ * Prepares an empty table with room for more than `capacity` slots (rounded
+ * up through get_nearest_capacity()) and a fill limit of 80% of that.
+ * Returns CL_ENULLARG for a NULL table, CL_EMEM if allocation fails, else 0.
+ */
+int hashtab_init(struct hashtable *s,size_t capacity)
+{
+    size_t slots;
+
+    if (s == NULL)
+        return CL_ENULLARG;
+
+    PROFILE_INIT(s);
+
+    slots = get_nearest_capacity(capacity);
+    s->htable = cli_calloc(slots, sizeof(*s->htable));
+    if (s->htable == NULL)
+        return CL_EMEM;
+
+    s->used = 0;
+    s->capacity = slots;
+    s->maxfill = 8*slots/10;
+    return 0;
+}
+
+/* Folds the len bytes of k, last byte first, into a value in [0, SIZE):
+ * each step shifts the running value left by 8, adds the byte and reduces
+ * modulo SIZE. */
+static size_t hash(const unsigned char* k,const size_t len,const size_t SIZE)
+{
+    size_t acc = 0;
+    size_t remaining = len;
+
+    while (remaining > 0) {
+        remaining--;
+        acc = ((acc << 8) + k[remaining]) % SIZE;
+    }
+    return acc;
+}
+
+/* if returned element has key==NULL, then key was not found in table */
+/*
+ * Looks up the key (len bytes at `key`) in the table.
+ * Returns the matching element, or NULL when s is NULL, an empty slot is
+ * reached, or the whole probe sequence has been tried without a match.
+ */
+struct element* hashtab_find(const struct hashtable *s,const unsigned char* key,const size_t len)
+{
+    struct element* element;
+    size_t tries = 1;
+    size_t idx;
+
+    if(!s)
+        return NULL;
+    PROFILE_CALC_HASH(s);
+    PROFILE_FIND_ELEMENT(s);
+    idx = hash(key, len, s->capacity);
+    element = &s->htable[idx];
+    do {
+        if(!element->key) {
+            PROFILE_FIND_NOTFOUND(s, tries);
+            return NULL; /* element not found, place is empty*/
+        }
+        /* The stored key must not be longer than len: strncmp() alone would
+         * accept any stored key that merely starts with the lookup key. */
+        if(element->key != DELETED_KEY
+           && strncmp((const char*)key,(const char*)element->key,len)==0
+           && strlen((const char*)element->key) <= len) {
+            PROFILE_FIND_FOUND(s, tries);
+            return element;/* found */
+        }
+        /* tombstone or different key: step to the next probe position */
+        idx = (idx + tries++) % s->capacity;
+        element = &s->htable[idx];
+    } while (tries <= s->capacity);
+    PROFILE_HASH_EXHAUSTED(s);
+    return NULL; /* not found */
+}
+
+/*
+ * Moves every live entry into a freshly allocated, larger slot array and
+ * installs it in *s; tombstones are dropped. Key pointers are carried over,
+ * only the old slot array is freed.
+ * Returns CL_SUCCESS, or CL_EMEM when no larger capacity exists, allocation
+ * fails, or an entry cannot be placed (in which case *s is left unchanged).
+ */
+static int hashtab_grow(struct hashtable *s)
+{
+    const size_t new_capacity = get_nearest_capacity(s->capacity);
+    struct element* htable;
+    size_t i,idx, used = 0;
+
+    /* Check before allocating, so that nothing is leaked when the table is
+     * already at the largest supported size. */
+    if(new_capacity == s->capacity)
+        return CL_EMEM;
+    htable = cli_calloc(new_capacity, sizeof(*s->htable));
+    if(!htable)
+        return CL_EMEM;
+
+    PROFILE_GROW_START(s);
+    cli_dbgmsg("hashtab.c: Warning: growing open-addressing hashtables is slow. Either allocate more storage when initializing, or use other hashtable types!\n");
+    for(i=0; i < s->capacity;i++) {
+        if(s->htable[i].key && s->htable[i].key != DELETED_KEY) {
+            struct element* element;
+            size_t tries = 1;
+
+            PROFILE_CALC_HASH(s);
+            idx = hash(s->htable[i].key, strlen((const char*)s->htable[i].key), new_capacity);
+            element = &htable[idx];
+
+            /* probe the new table for a free slot */
+            while(element->key && tries <= new_capacity) {
+                idx = (idx + tries++) % new_capacity;
+                element = &htable[idx];
+            }
+            if(!element->key) {
+                /* copy element from old hashtable to new */
+                PROFILE_GROW_FOUND(s, tries);
+                *element = s->htable[i];
+                used++;
+            }
+            else {
+                cli_errmsg("hashtab.c: Impossible - unable to rehash table");
+                /* the old table is still intact and owns all keys; drop the new one */
+                free(htable);
+                return CL_EMEM;/* this means we didn't find enough room for all elements in the new table, should never happen */
+            }
+        }
+    }
+    free(s->htable);
+    s->htable = htable;
+    s->used = used;
+    s->capacity = new_capacity;
+    s->maxfill = new_capacity*8/10;
+    cli_dbgmsg("Table %p size after grow:%ld\n",(void*)s,s->capacity);
+    PROFILE_GROW_DONE(s);
+    return CL_SUCCESS;
+}
+
+
+/*
+ * Stores `data` under the key (len bytes at `key`). If the key is already
+ * present its data is replaced; otherwise a NUL-terminated copy of the key
+ * is placed in the first tombstone seen on the probe path or, failing that,
+ * in the empty slot that ended the probe. The table is grown when it passes
+ * maxfill or when the probe sequence finds no empty slot.
+ * Returns 0 on success, CL_ENULLARG for a NULL table, CL_EMEM on failure.
+ */
+int hashtab_insert(struct hashtable *s,const unsigned char* key,const size_t len,const element_data data)
+{
+    struct element* element;
+    struct element* deleted_element;
+    size_t tries;
+    size_t idx;
+    if(!s)
+        return CL_ENULLARG;
+    do {
+        /* Restart the probe from scratch on every pass: after a grow the old
+         * slot array has been freed, so a remembered tombstone would dangle. */
+        deleted_element = NULL;
+        tries = 1;
+        PROFILE_CALC_HASH(s);
+        idx = hash(key, len, s->capacity);
+        element = &s->htable[idx];
+
+        do {
+            if(!element->key) {
+                unsigned char* thekey;
+                /* element not found, place is empty, insert*/
+                if(deleted_element) {
+                    /* reuse deleted elements*/
+                    element = deleted_element;
+                    PROFILE_DELETED_REUSE(s, tries);
+                }
+                else {
+                    PROFILE_INSERT(s, tries);
+                }
+                thekey = cli_malloc(len+1);
+                if(!thekey)
+                    return CL_EMEM;
+                /* copy at most len bytes and always terminate; key[len] is never read */
+                strncpy((char*)thekey,(const char*)key,len);
+                thekey[len] = '\0';
+                element->key = thekey;
+                element->data = data;
+                s->used++;
+                if(s->used > s->maxfill) {
+                    cli_dbgmsg("hashtab.c:Growing hashtable %p, because it has exceeded maxfill, old size:%ld\n",(void*)s,s->capacity);
+                    hashtab_grow(s);
+                }
+                return 0;
+            }
+            if(element->key == DELETED_KEY) {
+                /* remember the first tombstone, but keep probing: the key may
+                 * still be stored further along the sequence */
+                if(!deleted_element)
+                    deleted_element = element;
+            }
+            else if(strncmp((const char*)key,(const char*)element->key,len)==0
+                    && strlen((const char*)element->key) <= len) {
+                /* exact match only; a stored key that is longer than len is a different key */
+                PROFILE_DATA_UPDATE(s, tries);
+                element->data = data;/* key found, update */
+                return 0;
+            }
+            idx = (idx + tries++) % s->capacity;
+            element = &s->htable[idx];
+        } while (tries <= s->capacity);
+        /* no free place found*/
+        PROFILE_HASH_EXHAUSTED(s);
+        cli_dbgmsg("hashtab.c: Growing hashtable %p, because its full, old size:%ld.\n",(void*)s,s->capacity);
+    } while( hashtab_grow(s) >= 0 );
+    cli_warnmsg("hashtab.c: Unable to grow hashtable\n");
+    return CL_EMEM;
+}
+
+/* Removes the entry for the given key, if hashtab_find() locates one: its
+ * key string is freed and the slot is marked with the DELETED_KEY tombstone. */
+void hashtab_delete(struct hashtable *s,const unsigned char* key,const size_t len)
+{
+    struct element* victim = hashtab_find(s,key,len);
+
+    if (victim == NULL || victim->key == NULL)
+        return;
+
+    PROFILE_HASH_DELETE(s);
+    free(victim->key);/*FIXME: any way to shut up warnings here? if I make key char*, I get tons of warnings in entitylist.h */
+    victim->key = DELETED_KEY;
+    s->used--;
+}
+
+/* Frees every stored key string and resets all slots to empty; the slot
+ * array itself is kept, so the capacity is unchanged. */
+void hashtab_clear(struct hashtable *s)
+{
+    size_t i;
+    PROFILE_HASH_CLEAR(s);
+    for(i=0;i < s->capacity;i++) {
+        if(s->htable[i].key && s->htable[i].key != DELETED_KEY)
+            free(s->htable[i].key);/*FIXME: shut up warnings */
+    }
+    /* Zero the whole array, not just `capacity` bytes: otherwise most slots
+     * would keep pointers to the keys that were just freed. */
+    memset(s->htable, 0, s->capacity * sizeof(*s->htable));
+    s->used = 0;
+}
+
+
+/* Writes one "<data> <key>" line to `out` for every live entry (empty and
+ * deleted slots are skipped). Always returns CL_SUCCESS. */
+int hashtab_store(const struct hashtable *s,FILE* out)
+{
+    size_t pos = 0;
+
+    while (pos < s->capacity) {
+        const struct element* slot = &s->htable[pos++];
+
+        if (slot->key == NULL || slot->key == DELETED_KEY)
+            continue;
+        fprintf(out,"%ld %s\n",slot->data,slot->key);
+    }
+    return CL_SUCCESS;
+}
+
+/* Prints C source to stdout that defines the table's slots as a static
+ * array named <name>_elements plus a `const struct hashtable <name>`
+ * referring to it, then emits the profiling report. Always returns 0. */
+int hashtab_generate_c(const struct hashtable *s,const char* name)
+{
+    size_t pos;
+
+    printf("/* TODO: include GPL headers */\n");
+    printf("#include <hashtab.h>\n");
+    printf("static struct element %s_elements[] = {\n",name);
+
+    for (pos = 0; pos < s->capacity; pos++) {
+        const struct element* slot = &s->htable[pos];
+
+        if (slot->key == DELETED_KEY) {
+            printf("\t{DELETED_KEY,0},\n");
+        } else if (slot->key) {
+            printf("\t{(const unsigned char*)\"%s\", %ld},\n", slot->key, slot->data);
+        } else {
+            printf("\t{NULL, 0},\n");
+        }
+    }
+
+    printf("};\n");
+    printf("const struct hashtable %s = {\n",name);
+    printf("\t%s_elements, %ld, %ld, %ld", name, s->capacity, s->used, s->maxfill);
+    printf("\n};\n");
+
+    PROFILE_REPORT(s);
+    return 0;
+}
+
+/* Reads "<integer> <key>" lines from `in` and inserts each pair into the
+ * table. Lines that do not contain both fields are skipped. Always returns
+ * CL_SUCCESS; insert failures are not reported. */
+int hashtab_load(FILE* in, struct hashtable *s)
+{
+    char line[1024];
+    while (fgets(line, sizeof(line), in)) {
+        unsigned char l[1024];
+        int val;
+        /* both conversions must succeed, otherwise val/l are uninitialised */
+        if (sscanf(line,"%d %1023s",&val,(char*)l) != 2)
+            continue;
+        hashtab_insert(s,l,strlen((const char*)l),val);
+    }
+    return CL_SUCCESS;
+}
+
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_htmlnorm.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1711 @@
+/*
+ * Normalise HTML text.
+ * Decode MS Script Encoder protection.
+ *
+ * Copyright (C) 2004 trog at uncon.org
+ *
+ * The ScrEnc decoder was initially based upon an analysis by Andreas Marx.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#endif
+
+#include "others.h"
+#include "htmlnorm.h"
+
+/* Defined ahead of the blob.h include below; presumably blob.h refers to
+ * object_type - verify against that header. */
+typedef enum {
+ INVALIDCLASS, BLOBCLASS
+} object_type;
+#include "blob.h"
+
+#include "entconv.h"
+
+/* Capacity (excluding the terminating NUL) of the tag, tag-argument and
+ * tag-value scratch buffers, and the cap on collected tag contents. */
+#define HTML_STR_LENGTH 1024
+#define MAX_TAG_CONTENTS_LENGTH HTML_STR_LENGTH
+
+/* States of the HTML normaliser's state machine (see cli_html_normalise) */
+typedef enum {
+ HTML_BAD_STATE,
+ HTML_NORM,
+ HTML_COMMENT,
+ HTML_CHAR_REF,
+ HTML_ENTITY_REF_DECODE,
+ HTML_SKIP_WS,
+ HTML_TRIM_WS,
+ HTML_TAG,
+ HTML_TAG_ARG,
+ HTML_TAG_ARG_VAL,
+ HTML_TAG_ARG_EQUAL,
+ HTML_PROCESS_TAG,
+ HTML_CHAR_REF_DECODE,
+ HTML_SKIP_LENGTH,
+ HTML_JSDECODE,
+ HTML_JSDECODE_LENGTH,
+ HTML_JSDECODE_DECRYPT,
+ HTML_SPECIAL_CHAR,
+ HTML_RFC2397_TYPE,
+ HTML_RFC2397_INIT,
+ HTML_RFC2397_DATA,
+ HTML_RFC2397_FINISH,
+ HTML_RFC2397_ESC,
+ HTML_ESCAPE_CHAR
+} html_state;
+
+/* How the tag argument value currently being parsed is quoted */
+typedef enum {
+ SINGLE_QUOTED,
+ DOUBLE_QUOTED,
+ NOT_QUOTED
+} quoted_state;
+
+
+/* Size in bytes of the write buffer inside file_buff_t */
+#define HTML_FILE_BUFF_LEN 8192
+
+/* Buffered output file: bytes accumulate in `buffer` and are written to `fd`
+ * by html_output_flush(); `length` is the number of bytes currently pending. */
+typedef struct file_buff_tag {
+ int fd;
+ unsigned char buffer[HTML_FILE_BUFF_LEN];
+ int length;
+} file_buff_t;
+
+/* Base64 decoding table indexed by byte value: 'A'-'Z' map to 0-25,
+ * 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62 and '/' to 63; every other
+ * byte maps to -1. */
+static const int base64_chars[256] = {
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
+ 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
+ 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
+ -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
+ 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+ -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+};
+
+/* 64 entries, each 0, 1 or 2 (written as octal literals). Presumably the
+ * index into decrypt_tables to use for successive input positions of the
+ * script decoder - confirm against the HTML_JSDECODE_DECRYPT handling. */
+int table_order[] = {
+ 00, 02, 01, 00, 02, 01, 02, 01, 01, 02, 01, 02, 00, 01, 02, 01,
+ 00, 01, 02, 01, 00, 00, 02, 01, 01, 02, 00, 01, 02, 01, 01, 02,
+ 00, 00, 01, 02, 01, 02, 01, 00, 01, 00, 00, 02, 01, 00, 01, 02,
+ 00, 01, 02, 01, 00, 00, 02, 01, 01, 00, 00, 02, 01, 00, 01, 02
+};
+
+/* Three 128-entry substitution tables indexed by a 7-bit character code;
+ * presumably used for the MS Script Encoder decoding mentioned in the file
+ * header. NOTE(review): entry 0x0D of the third table is 0x06, whereas the
+ * first two tables map 0x0D to itself - possibly a typo, confirm. */
+int decrypt_tables[3][128] = {
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x57, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x2E, 0x47, 0x7A, 0x56, 0x42, 0x6A, 0x2F, 0x26, 0x49, 0x41, 0x34, 0x32, 0x5B, 0x76, 0x72, 0x43,
+ 0x38, 0x39, 0x70, 0x45, 0x68, 0x71, 0x4F, 0x09, 0x62, 0x44, 0x23, 0x75, 0x3C, 0x7E, 0x3E, 0x5E,
+ 0xFF, 0x77, 0x4A, 0x61, 0x5D, 0x22, 0x4B, 0x6F, 0x4E, 0x3B, 0x4C, 0x50, 0x67, 0x2A, 0x7D, 0x74,
+ 0x54, 0x2B, 0x2D, 0x2C, 0x30, 0x6E, 0x6B, 0x66, 0x35, 0x25, 0x21, 0x64, 0x4D, 0x52, 0x63, 0x3F,
+ 0x7B, 0x78, 0x29, 0x28, 0x73, 0x59, 0x33, 0x7F, 0x6D, 0x55, 0x53, 0x7C, 0x3A, 0x5F, 0x65, 0x46,
+ 0x58, 0x31, 0x69, 0x6C, 0x5A, 0x48, 0x27, 0x5C, 0x3D, 0x24, 0x79, 0x37, 0x60, 0x51, 0x20, 0x36},
+
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x7B, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x32, 0x30, 0x21, 0x29, 0x5B, 0x38, 0x33, 0x3D, 0x58, 0x3A, 0x35, 0x65, 0x39, 0x5C, 0x56, 0x73,
+ 0x66, 0x4E, 0x45, 0x6B, 0x62, 0x59, 0x78, 0x5E, 0x7D, 0x4A, 0x6D, 0x71, 0x3C, 0x60, 0x3E, 0x53,
+ 0xFF, 0x42, 0x27, 0x48, 0x72, 0x75, 0x31, 0x37, 0x4D, 0x52, 0x22, 0x54, 0x6A, 0x47, 0x64, 0x2D,
+ 0x20, 0x7F, 0x2E, 0x4C, 0x5D, 0x7E, 0x6C, 0x6F, 0x79, 0x74, 0x43, 0x26, 0x76, 0x25, 0x24, 0x2B,
+ 0x28, 0x23, 0x41, 0x34, 0x09, 0x2A, 0x44, 0x3F, 0x77, 0x3B, 0x55, 0x69, 0x61, 0x63, 0x50, 0x67,
+ 0x51, 0x49, 0x4F, 0x46, 0x68, 0x7C, 0x36, 0x70, 0x6E, 0x7A, 0x2F, 0x5F, 0x4B, 0x5A, 0x2C, 0x57},
+
+ {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x6E, 0x0A, 0x0B, 0x0C, 0x06, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x2D, 0x75, 0x52, 0x60, 0x71, 0x5E, 0x49, 0x5C, 0x62, 0x7D, 0x29, 0x36, 0x20, 0x7C, 0x7A, 0x7F,
+ 0x6B, 0x63, 0x33, 0x2B, 0x68, 0x51, 0x66, 0x76, 0x31, 0x64, 0x54, 0x43, 0x3C, 0x3A, 0x3E, 0x7E,
+ 0xFF, 0x45, 0x2C, 0x2A, 0x74, 0x27, 0x37, 0x44, 0x79, 0x59, 0x2F, 0x6F, 0x26, 0x72, 0x6A, 0x39,
+ 0x7B, 0x3F, 0x38, 0x77, 0x67, 0x53, 0x47, 0x34, 0x78, 0x5D, 0x30, 0x23, 0x5A, 0x5B, 0x6C, 0x48,
+ 0x55, 0x70, 0x69, 0x2E, 0x4C, 0x21, 0x24, 0x4E, 0x50, 0x09, 0x56, 0x73, 0x35, 0x61, 0x4B, 0x58,
+ 0x3B, 0x57, 0x22, 0x6D, 0x4D, 0x25, 0x28, 0x46, 0x4A, 0x32, 0x41, 0x3D, 0x5F, 0x4F, 0x42, 0x65}
+};
+
+/*
+ * Returns the next chunk of input in a newly allocated, NUL-terminated
+ * buffer of max_len bytes, or NULL at end of input or on error. The caller
+ * frees the result.
+ *
+ * Input comes from m_area when it is non-NULL (its offset is advanced past
+ * the returned bytes), otherwise from stream. A chunk ends after a newline
+ * or, when no newline fits within max_len-1 bytes, preferably at the last
+ * whitespace character so that words are not split; if the chunk contains no
+ * usable whitespace it is returned at full length.
+ */
+unsigned char *cli_readline(FILE *stream, m_area_t *m_area, unsigned int max_len)
+{
+    unsigned char *line, *ptr, *start, *end;
+    unsigned int line_len, count;
+
+    line = (unsigned char *) cli_malloc(max_len);
+    if (!line) {
+        return NULL;
+    }
+
+    /* Try and use the memory buffer first */
+    if (m_area) {
+        start = ptr = m_area->buffer + m_area->offset;
+        end = m_area->buffer + m_area->length;
+        if (start >= end) {
+            free(line);
+            return NULL;
+        }
+        /* line_len counts the byte at ptr as well, i.e. ptr - start + 1 */
+        line_len = 1;
+        while ((ptr < end) && (*ptr != '\n') && (line_len < (max_len-1))) {
+            ptr++;
+            line_len++;
+        }
+        if (ptr == end) {
+            /* ran off the end of the area: there is no byte at ptr to include */
+            line_len--;
+            memcpy(line, start, line_len);
+            line[line_len] = '\0';
+        } else if (*ptr == '\n') {
+            /* the newline itself is part of the returned chunk */
+            memcpy(line, start, line_len);
+            line[line_len] = '\0';
+        } else {
+            /* Hit max_len */
+            /* Store the current line end and length*/
+            count = line_len;
+            while (!isspace(*ptr) && (line_len > 1)) {
+                ptr--;
+                line_len--;
+            }
+            if (line_len == 1) {
+                /* no whitespace found: fall back to the full-length chunk */
+                line_len=count;
+            }
+            memcpy(line, start, line_len);
+            line[line_len] = '\0';
+        }
+        m_area->offset += line_len;
+    } else {
+        /* Signed on purpose: an unsigned counter decremented below zero turns
+         * into a large positive offset when converted to fseek()'s long
+         * argument on platforms where long is wider than int. */
+        long rewind_by = 0;
+
+        if (!stream) {
+            cli_dbgmsg("No HTML stream\n");
+            free(line);
+            return NULL;
+        }
+        if (fgets((char *) line, max_len, stream) == NULL) {
+            free(line);
+            return NULL;
+        }
+
+        line_len=strlen((const char *) line);
+        if (line_len == 0) {
+            free(line);
+            return NULL;
+        }
+        if (line_len == max_len-1) {
+            /* didn't find a whole line - rewind to a space*/
+            while (!isspace(line[--line_len])) {
+                rewind_by--;
+                if (line_len == 0) {
+                    /* no whitespace at all: return the chunk unmodified */
+                    return line;
+                }
+            }
+            /* push the bytes after the whitespace back for the next call */
+            fseek(stream, rewind_by, SEEK_CUR);
+            line[line_len+1] = '\0';
+        }
+    }
+    return line;
+}
+
+/* Writes any pending bytes of fbuff to its file descriptor and marks the
+ * buffer empty. Does nothing for a NULL or empty buffer. */
+static void html_output_flush(file_buff_t *fbuff)
+{
+    if (fbuff == NULL || fbuff->length <= 0)
+        return;
+
+    cli_writen(fbuff->fd, fbuff->buffer, fbuff->length);
+    fbuff->length = 0;
+}
+
+/* Appends the byte c to each of the two output buffers that is non-NULL,
+ * flushing a buffer first when it is full. */
+static void html_output_c(file_buff_t *fbuff1, file_buff_t *fbuff2, unsigned char c)
+{
+    file_buff_t *targets[2];
+    int t;
+
+    targets[0] = fbuff1;
+    targets[1] = fbuff2;
+
+    for (t = 0; t < 2; t++) {
+        file_buff_t *out = targets[t];
+
+        if (!out)
+            continue;
+        if (out->length == HTML_FILE_BUFF_LEN)
+            html_output_flush(out);
+        out->buffer[out->length++] = c;
+    }
+}
+
+/* Appends len bytes of str to fbuff. The buffer is flushed first if the new
+ * data would fill it; data at least as large as the whole buffer bypasses it
+ * and is written straight to the file descriptor. NULL fbuff is a no-op. */
+static void html_output_str(file_buff_t *fbuff, const unsigned char *str, int len)
+{
+    if (!fbuff)
+        return;
+
+    if ((fbuff->length + len) >= HTML_FILE_BUFF_LEN)
+        html_output_flush(fbuff);
+
+    if (len < HTML_FILE_BUFF_LEN) {
+        memcpy(fbuff->buffer + fbuff->length, str, len);
+        fbuff->length += len;
+        return;
+    }
+
+    /* too big to buffer */
+    html_output_flush(fbuff);
+    cli_writen(fbuff->fd, str, len);
+}
+
+/* Returns the stored value of the first argument named `tag`, or NULL if no
+ * argument has that name. The returned pointer is owned by `tags`. */
+static char *html_tag_arg_value(tag_arguments_t *tags, const char *tag)
+{
+    int pos = 0;
+
+    while (pos < tags->count) {
+        if (!strcmp(tags->tag[pos], tag))
+            return tags->value[pos];
+        pos++;
+    }
+    return NULL;
+}
+
+/* Replaces the value of the first argument named `tag` with a copy of
+ * `value`, freeing the previous value. Does nothing if no such argument
+ * exists. */
+static void html_tag_arg_set(tag_arguments_t *tags, const char *tag, const char *value)
+{
+    int pos;
+
+    for (pos = 0; pos < tags->count; pos++) {
+        if (strcmp(tags->tag[pos], tag) != 0)
+            continue;
+        free(tags->value[pos]);
+        tags->value[pos] = cli_strdup(value);
+        break;
+    }
+}
+/*
+ * Appends one argument (name `tag`, optional `value`) to `tags`.
+ * The tag and value arrays - and the contents array when tags->scanContents
+ * is set - are grown by one entry, and copies of the strings are stored.
+ * If growing any array fails, every entry is released and `tags` is reset to
+ * empty. NOTE(review): the cleanup relies on what cli_realloc2() does with
+ * the old block on failure - confirm against its definition.
+ */
+static void html_tag_arg_add(tag_arguments_t *tags,
+ const unsigned char *tag, unsigned char *value)
+{
+ int len, i;
+ tags->count++;
+ tags->tag = (unsigned char **) cli_realloc2(tags->tag,
+ tags->count * sizeof(char *));
+ if (!tags->tag) {
+ goto abort;
+ }
+ tags->value = (unsigned char **) cli_realloc2(tags->value,
+ tags->count * sizeof(char *));
+ if (!tags->value) {
+ goto abort;
+ }
+ if(tags->scanContents) {
+ tags->contents= (blob **) cli_realloc2(tags->contents,
+ tags->count*sizeof(*tags->contents));
+ if(!tags->contents) {
+ goto abort;
+ }
+ /* no contents collected yet for the new entry */
+ tags->contents[tags->count-1]=NULL;
+ }
+ tags->tag[tags->count-1] = cli_strdup(tag);
+ if (value) {
+ if (*value == '"') {
+ /* skip the leading quote and overwrite the last character of the copy,
+ * presumably the closing quote */
+ tags->value[tags->count-1] = cli_strdup(value+1);
+ len = strlen(value+1);
+ if (len > 0) {
+ tags->value[tags->count-1][len-1] = '\0';
+ }
+ } else {
+ tags->value[tags->count-1] = cli_strdup(value);
+ }
+ } else {
+ tags->value[tags->count-1] = NULL;
+ }
+ return;
+
+abort:
+ /* Bad error - can't do 100% recovery */
+ /* undo the increment: the new entry was never filled in */
+ tags->count--;
+ for (i=0; i < tags->count; i++) {
+ if (tags->tag) {
+ free(tags->tag[i]);
+ }
+ if (tags->value) {
+ free(tags->value[i]);
+ }
+ if(tags->contents) {
+ if(tags->contents[i])
+ blobDestroy(tags->contents[i]);
+ }
+ }
+ if (tags->tag) {
+ free(tags->tag);
+ }
+ if (tags->value) {
+ free(tags->value);
+ }
+ if (tags->contents)
+ free(tags->contents);
+ tags->contents=NULL;
+ tags->tag = tags->value = NULL;
+ tags->count = 0;
+ return;
+}
+
+/* Writes `<tag name="value" ...>` to fbuff: the tag name and argument names
+ * are emitted as stored, argument values are lower-cased and wrapped in
+ * double quotes; arguments without a value are written as a bare name. */
+static void html_output_tag(file_buff_t *fbuff, char *tag, tag_arguments_t *tags)
+{
+    int arg, pos, value_len;
+
+    html_output_c(fbuff, NULL, '<');
+    html_output_str(fbuff, tag, strlen(tag));
+
+    for (arg = 0; arg < tags->count; arg++) {
+        html_output_c(fbuff, NULL, ' ');
+        html_output_str(fbuff, tags->tag[arg], strlen(tags->tag[arg]));
+
+        if (!tags->value[arg])
+            continue;
+
+        html_output_str(fbuff, "=\"", 2);
+        value_len = strlen(tags->value[arg]);
+        pos = 0;
+        while (pos < value_len) {
+            html_output_c(fbuff, NULL, tolower(tags->value[arg][pos]));
+            pos++;
+        }
+        html_output_c(fbuff, NULL, '"');
+    }
+
+    html_output_c(fbuff, NULL, '>');
+}
+
+/* Releases every name, value and contents blob held by `tags`, then the
+ * arrays themselves, and leaves `tags` empty (count 0, all pointers NULL). */
+void html_tag_arg_free(tag_arguments_t *tags)
+{
+    int pos = 0;
+
+    while (pos < tags->count) {
+        free(tags->tag[pos]);
+        if (tags->value[pos] != NULL)
+            free(tags->value[pos]);
+        if (tags->contents != NULL && tags->contents[pos] != NULL)
+            blobDestroy(tags->contents[pos]);
+        pos++;
+    }
+
+    if (tags->tag != NULL)
+        free(tags->tag);
+    if (tags->value != NULL)
+        free(tags->value);
+    if (tags->contents != NULL)
+        free(tags->contents);
+
+    tags->count = 0;
+    tags->tag = tags->value = NULL;
+    tags->contents = NULL;
+}
+
+/**
+ * this is used for img, and iframe tags. If they are inside an <a href> tag, then set the contents of the image|iframe to the real URL.
+ */
+static inline void html_tag_set_inahref(tag_arguments_t *tags,int idx,int in_ahref)
+{
+    /* idx and in_ahref are 1-based; the new blob holds a NUL-terminated copy
+     * of the value stored for the enclosing <a href> entry */
+    const int slot = idx-1;
+    const int href = in_ahref-1;
+
+    tags->contents[slot]=blobCreate();
+    blobAddData(tags->contents[slot],tags->value[href],strlen(tags->value[href]));
+    blobAddData(tags->contents[slot], "",1);
+    blobClose(tags->contents[slot]);
+}
+
+/**
+ * the displayed text for an <a href> tag
+ */
+static inline void html_tag_contents_append(tag_arguments_t *tags,int idx,const unsigned char* begin,const unsigned char *end)
+{
+    /* Appends the bytes in [begin, end) to the contents blob of entry idx
+     * (1-based), never letting the blob grow past MAX_TAG_CONTENTS_LENGTH. */
+    size_t stored, room, wanted;
+
+    if (!end || !(begin<end))
+        return;
+
+    stored = blobGetDataSize(tags->contents[idx-1]);
+    room = 0;
+    if (stored <= MAX_TAG_CONTENTS_LENGTH)
+        room = MAX_TAG_CONTENTS_LENGTH - stored;
+    wanted = end - begin;
+
+    if (!room)
+        return;
+    if (wanted > room)
+        wanted = room;
+    blobAddData(tags->contents[idx-1],begin, wanted);
+}
+
+
+static inline void html_tag_contents_done(tag_arguments_t *tags,int idx)
+{
+    /* idx is 1-based: terminate the collected contents with a NUL byte,
+     * then close the blob */
+    const int slot = idx-1;
+
+    blobAddData(tags->contents[slot], "", 1);
+    blobClose(tags->contents[slot]);
+}
+
+static int cli_html_normalise(int fd, m_area_t *m_area, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf)
+{
+ int fd_tmp, tag_length, tag_arg_length, binary;
+ int retval=FALSE, escape, value = 0, hex, tag_val_length=0, table_pos, in_script=FALSE;
+ FILE *stream_in = NULL;
+ html_state state=HTML_NORM, next_state=HTML_BAD_STATE;
+ char filename[1024], tag[HTML_STR_LENGTH+1], tag_arg[HTML_STR_LENGTH+1];
+ char tag_val[HTML_STR_LENGTH+1], *tmp_file;
+ unsigned char *line, *ptr, *arg_value;
+ tag_arguments_t tag_args;
+ quoted_state quoted;
+ unsigned long length;
+ file_buff_t *file_buff_o1, *file_buff_o2, *file_buff_script;
+ file_buff_t *file_tmp_o1;
+ int in_ahref=0;/* index of <a> tag, whose contents we are parsing. Indexing starts from 1, 0 means outside of <a>*/
+ unsigned char* href_contents_begin=NULL;/*beginning of the next portion of <a> contents*/
+ unsigned char* ptrend=NULL;/*end of <a> contents*/
+ unsigned char* in_form_action = NULL;/* the action URL of the current <form> tag, if any*/
+
+ struct entity_conv conv;
+ int rc;
+ unsigned char entity_val[HTML_STR_LENGTH+1];
+ size_t entity_val_length = 0;
+ const int dconf_entconv = dconf && dconf->phishing&PHISHING_CONF_ENTCONV;
+ /* dconf for phishing engine sets scanContents, so no need for a flag here */
+
+
+ tag_args.scanContents=0;/* do we need to store the contents of <a></a>?*/
+ if (!m_area) {
+ if (fd < 0) {
+ cli_dbgmsg("Invalid HTML fd\n");
+ return FALSE;
+ }
+ lseek(fd, 0, SEEK_SET);
+ fd_tmp = dup(fd);
+ if (fd_tmp < 0) {
+ return FALSE;
+ }
+ stream_in = fdopen(fd_tmp, "r");
+ if (!stream_in) {
+ close(fd_tmp);
+ return FALSE;
+ }
+ }
+
+ if(dconf_entconv && (rc = init_entity_converter(&conv, UNKNOWN, 16384) )) {
+ if (!m_area) {
+ fclose(stream_in);
+ }
+ return rc;
+ }
+
+ tag_args.count = 0;
+ tag_args.tag = NULL;
+ tag_args.value = NULL;
+ tag_args.contents = NULL;
+
+ if (dirname) {
+ snprintf(filename, 1024, "%s/rfc2397", dirname);
+ if (mkdir(filename, 0700) && errno != EEXIST) {
+ file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+ goto abort;
+ }
+ file_buff_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+ if (!file_buff_o1) {
+ file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+ goto abort;
+ }
+
+ file_buff_o2 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+ if (!file_buff_o2) {
+ free(file_buff_o1);
+ file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+ goto abort;
+ }
+
+ file_buff_script = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+ if (!file_buff_script) {
+ free(file_buff_o1);
+ free(file_buff_o2);
+ file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+ goto abort;
+ }
+
+ snprintf(filename, 1024, "%s/comment.html", dirname);
+ file_buff_o1->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+ if (!file_buff_o1->fd) {
+ cli_dbgmsg("open failed: %s\n", filename);
+ free(file_buff_o1);
+ free(file_buff_o2);
+ free(file_buff_script);
+ file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+ goto abort;
+ }
+
+ snprintf(filename, 1024, "%s/nocomment.html", dirname);
+ file_buff_o2->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+ if (!file_buff_o2->fd) {
+ cli_dbgmsg("open failed: %s\n", filename);
+ close(file_buff_o1->fd);
+ free(file_buff_o1);
+ free(file_buff_o2);
+ free(file_buff_script);
+ file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+ goto abort;
+ }
+
+ snprintf(filename, 1024, "%s/script.html", dirname);
+ file_buff_script->fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+ if (!file_buff_script->fd) {
+ cli_dbgmsg("open failed: %s\n", filename);
+ close(file_buff_o1->fd);
+ close(file_buff_o2->fd);
+ free(file_buff_o1);
+ free(file_buff_o2);
+ free(file_buff_script);
+ file_buff_o1 = file_buff_o2 = file_buff_script = NULL;
+ goto abort;
+ }
+
+ file_buff_o1->length = 0;
+ file_buff_o2->length = 0;
+ file_buff_script->length = 0;
+ } else {
+ file_buff_o1 = NULL;
+ file_buff_o2 = NULL;
+ file_buff_script = NULL;
+ }
+
+ binary = FALSE;
+
+ if(dconf_entconv)
+ ptr = line = encoding_norm_readline(&conv, stream_in, m_area, 8192);
+ else
+ ptr = line = cli_readline(stream_in, m_area, 8192);
+
+ while (line) {
+ if(href_contents_begin)
+ href_contents_begin=ptr;/*start of a new line, last line already appended to contents see below*/
+ while (*ptr && isspace(*ptr)) {
+ ptr++;
+ }
+ while (*ptr) {
+ if (!binary && *ptr == '\n') {
+ /* Convert it to a space and re-process */
+ *ptr = ' ';
+ continue;
+ }
+ if (!binary && *ptr == '\r') {
+ ptr++;
+ continue;
+ }
+ switch (state) {
+ case HTML_SPECIAL_CHAR:
+ cli_dbgmsg("Impossible, special_char can't occur here\n");
+ break;
+ case HTML_BAD_STATE:
+ /* An engine error has occurred */
+ cli_dbgmsg("HTML Engine Error\n");
+ goto abort;
+ case HTML_SKIP_LENGTH:
+ length--;
+ ptr++;
+ if (!length) {
+ state = next_state;
+ }
+ break;
+ case HTML_SKIP_WS:
+ if (isspace(*ptr)) {
+ ptr++;
+ } else {
+ state = next_state;
+ next_state = HTML_BAD_STATE;
+ }
+ break;
+ case HTML_TRIM_WS:
+ if (isspace(*ptr)) {
+ ptr++;
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, ' ');
+ state = next_state;
+ next_state = HTML_BAD_STATE;
+ }
+ break;
+ case HTML_NORM:
+ if (*ptr == '<') {
+ ptrend=ptr; /* for use by scanContents */
+ html_output_c(file_buff_o1, file_buff_o2, '<');
+ if (in_script) {
+ html_output_c(file_buff_script, NULL, '<');
+ }
+ if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin) {
+ /*append this text portion to the contents of <a>*/
+ html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
+ href_contents_begin=NULL;/*We just encountered another tag inside <a>, so skip it*/
+ }
+ ptr++;
+ state = HTML_SKIP_WS;
+ tag_length=0;
+ next_state = HTML_TAG;
+ } else if (isspace(*ptr)) {
+ state = HTML_TRIM_WS;
+ next_state = HTML_NORM;
+ } else if (*ptr == '&') {
+ state = HTML_CHAR_REF;
+ next_state = HTML_NORM;
+ ptr++;
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+ if (in_script) {
+ html_output_c(file_buff_script, NULL, tolower(*ptr));
+ }
+ ptr++;
+ }
+ break;
+ case HTML_TAG:
+ if ((tag_length == 0) && (*ptr == '!')) {
+ /* Comment */
+ html_output_c(file_buff_o1, NULL, '!');
+ if (in_script) {
+ html_output_c(file_buff_script, NULL, '!');
+ }
+ /* Need to rewind in the no-comment output stream */
+ if (file_buff_o2 && (file_buff_o2->length > 0)) {
+ file_buff_o2->length--;
+ }
+ state = HTML_COMMENT;
+ next_state = HTML_BAD_STATE;
+ ptr++;
+ } else if (*ptr == '>') {
+ html_output_c(file_buff_o1, file_buff_o2, '>');
+ if (in_script) {
+ html_output_c(file_buff_script, NULL, '>');
+ }
+ ptr++;
+ tag[tag_length] = '\0';
+ state = HTML_SKIP_WS;
+ next_state = HTML_PROCESS_TAG;
+ } else if (!isspace(*ptr)) {
+ html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+ if (in_script) {
+ html_output_c(file_buff_script, NULL, tolower(*ptr));
+ }
+ if (tag_length < HTML_STR_LENGTH) {
+ tag[tag_length++] = tolower(*ptr);
+ }
+ ptr++;
+ } else {
+ tag[tag_length] = '\0';
+ state = HTML_SKIP_WS;
+ tag_arg_length = 0;
+ next_state = HTML_TAG_ARG;
+ }
+ break;
+ case HTML_TAG_ARG:
+ if (*ptr == '=') {
+ html_output_c(file_buff_o1, file_buff_o2, '=');
+ tag_arg[tag_arg_length] = '\0';
+ ptr++;
+ state = HTML_SKIP_WS;
+ escape = FALSE;
+ quoted = NOT_QUOTED;
+ tag_val_length = 0;
+ next_state = HTML_TAG_ARG_VAL;
+ } else if (isspace(*ptr)) {
+ ptr++;
+ tag_arg[tag_arg_length] = '\0';
+ state = HTML_SKIP_WS;
+ next_state = HTML_TAG_ARG_EQUAL;
+ } else if (*ptr == '>') {
+ html_output_c(file_buff_o1, file_buff_o2, '>');
+ if (tag_arg_length > 0) {
+ tag_arg[tag_arg_length] = '\0';
+ html_tag_arg_add(&tag_args, tag_arg, NULL);
+ }
+ ptr++;
+ state = HTML_PROCESS_TAG;
+ next_state = HTML_BAD_STATE;
+ } else {
+ if (tag_arg_length == 0) {
+ /* Start of new tag - add space */
+ html_output_c(file_buff_o1, file_buff_o2,' ');
+ }
+ html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+ if (tag_arg_length < HTML_STR_LENGTH) {
+ tag_arg[tag_arg_length++] = tolower(*ptr);
+ }
+ ptr++;
+ }
+ break;
+ case HTML_TAG_ARG_EQUAL:
+ if (*ptr == '=') {
+ html_output_c(file_buff_o1, file_buff_o2, '=');
+ ptr++;
+ state = HTML_SKIP_WS;
+ escape = FALSE;
+ quoted = NOT_QUOTED;
+ tag_val_length = 0;
+ next_state = HTML_TAG_ARG_VAL;
+ } else {
+ if (tag_arg_length > 0) {
+ tag_arg[tag_arg_length] = '\0';
+ html_tag_arg_add(&tag_args, tag_arg, NULL);
+ }
+ tag_arg_length=0;
+ state = HTML_TAG_ARG;
+ next_state = HTML_BAD_STATE;
+ }
+ break;
+ case HTML_TAG_ARG_VAL:
+ if ((tag_val_length == 5) && (strncmp(tag_val, "data:", 5) == 0)) {
+ /* RFC2397 inline data */
+
+ /* Rewind one byte so we don't recurse */
+ if (file_buff_o1 && (file_buff_o1->length > 0)) {
+ file_buff_o1->length--;
+ }
+ if (file_buff_o2 && (file_buff_o2->length > 0)) {
+ file_buff_o2->length--;
+ }
+
+ if (quoted != NOT_QUOTED) {
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ }
+ tag_val_length = 0;
+ state = HTML_RFC2397_TYPE;
+ next_state = HTML_TAG_ARG;
+ } else if ((tag_val_length == 6) && (strncmp(tag_val, "\"data:", 6) == 0)) {
+ /* RFC2397 inline data */
+
+ /* Rewind one byte so we don't recurse */
+ if (file_buff_o1 && (file_buff_o1->length > 0)) {
+ file_buff_o1->length--;
+ }
+ if (file_buff_o2 && (file_buff_o2->length > 0)) {
+ file_buff_o2->length--;
+ }
+
+ if (quoted != NOT_QUOTED) {
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ }
+
+ tag_val_length = 0;
+ state = HTML_RFC2397_TYPE;
+ next_state = HTML_TAG_ARG;
+ } else if (*ptr == '&') {
+ state = HTML_CHAR_REF;
+ next_state = HTML_TAG_ARG_VAL;
+ ptr++;
+ } else if (*ptr == '\'') {
+ if (tag_val_length == 0) {
+ quoted = SINGLE_QUOTED;
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ ptr++;
+ } else {
+ if (!escape && (quoted==SINGLE_QUOTED)) {
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ tag_val[tag_val_length] = '\0';
+ html_tag_arg_add(&tag_args, tag_arg, tag_val);
+ ptr++;
+ state = HTML_SKIP_WS;
+ tag_arg_length=0;
+ next_state = HTML_TAG_ARG;
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ ptr++;
+ }
+ }
+ } else if (*ptr == '"') {
+ if (tag_val_length == 0) {
+ quoted = DOUBLE_QUOTED;
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ ptr++;
+ } else {
+ if (!escape && (quoted==DOUBLE_QUOTED)) {
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ tag_val[tag_val_length] = '\0';
+ html_tag_arg_add(&tag_args, tag_arg, tag_val);
+ ptr++;
+ state = HTML_SKIP_WS;
+ tag_arg_length=0;
+ next_state = HTML_TAG_ARG;
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, '"');
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ ptr++;
+ }
+ }
+ } else if (isspace(*ptr) || (*ptr == '>')) {
+ if (quoted == NOT_QUOTED) {
+ tag_val[tag_val_length] = '\0';
+ html_tag_arg_add(&tag_args, tag_arg, tag_val);
+ state = HTML_SKIP_WS;
+ tag_arg_length=0;
+ next_state = HTML_TAG_ARG;
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, *ptr);
+ if (tag_val_length < HTML_STR_LENGTH) {
+ if (isspace(*ptr)) {
+ tag_val[tag_val_length++] = ' ';
+ } else {
+ tag_val[tag_val_length++] = '>';
+ }
+ }
+ state = HTML_SKIP_WS;
+ escape = FALSE;
+ quoted = NOT_QUOTED;
+ next_state = HTML_TAG_ARG_VAL;
+ ptr++;
+ }
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = *ptr;
+ }
+ ptr++;
+ }
+
+ if (*ptr == '\\') {
+ escape = TRUE;
+ } else {
+ escape = FALSE;
+ }
+ break;
+ case HTML_COMMENT:
+ html_output_c(file_buff_o1, NULL, tolower(*ptr));
+ if (in_script) {
+ html_output_c(file_buff_script, NULL, tolower(*ptr));
+ }
+ if (*ptr == '>') {
+ state = HTML_SKIP_WS;
+ next_state = HTML_NORM;
+ }
+ ptr++;
+ break;
+ case HTML_PROCESS_TAG:
+
+ /* Default to no action for this tag */
+ state = HTML_SKIP_WS;
+ next_state = HTML_NORM;
+ if (tag[0] == '/') {
+ /* End tag */
+ state = HTML_SKIP_WS;
+ next_state = HTML_NORM;
+ if (strcmp(tag, "/script") == 0) {
+ in_script=FALSE;
+ html_output_c(file_buff_script, NULL, '\n');
+ }
+ if (hrefs && hrefs->scanContents && in_ahref) {
+ if(strcmp(tag,"/a") == 0) {
+ html_tag_contents_done(hrefs,in_ahref);
+ in_ahref=0;/* we are no longer inside an <a href>
+ nesting <a> tags not supported, and shouldn't be supported*/
+ }
+ href_contents_begin=ptr;
+ }
+ if (strcmp(tag, "/form") == 0) {
+ if (in_form_action)
+ free(in_form_action);
+ in_form_action = NULL;
+ }
+ } else if (strcmp(tag, "script") == 0) {
+ arg_value = html_tag_arg_value(&tag_args, "language");
+ if (arg_value && (strcasecmp(arg_value, "jscript.encode") == 0)) {
+ html_tag_arg_set(&tag_args, "language", "javascript");
+ state = HTML_SKIP_WS;
+ next_state = HTML_JSDECODE;
+ } else if (arg_value && (strcasecmp(arg_value, "vbscript.encode") == 0)) {
+ html_tag_arg_set(&tag_args, "language", "vbscript");
+ state = HTML_SKIP_WS;
+ next_state = HTML_JSDECODE;
+ } else {
+ in_script = TRUE;
+ }
+ html_output_tag(file_buff_script, tag, &tag_args);
+ } else if (dconf_entconv && strcmp(tag, "meta") == 0) {
+ const unsigned char* http_equiv = html_tag_arg_value(&tag_args, "http-equiv");
+ const unsigned char* http_content = html_tag_arg_value(&tag_args, "content");
+ if(http_equiv && http_content && strcasecmp(http_equiv,"content-type") == 0) {
+ size_t len = strlen((const char*)http_content);
+ unsigned char* http_content2 = cli_malloc( len + 1);
+ unsigned char* charset;
+ size_t i;
+
+ if(!http_content2)
+ return CL_EMEM;
+ for(i = 0; i < len; i++)
+ http_content2[i] = tolower(http_content[i]);
+ http_content2[len] = '\0';
+ charset = (unsigned char*) strstr((char*)http_content2,"charset");
+ if(charset) {
+ while(*charset && *charset != '=')
+ charset++;
+ if(*charset)
+ charset++;/* skip = */
+ len = strcspn((const char*)charset," \"'");
+ charset[len] = '\0';
+ if(len) {
+ process_encoding_set(&conv, charset, META);
+ }
+ }
+ free(http_content2);
+ }
+ } else if (hrefs) {
+ if(in_ahref && !href_contents_begin)
+ href_contents_begin=ptr;
+ if (strcmp(tag, "a") == 0) {
+ arg_value = html_tag_arg_value(&tag_args, "href");
+ if (arg_value && strlen(arg_value) > 0) {
+ if (hrefs->scanContents) {
+ unsigned char* arg_value_title = html_tag_arg_value(&tag_args,"title");
+ /*beginning of an <a> tag*/
+ if (in_ahref)
+ /*we encountered nested <a> tags, pretend previous closed*/
+ if (href_contents_begin) {
+ html_tag_contents_append(hrefs,in_ahref,
+ href_contents_begin,ptrend);
+ /*add pending contents between tags*/
+ html_tag_contents_done(hrefs,in_ahref);
+ in_ahref=0;
+ }
+ if (arg_value_title) {
+ /* title is a 'displayed link'*/
+ html_tag_arg_add(hrefs,"href_title",arg_value_title);
+ hrefs->contents[hrefs->count-1]=blobCreate();
+ html_tag_contents_append(hrefs,hrefs->count,arg_value,
+ arg_value+strlen(arg_value));
+ html_tag_contents_done(hrefs,hrefs->count);
+ }
+ if (in_form_action) {
+ /* form action is the real URL, and href is the 'displayed' */
+ html_tag_arg_add(hrefs,"form",arg_value);
+ hrefs->contents[hrefs->count-1] = blobCreate();
+ html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+ in_form_action + strlen(in_form_action));
+ html_tag_contents_done(hrefs,hrefs->count);
+ }
+ }
+ html_tag_arg_add(hrefs, "href", arg_value);
+ if (hrefs->scanContents) {
+ in_ahref=hrefs->count; /* index of this tag (counted from 1) */
+ href_contents_begin=ptr;/* contents begin after <a ..> ends */
+ hrefs->contents[hrefs->count-1]=blobCreate();
+ }
+ }
+ } else if (strcmp(tag,"form") == 0 && hrefs->scanContents) {
+ const unsigned char* arg_action_value = html_tag_arg_value(&tag_args,"action");
+ if (arg_action_value) {
+ if(in_form_action)
+ free(in_form_action);
+ in_form_action = cli_strdup(arg_action_value);
+ }
+ } else if (strcmp(tag, "img") == 0) {
+ arg_value = html_tag_arg_value(&tag_args, "src");
+ if (arg_value && strlen(arg_value) > 0) {
+ html_tag_arg_add(hrefs, "src", arg_value);
+ if(hrefs->scanContents && in_ahref)
+ /* "contents" of an img tag, is the URL of its parent <a> tag */
+ html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+ if (in_form_action) {
+ /* form action is the real URL, and href is the 'displayed' */
+ html_tag_arg_add(hrefs,"form",arg_value);
+ hrefs->contents[hrefs->count-1] = blobCreate();
+ html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+ in_form_action + strlen(in_form_action));
+ html_tag_contents_done(hrefs,hrefs->count);
+ }
+ }
+ arg_value = html_tag_arg_value(&tag_args, "dynsrc");
+ if (arg_value && strlen(arg_value) > 0) {
+ html_tag_arg_add(hrefs, "dynsrc", arg_value);
+ if(hrefs->scanContents && in_ahref)
+ /* see above */
+ html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+ if (in_form_action) {
+ /* form action is the real URL, and href is the 'displayed' */
+ html_tag_arg_add(hrefs,"form",arg_value);
+ hrefs->contents[hrefs->count-1] = blobCreate();
+ html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+ in_form_action + strlen(in_form_action));
+ html_tag_contents_done(hrefs,hrefs->count);
+ }
+ }
+ } else if (strcmp(tag, "iframe") == 0) {
+ arg_value = html_tag_arg_value(&tag_args, "src");
+ if (arg_value && strlen(arg_value) > 0) {
+ html_tag_arg_add(hrefs, "iframe", arg_value);
+ if(hrefs->scanContents && in_ahref)
+ /* see above */
+ html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+ if (in_form_action) {
+ /* form action is the real URL, and href is the 'displayed' */
+ html_tag_arg_add(hrefs,"form",arg_value);
+ hrefs->contents[hrefs->count-1] = blobCreate();
+ html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+ in_form_action + strlen(in_form_action));
+ html_tag_contents_done(hrefs,hrefs->count);
+ }
+ }
+ } else if (strcmp(tag,"area") == 0) {
+ arg_value = html_tag_arg_value(&tag_args,"href");
+ if (arg_value && strlen(arg_value) > 0) {
+ html_tag_arg_add(hrefs, "area", arg_value);
+ if(hrefs->scanContents && in_ahref)
+ /* see above */
+ html_tag_set_inahref(hrefs,hrefs->count,in_ahref);
+ if (in_form_action) {
+ /* form action is the real URL, and href is the 'displayed' */
+ html_tag_arg_add(hrefs,"form",arg_value);
+ hrefs->contents[hrefs->count-1] = blobCreate();
+ html_tag_contents_append(hrefs, hrefs->count, in_form_action,
+ in_form_action + strlen(in_form_action));
+ html_tag_contents_done(hrefs,hrefs->count);
+ }
+ }
+ }
+ /* TODO:imagemaps can have urls too */
+ }
+ html_tag_arg_free(&tag_args);
+ break;
+ case HTML_CHAR_REF:
+ if (*ptr == '#') {
+ value = 0;
+ hex = FALSE;
+ state = HTML_CHAR_REF_DECODE;
+ ptr++;
+ } else {
+ if(dconf_entconv)
+ state = HTML_ENTITY_REF_DECODE;
+ else {
+ if(next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '&';
+ }
+ html_output_c(file_buff_o1, file_buff_o2, '&');
+
+ state = next_state;
+ next_state = HTML_BAD_STATE;
+ }
+ }
+ break;
+ case HTML_ENTITY_REF_DECODE:
+ if(*ptr == ';') {
+ size_t i;
+ unsigned char* normalized;
+ entity_val[entity_val_length] = '\0';
+ normalized = entity_norm(&conv, entity_val);
+ if(normalized) {
+ for(i=0; i < strlen(normalized); i++) {
+ const char c = tolower(normalized[i]);
+ html_output_c(file_buff_o1, file_buff_o2, c);
+ if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = c;
+ }
+ }
+ free(normalized);
+ }
+ else {
+ html_output_c(file_buff_o1, file_buff_o2, '&');
+ if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '&';
+ }
+ for(i=0; i < entity_val_length; i++) {
+ const char c = tolower(entity_val[i]);
+ html_output_c(file_buff_o1, file_buff_o2, c);
+ if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = c;
+ }
+ }
+ if (next_state == HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = ';';
+ }
+ html_output_c(file_buff_o1, file_buff_o2, ';');
+ }
+ entity_val_length = 0;
+ state = next_state;
+ next_state = HTML_BAD_STATE;
+ ptr++;
+ }
+ else if ( (isalnum(*ptr) || *ptr=='_' || *ptr==':' || (*ptr=='-')) && entity_val_length < HTML_STR_LENGTH) {
+ entity_val[entity_val_length++] = *ptr++;
+ }
+ else {
+ /* entity too long, or not valid, dump it */
+ size_t i;
+ if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '&';
+ }
+ html_output_c(file_buff_o1, file_buff_o2, '&');
+ for(i=0; i < entity_val_length; i++) {
+ const char c = tolower(entity_val[i]);
+ html_output_c(file_buff_o1, file_buff_o2, c);
+ if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = c;
+ }
+ }
+
+ state = next_state;
+ next_state = HTML_BAD_STATE;
+ entity_val_length = 0;
+ }
+ break;
+ case HTML_CHAR_REF_DECODE:
+ if ((value==0) && ((*ptr == 'x') || (*ptr == 'X'))) {
+ hex=TRUE;
+ ptr++;
+ } else if (*ptr == ';') {
+ if (next_state==HTML_TAG_ARG_VAL && tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = value; /* store encoded values too */
+ }
+ if(dconf_entconv) {
+
+ if(value < 0x80)
+ html_output_c(file_buff_o1, file_buff_o2, tolower(value));
+ else {
+ unsigned char buff[10];
+ snprintf((char*)buff,9,"&#%d;",value);
+ buff[9] = '\0';
+ html_output_str(file_buff_o1, buff, strlen(buff));
+ html_output_str(file_buff_o2, buff, strlen(buff));
+ }
+ } else
+ html_output_c(file_buff_o1, file_buff_o2, tolower(value&0xff));
+ state = next_state;
+ next_state = HTML_BAD_STATE;
+ ptr++;
+ } else if (isdigit(*ptr) || (hex && isxdigit(*ptr))) {
+ if (hex) {
+ value *= 16;
+ } else {
+ value *= 10;
+ }
+ if (isdigit(*ptr)) {
+ value += (*ptr - '0');
+ } else {
+ value += (tolower(*ptr) - 'a' + 10);
+ }
+ ptr++;
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, value);
+ state = next_state;
+ next_state = HTML_BAD_STATE;
+ }
+ break;
+ case HTML_JSDECODE:
+ /* Check for start marker */
+ if (strncmp(ptr, "#@~^", 4) == 0) {
+ ptr += 4;
+ state = HTML_JSDECODE_LENGTH;
+ next_state = HTML_BAD_STATE;
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, tolower(*ptr));
+ html_output_c(file_buff_script, NULL, tolower(*ptr));
+ ptr++;
+ }
+ break;
+ case HTML_JSDECODE_LENGTH:
+ if (strlen(ptr) < 8) {
+ state = HTML_NORM;
+ next_state = HTML_BAD_STATE;
+ break;
+ }
+ length = base64_chars[ptr[0]] << 2;
+ length += base64_chars[ptr[1]] >> 4;
+ length += (base64_chars[ptr[1]] & 0x0f) << 12;
+ length += (base64_chars[ptr[2]] >> 2) << 8;
+ length += (base64_chars[ptr[2]] & 0x03) << 22;
+ length += base64_chars[ptr[3]] << 16;
+ length += (base64_chars[ptr[4]] << 2) << 24;
+ length += (base64_chars[ptr[5]] >> 4) << 24;
+ table_pos = 0;
+ state = HTML_JSDECODE_DECRYPT;
+ next_state = HTML_BAD_STATE;
+ ptr += 8;
+ break;
+ case HTML_JSDECODE_DECRYPT:
+ if (length == 0) {
+ html_output_str(file_buff_script, "</script>\n", 10);
+ length = 12;
+ state = HTML_SKIP_LENGTH;
+ next_state = HTML_NORM;
+ break;
+ }
+ if (*ptr < 0x80) {
+ value = decrypt_tables[table_order[table_pos]][*ptr];
+ if (value == 0xFF) { /* special character */
+ ptr++;
+ length--;
+ switch (*ptr) {
+ case '\0':
+ /* Fixup for end of line */
+ ptr--;
+ break;
+ case 0x21:
+ html_output_c(file_buff_o1, file_buff_o2, 0x3c);
+ html_output_c(file_buff_script, NULL, 0x3c);
+ break;
+ case 0x23:
+ html_output_c(file_buff_o1, file_buff_o2, 0x0d);
+ html_output_c(file_buff_script, NULL, 0x0d);
+ break;
+ case 0x24:
+ html_output_c(file_buff_o1, file_buff_o2, 0x40);
+ html_output_c(file_buff_script, NULL, 0x40);
+ break;
+ case 0x26:
+ html_output_c(file_buff_o1, file_buff_o2, 0x0a);
+ html_output_c(file_buff_script, NULL, 0x0a);
+ break;
+ case 0x2a:
+ html_output_c(file_buff_o1, file_buff_o2, 0x3e);
+ html_output_c(file_buff_script, NULL, 0x3e);
+ break;
+ }
+ } else {
+ html_output_c(file_buff_o1, file_buff_o2, value);
+ html_output_c(file_buff_script, NULL, tolower(value));
+ }
+ }
+ table_pos = (table_pos + 1) % 64;
+ ptr++;
+ length--;
+ break;
+
+ case HTML_RFC2397_TYPE:
+ if (*ptr == '\'') {
+ if (!escape && (quoted==SINGLE_QUOTED)) {
+ /* Early end of data detected. Error */
+ ptr++;
+ state = HTML_SKIP_WS;
+ tag_arg_length=0;
+ next_state = HTML_TAG_ARG;
+ } else {
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ ptr++;
+ }
+ } else if (*ptr == '"') {
+ if (!escape && (quoted==DOUBLE_QUOTED)) {
+ /* Early end of data detected. Error */
+ ptr++;
+ state = HTML_SKIP_WS;
+ tag_arg_length=0;
+ next_state = HTML_TAG_ARG;
+ } else {
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = '"';
+ }
+ ptr++;
+ }
+ } else if (isspace(*ptr) || (*ptr == '>')) {
+ if (quoted == NOT_QUOTED) {
+ /* Early end of data detected. Error */
+ state = HTML_SKIP_WS;
+ tag_arg_length=0;
+ next_state = HTML_TAG_ARG;
+ } else {
+ if (tag_val_length < HTML_STR_LENGTH) {
+ if (isspace(*ptr)) {
+ tag_val[tag_val_length++] = ' ';
+ } else {
+ tag_val[tag_val_length++] = '>';
+ }
+ }
+ state = HTML_SKIP_WS;
+ escape = FALSE;
+ quoted = NOT_QUOTED;
+ next_state = HTML_RFC2397_TYPE;
+ ptr++;
+ }
+ } else if (*ptr == ',') {
+ /* Beginning of data */
+ tag_val[tag_val_length] = '\0';
+ state = HTML_RFC2397_INIT;
+ escape = FALSE;
+ next_state = HTML_BAD_STATE;
+ ptr++;
+
+ } else {
+ if (tag_val_length < HTML_STR_LENGTH) {
+ tag_val[tag_val_length++] = tolower(*ptr);
+ }
+ ptr++;
+ }
+ if (*ptr == '\\') {
+ escape = TRUE;
+ } else {
+ escape = FALSE;
+ }
+ break;
+ case HTML_RFC2397_INIT:
+ if (dirname) {
+ file_tmp_o1 = (file_buff_t *) cli_malloc(sizeof(file_buff_t));
+ if (!file_tmp_o1) {
+ goto abort;
+ }
+ snprintf(filename, 1024, "%s/rfc2397", dirname);
+ tmp_file = cli_gentemp(filename);
+ cli_dbgmsg("RFC2397 data file: %s\n", tmp_file);
+ file_tmp_o1->fd = open(tmp_file, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+ free(tmp_file);
+ if (!file_tmp_o1->fd) {
+ cli_dbgmsg("open failed: %s\n", filename);
+ free(file_tmp_o1);
+ goto abort;
+ }
+ file_tmp_o1->length = 0;
+
+ html_output_str(file_tmp_o1, "From html-normalise\n", 20);
+ html_output_str(file_tmp_o1, "Content-type: ", 14);
+ if ((tag_val_length == 0) && (*tag_val == ';')) {
+ html_output_str(file_tmp_o1, "text/plain\n", 11);
+ }
+ html_output_str(file_tmp_o1, tag_val, tag_val_length);
+ html_output_c(file_tmp_o1, NULL, '\n');
+ if (strstr(tag_val, ";base64") != NULL) {
+ html_output_str(file_tmp_o1, "Content-transfer-encoding: base64\n", 34);
+ }
+ html_output_c(file_tmp_o1, NULL, '\n');
+ } else {
+ file_tmp_o1 = NULL;
+ }
+ state = HTML_RFC2397_DATA;
+ binary = TRUE;
+ break;
+ case HTML_RFC2397_DATA:
+ if (*ptr == '&') {
+ state = HTML_CHAR_REF;
+ next_state = HTML_RFC2397_DATA;
+ ptr++;
+ } else if (*ptr == '%') {
+ length = 0;
+ value = 0;
+ state = HTML_ESCAPE_CHAR;
+ next_state = HTML_RFC2397_ESC;
+ ptr++;
+ } else if (*ptr == '\'') {
+ if (!escape && (quoted==SINGLE_QUOTED)) {
+ state = HTML_RFC2397_FINISH;
+ ptr++;
+ } else {
+ html_output_c(file_tmp_o1, NULL, *ptr);
+ ptr++;
+ }
+ } else if (*ptr == '\"') {
+ if (!escape && (quoted==DOUBLE_QUOTED)) {
+ state = HTML_RFC2397_FINISH;
+ ptr++;
+ } else {
+ html_output_c(file_tmp_o1, NULL, *ptr);
+ ptr++;
+ }
+ } else if (isspace(*ptr) || (*ptr == '>')) {
+ if (quoted == NOT_QUOTED) {
+ state = HTML_RFC2397_FINISH;
+ ptr++;
+ } else {
+ html_output_c(file_tmp_o1, NULL, *ptr);
+ ptr++;
+ }
+ } else {
+ html_output_c(file_tmp_o1, NULL, *ptr);
+ ptr++;
+ }
+ if (*ptr == '\\') {
+ escape = TRUE;
+ } else {
+ escape = FALSE;
+ }
+ break;
+ case HTML_RFC2397_FINISH:
+ if(file_tmp_o1) {
+ html_output_flush(file_tmp_o1);
+ close(file_tmp_o1->fd);
+ free(file_tmp_o1);
+ }
+ state = HTML_SKIP_WS;
+ escape = FALSE;
+ quoted = NOT_QUOTED;
+ next_state = HTML_TAG_ARG;
+ binary = FALSE;
+ break;
+ case HTML_RFC2397_ESC:
+ if (length == 2) {
+ html_output_c(file_tmp_o1, NULL, value);
+ } else if (length == 1) {
+ html_output_c(file_tmp_o1, NULL, '%');
+ html_output_c(file_tmp_o1, NULL, value+'0');
+ } else {
+ html_output_c(file_tmp_o1, NULL, '%');
+ }
+ state = HTML_RFC2397_DATA;
+ break;
+ case HTML_ESCAPE_CHAR:
+ value *= 16;
+ length++;
+ if (isxdigit(*ptr)) {
+ if (isdigit(*ptr)) {
+ value += (*ptr - '0');
+ } else {
+ value += (tolower(*ptr) - 'a' + 10);
+ }
+ } else {
+ state = next_state;
+ }
+ if (length == 2) {
+ state = next_state;
+ }
+ ptr++;
+ break;
+ }
+ }
+ if(hrefs && hrefs->scanContents && in_ahref && href_contents_begin)
+ /* end of line, append contents now, resume on next line */
+ html_tag_contents_append(hrefs,in_ahref,href_contents_begin,ptr);
+ ptrend = NULL;
+ free(line);
+ if(dconf_entconv)
+ ptr = line = encoding_norm_readline(&conv, stream_in, m_area, 8192);
+ else
+ ptr = line = cli_readline(stream_in, m_area, 8192);
+ }
+
+ if(dconf_entconv) {
+ /* handle "unfinished" entities */
+ size_t i;
+ unsigned char* normalized;
+ entity_val[entity_val_length] = '\0';
+ normalized = entity_norm(&conv, entity_val);
+ if(normalized) {
+ for(i=0; i < strlen(normalized); i++)
+ html_output_c(file_buff_o1, file_buff_o2, tolower(normalized[i]));
+ free(normalized);
+ }
+ else {
+ if(entity_val_length) {
+ html_output_c(file_buff_o1, file_buff_o2, '&');
+ for(i=0; i < entity_val_length; i++)
+ html_output_c(file_buff_o1, file_buff_o2, tolower(entity_val[i]));
+ }
+ }
+ }
+ retval = TRUE;
+abort:
+ if (in_form_action)
+ free(in_form_action);
+ if (in_ahref) /* tag not closed, force closing */
+ html_tag_contents_done(hrefs,in_ahref);
+
+ if(dconf_entconv)
+ entity_norm_done(&conv);
+ html_tag_arg_free(&tag_args);
+ if (!m_area) {
+ fclose(stream_in);
+ }
+ if (file_buff_o1) {
+ html_output_flush(file_buff_o1);
+ close(file_buff_o1->fd);
+ free(file_buff_o1);
+ }
+ if (file_buff_o2) {
+ html_output_flush(file_buff_o2);
+ close(file_buff_o2->fd);
+ free(file_buff_o2);
+ }
+ if (file_buff_script) {
+ html_output_flush(file_buff_script);
+ close(file_buff_script->fd);
+ free(file_buff_script);
+ }
+ return retval;
+}
+
+/*
+ * Normalise HTML that is already held in memory.
+ *
+ * Describes the in_size bytes at in_buff with an m_area_t (offset 0) and
+ * passes it to cli_html_normalise() with -1 in place of a file descriptor.
+ * dirname, hrefs and dconf are forwarded unchanged; the return value is
+ * whatever cli_html_normalise() returns.
+ */
+int html_normalise_mem(unsigned char *in_buff, off_t in_size, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf)
+{
+    m_area_t region;
+
+    region.offset = 0;
+    region.length = in_size;
+    region.buffer = in_buff;
+
+    return cli_html_normalise(-1, &region, dirname, hrefs, dconf);
+}
+
+/*
+ * Normalise the HTML readable through file descriptor fd.
+ *
+ * When built with HAVE_MMAP the file is mapped read-only and handed to
+ * cli_html_normalise() as a memory area; if fstat() or mmap() fails the
+ * descriptor itself is passed instead. Without HAVE_MMAP the descriptor
+ * is always passed. The return value is that of cli_html_normalise().
+ */
+int html_normalise_fd(int fd, const char *dirname, tag_arguments_t *hrefs,const struct cli_dconf* dconf)
+{
+#if HAVE_MMAP
+    struct stat sb;
+    m_area_t mapped;
+    int rc;
+
+    if (fstat(fd, &sb) != 0) {
+        cli_dbgmsg("fstat HTML failed\n");
+        return cli_html_normalise(fd, NULL, dirname, hrefs, dconf);
+    }
+
+    mapped.length = sb.st_size;
+    mapped.buffer = (unsigned char *) mmap(NULL, mapped.length, PROT_READ, MAP_PRIVATE, fd, 0);
+    mapped.offset = 0;
+    if (mapped.buffer == MAP_FAILED) {
+        /* No mapping available: fall back to reading through fd */
+        cli_dbgmsg("mmap HTML failed\n");
+        return cli_html_normalise(fd, NULL, dirname, hrefs, dconf);
+    }
+
+    cli_dbgmsg("mmap'ed file\n");
+    rc = cli_html_normalise(-1, &mapped, dirname, hrefs, dconf);
+    munmap(mapped.buffer, mapped.length);
+    return rc;
+#else
+    return cli_html_normalise(fd, NULL, dirname, hrefs, dconf);
+#endif
+}
+
+/*
+ * Decode an encoded script (introduced by the "#@~^" marker) read from fd
+ * and write the decoded text to "<dirname>/screnc.html".
+ *
+ * fd is rewound and dup()ed, so the caller's descriptor is not closed here.
+ * After the marker, six characters encode the payload length and two more
+ * characters are skipped before decoding starts.
+ *
+ * Returns TRUE once decoding has run (also when the input ends before the
+ * announced length has been consumed), FALSE if the descriptor cannot be
+ * duplicated or opened as a stream, the output file cannot be created, no
+ * marker is found, or the input ends inside the length field.
+ */
+int html_screnc_decode(int fd, const char *dirname)
+{
+    int fd_tmp, table_pos=0, result, count, state, retval=FALSE;
+    unsigned char *line, tmpstr[6];
+    unsigned long length;
+    unsigned char *ptr = NULL;
+    char filename[1024];
+    FILE *stream_in;
+    file_buff_t file_buff;
+
+    lseek(fd, 0, SEEK_SET);
+    fd_tmp = dup(fd);
+    if (fd_tmp < 0) {
+        return FALSE;
+    }
+    stream_in = fdopen(fd_tmp, "r");
+    if (!stream_in) {
+        close(fd_tmp);
+        return FALSE;
+    }
+
+    snprintf(filename, sizeof(filename), "%s/screnc.html", dirname);
+    file_buff.fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IWUSR|S_IRUSR);
+    file_buff.length = 0;
+
+    /* open() signals failure with -1; descriptor 0 is a valid result */
+    if (file_buff.fd < 0) {
+        cli_dbgmsg("open failed: %s\n", filename);
+        fclose(stream_in);
+        return FALSE;
+    }
+
+    /* Find the first line containing the start marker */
+    while ((line = cli_readline(stream_in, NULL, 8192)) != NULL) {
+        ptr = (unsigned char *) strstr((const char *) line, "#@~^");
+        if (ptr) {
+            break;
+        }
+        free(line);
+    }
+    if (!line) {
+        goto abort;
+    }
+
+    /* Calculate the length of the encoded string */
+    ptr += 4;
+    count = 0;
+    do {
+        /* Current line used up: move on to the next non-empty line */
+        while (! *ptr) {
+            free(line);
+            ptr = line = cli_readline(stream_in, NULL, 8192);
+            if (!line) {
+                goto abort;
+            }
+        }
+        tmpstr[count++] = *ptr;
+        ptr++;
+    } while (count < 6);
+
+    length = base64_chars[tmpstr[0]] << 2;
+    length += base64_chars[tmpstr[1]] >> 4;
+    length += (base64_chars[tmpstr[1]] & 0x0f) << 12;
+    length += (base64_chars[tmpstr[2]] >> 2) << 8;
+    length += (base64_chars[tmpstr[2]] & 0x03) << 22;
+    length += base64_chars[tmpstr[3]] << 16;
+    length += (base64_chars[tmpstr[4]] << 2) << 24;
+    length += (base64_chars[tmpstr[5]] >> 4) << 24;
+
+    /* Move forward 2 bytes */
+    count = 2;
+    state = HTML_SKIP_LENGTH;
+
+    while (length && line) {
+        while (length && *ptr) {
+            /* Line terminators are dropped without counting against length */
+            if ((*ptr == '\n') || (*ptr == '\r')) {
+                ptr++;
+                continue;
+            }
+            switch (state) {
+            case HTML_SKIP_LENGTH:
+                ptr++;
+                count--;
+                if (count == 0) {
+                    state = HTML_NORM;
+                }
+                break;
+            case HTML_SPECIAL_CHAR:
+                /* Second byte of a two-byte escape selects the output character */
+                switch (*ptr) {
+                case 0x21:
+                    html_output_c(&file_buff, NULL, 0x3c);
+                    break;
+                case 0x23:
+                    html_output_c(&file_buff, NULL, 0x0d);
+                    break;
+                case 0x24:
+                    html_output_c(&file_buff, NULL, 0x40);
+                    break;
+                case 0x26:
+                    html_output_c(&file_buff, NULL, 0x0a);
+                    break;
+                case 0x2a:
+                    html_output_c(&file_buff, NULL, 0x3e);
+                    break;
+                }
+                ptr++;
+                length--;
+                state = HTML_NORM;
+                break;
+            case HTML_NORM:
+                if (*ptr < 0x80) {
+                    result = decrypt_tables[table_order[table_pos]][*ptr];
+                    if (result == 0xFF) { /* special character */
+                        state = HTML_SPECIAL_CHAR;
+                    } else {
+                        html_output_c(&file_buff, NULL, (char)result);
+                    }
+                }
+                ptr++;
+                length--;
+                table_pos = (table_pos + 1) % 64;
+                break;
+            }
+        }
+        free(line);
+        line = NULL;
+        if (length) {
+            ptr = line = cli_readline(stream_in, NULL, 8192);
+        }
+    }
+    /* Still holds the marker line when the announced length was zero */
+    free(line);
+    retval = TRUE;
+
+abort:
+    fclose(stream_in);
+    html_output_flush(&file_buff);
+    close(file_buff.fd);
+    return retval;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_is_tar.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,97 @@
+/*
+ * is_tar() -- figure out whether file is a tar archive.
+ *
+ * Stolen (by the author of the file utility!) from the public domain tar program:
+ * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
+ *
+ * @(#)list.c 1.18 9/23/86 Public Domain - gnu
+ * $Id: is_tar.c,v 1.2 2006/06/17 21:00:44 tkojm Exp $
+ *
+ * Comments changed and some code/comments reformatted
+ * for file command by Ian Darwin.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include "is_tar.h"
+
+#include "others.h"
+
+/*
+ * True when c is an octal digit ('0'..'7'). The argument is evaluated
+ * twice, so it must not have side effects.
+ */
+#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
+
+/* Forward declaration; defined after is_tar(), which calls it. */
+static int from_oct(int digs, char *where);
+
+/*
+ * is_tar() -- decide whether a buffer starts with a tar header record.
+ *
+ * The header checksum is recomputed with the "chksum" field counted as
+ * blanks and compared with the octal value stored in that field.
+ *
+ * Return
+ * 0 if fewer than sizeof(union record) bytes are available or the
+ *   checksum is bad (i.e., probably not a tar archive),
+ * 1 for old UNIX tar file,
+ * 2 for Unix Std (POSIX) tar file (magic field equals TMAGIC).
+ */
+int is_tar(unsigned char *buf, unsigned int nbytes)
+{
+    union record *rec;
+    const char *bytes;
+    int stored, computed, idx;
+
+    if (nbytes < sizeof(union record))
+        return 0;
+
+    rec = (union record *)buf;
+    stored = from_oct(8, rec->header.chksum);
+
+    /*
+     * Plain char plus a 0xFF mask is used instead of unsigned char
+     * because of old compilers, e.g. V7.
+     */
+    bytes = rec->charptr;
+    computed = 0;
+    for (idx = 0; idx < (int)sizeof(union record); idx++)
+        computed += 0xFF & bytes[idx];
+
+    /* Replace the stored checksum bytes by the same number of blanks. */
+    for (idx = 0; idx < (int)sizeof(rec->header.chksum); idx++)
+        computed -= 0xFF & rec->header.chksum[idx];
+    computed += ' '* sizeof rec->header.chksum;
+
+    if (computed != stored)
+        return 0; /* Not a tar archive */
+
+    /* Unix Standard archive when the magic matches, old fashioned otherwise */
+    return (strcmp(rec->header.magic, TMAGIC) == 0) ? 2 : 1;
+}
+
+
+/*
+ * Quick and dirty octal conversion of a field of at most digs characters.
+ *
+ * Leading whitespace is skipped, then octal digits are accumulated.
+ * Result is -1 if the field is all blank, or if the digits are followed
+ * (within the field) by anything other than whitespace or NUL.
+ */
+static int from_oct(int digs, char *where)
+{
+    int acc = 0;
+
+    /* Leading blanks use up field width too */
+    while (isspace((unsigned char)*where)) {
+        ++where;
+        digs -= 1;
+        if (digs <= 0)
+            return -1; /* All blank field */
+    }
+
+    /* Scan til nonoctal or end of field */
+    for (; digs > 0 && isodigit(*where); --digs, ++where)
+        acc = (acc << 3) | (*where - '0');
+
+    /* Fine if the field is used up or the digits end at NUL/whitespace */
+    if (digs <= 0 || *where == '\0' || isspace((unsigned char)*where))
+        return acc;
+
+    return -1; /* Ended on non-space/nul */
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_jscript.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2006 Nigel Horne <njh at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ * Save the JavaScript embedded in an HTML file, then run the script, saving
+ * the output in a file that is to be scanned, then remove the script file
+ *
+ * FIXME: Includes .c files here, which need to be separated out
+ * FIXME: The js code probably only compiles on GCC.
+ * FIXME: The js code needs re_compile_pattern, re_compile_fastmap,
+ * re_search, which NetBSD, and probably other platforms
+ * don't have
+ * TODO: Test with real malware
+ * TODO: Add mailfollowurls type feature
+ * TODO: Check the NGS code for vulnerabilities, leaks etc.
+ * TODO: Check the NGS code is thread safe
+ * TODO: Test code such as
+ * <script>
+ * document.writeln("<script> function f() { ..the real worm code..
+ * </script>"); f();
+ * </script>
+ */
+static char const rcsid[] = "$Id: jscript.c,v 1.11 2006/12/13 15:25:34 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include "clamav.h"
+#include "others.h"
+
+#ifdef CL_EXPERIMENTAL
+
+#if HAVE_MMAP
+
+#include <memory.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <ctype.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "jscript.h"
+
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+/* Maximum filenames under various systems - njh */
+#ifndef NAME_MAX /* e.g. Linux */
+# ifdef MAXNAMELEN /* e.g. Solaris */
+# define NAME_MAX MAXNAMELEN
+# else
+# ifdef FILENAME_MAX /* e.g. SCO */
+# define NAME_MAX FILENAME_MAX
+# else
+# define NAME_MAX 256
+# endif
+# endif
+#endif
+
+#ifdef CL_THREAD_SAFE
+#define VM_TIMEOUT 5 /* In seconds: FIXME should be configurable */
+#endif
+
+#if defined(VM_TIMEOUT) && (VM_TIMEOUT > 0)
+#include <pthread.h>
+#include <sys/time.h>
+#include <signal.h>
+#endif
+
+static int run_js(const char *filename, const char *dir);
+static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
+
+/*
+ * Scan the file open on descriptor desc for <script> ... </script>
+ * sections.  The text of each section is saved to a temporary file in
+ * dir, prefixed with a small JavaScript header that defines a
+ * "document" object, and the file is then handed to run_js().
+ *
+ * Returns:
+ *  CL_EOPEN    if fstat() fails
+ *  CL_CLEAN    if the file is empty
+ *  CL_EFORMAT  if the file is too short to hold <script></script>
+ *  CL_EMEM     if the file can't be memory mapped
+ *  CL_ETMPFILE if a temporary script file can't be created
+ *  otherwise CL_SUCCESS, or the result of run_js() on a script that
+ *  was still open (no closing tag) when the end of the file was reached.
+ * The result of run_js() for scripts that are properly closed is ignored.
+ */
+int
+cli_scanjs(const char *dir, int desc)
+{
+    struct stat statb;
+    off_t size; /* total number of bytes in the file */
+    char *buf; /* start of memory mapped area */
+    const char *p;
+    long bytesleft;
+    int created_output, done_header, rc;
+    FILE *fout;
+    char script_filename[NAME_MAX + 1];
+
+    cli_dbgmsg("in cli_scanjs(%s)\n", dir);
+
+    if(fstat(desc, &statb) < 0)
+        return CL_EOPEN;
+
+    size = (size_t)statb.st_size;
+
+    if(size == 0)
+        return CL_CLEAN;
+
+    if(size <= 17) /* doesn't even include <script></script> */
+        return CL_EFORMAT;
+
+    p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+    if(buf == MAP_FAILED)
+        return CL_EMEM;
+
+    /* off_t need not be unsigned long: cast so the argument matches %lu */
+    cli_dbgmsg("cli_scanjs: scanning %lu bytes\n", (unsigned long)size);
+
+    p = buf;
+    bytesleft = size;
+    created_output = done_header = 0;
+    fout = NULL;
+
+    while(p < &buf[size]) {
+        const char *q = cli_pmemstr(p, bytesleft, "<script", 7);
+
+        if(q == NULL)
+            /* TODO: full case independent search */
+            q = cli_pmemstr(p, bytesleft, "<SCRIPT", 7);
+
+        if(q == NULL)
+            break;
+
+        /*
+         * TODO: check language is javascript
+         * TODO: follow src if mail-follow-urls is set
+         */
+
+        bytesleft -= (q - p);
+        p = q;
+
+        /* Skip the remainder of the opening tag */
+        q = cli_pmemstr(p, bytesleft, ">", 1);
+        if(q == NULL)
+            break;
+
+        bytesleft -= (q - p);
+        p = q;
+
+        p++;
+        bytesleft--;
+
+        /* Copy the body of the script, one character at a time */
+        while(bytesleft) {
+            char c;
+
+            if(*p == '<') {
+                p++;
+                if(--bytesleft == 0)
+                    break;
+                if((*p == '!') && !done_header) {
+                    /*
+                     * "<!" before anything has been saved:
+                     * skip the rest of that line
+                     */
+                    while(bytesleft && (*p != '\n')) {
+                        p++;
+                        bytesleft--;
+                    }
+                    continue;
+                }
+                if((bytesleft >= 7) && (strncasecmp(p, "/script", 7) == 0)) {
+                    /* End of this script: run what was saved */
+                    bytesleft -= 7;
+                    p = &p[7];
+                    while(bytesleft && (*p != '>')) {
+                        p++;
+                        bytesleft--;
+                    }
+                    if(fout) {
+                        fclose(fout);
+                        fout = NULL;
+                        (void)run_js(script_filename, dir);
+
+                        if(!cli_leavetemps_flag)
+                            unlink(script_filename);
+                    }
+                    done_header = 0;
+                    break;
+                }
+                /* An ordinary '<' that is part of the script */
+                c = '<';
+            } else {
+                /*c = tolower(*p);*/
+                c = *p;
+                p++;
+                bytesleft--;
+            }
+
+            if(!done_header) {
+                int fd;
+
+                snprintf(script_filename, sizeof(script_filename), "%s/jsXXXXXX", dir);
+#if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
+                fd = mkstemp(script_filename);
+                if(fd < 0) {
+                    /*
+                     * Don't pass an invalid descriptor to
+                     * fdopen()/close(): that would replace the
+                     * errno reported below with EBADF
+                     */
+                    fout = NULL;
+                } else {
+                    fout = fdopen(fd, "wb");
+                    if(fout == NULL) {
+                        const int saved_errno = errno;
+
+                        close(fd);
+                        errno = saved_errno;
+                    }
+                }
+#elif defined(C_WINDOWS)
+                if(_mktemp(script_filename) == NULL) {
+                    /* mktemp only allows 26 files */
+                    char *name = cli_gentemp(dir);
+                    if(name == NULL)
+                        fout = NULL;
+                    else {
+                        strcpy(script_filename, name);
+                        free(name);
+                        fout = fopen(script_filename, "wb");
+                    }
+                } else
+                    fout = fopen(script_filename, "wb");
+#else
+                mktemp(script_filename);
+                fout = fopen(script_filename, "wb");
+#endif
+
+                if(fout == NULL) {
+                    cli_errmsg("cli_scanjs: can't create temporary file %s: %s\n", script_filename, strerror(errno));
+                    munmap(buf, size);
+                    return CL_ETMPFILE;
+                }
+                cli_dbgmsg("Saving javascript to %s\n",
+                    script_filename);
+
+                /*
+                 * Create a document object, on web pages it's
+                 * used to send output to the browser
+                 * FIXME: will create a file even if the script
+                 * is empty, e.g. src is somewhere else
+                 */
+                fputs("function createDoc() {\n", fout);
+                fputs("\tfunction write(text) {\n", fout);
+                /*
+                 * Use System.print rather than print so that
+                 * a new line is not appended
+                 */
+                fputs("\t\tSystem.print(text);\n", fout);
+                fputs("\t}\n", fout);
+                fputs("}\n", fout);
+                fputs("document = new createDoc();\n", fout);
+
+                done_header = 1;
+                created_output = 1;
+            }
+            putc(c, fout);
+        }
+    }
+
+    munmap(buf, size);
+
+    rc = CL_SUCCESS;
+
+    if(!created_output)
+        cli_dbgmsg("No javascript was detected\n");
+    else if(fout) {
+        /* The last script had no closing tag: run it anyway */
+        fclose(fout);
+        rc = run_js(script_filename, dir);
+
+        if(!cli_leavetemps_flag)
+            unlink(script_filename);
+    }
+    return rc;
+}
+
+#include "compiler.c"
+#include "iostream.c"
+#include "js.c"
+#include "main.c"
+#include "debug.c"
+#include "crc32.c"
+
+/* Stream that receives everything the JS interpreter prints */
+static FILE *fout;
+
+/*
+ * Output callback handed to create_interp(): writes len bytes from buf
+ * to fout as one item.  context is not used.
+ *
+ * Returns what fwrite() returns for a single item, i.e. the number of
+ * complete items written, not a byte count.
+ * NOTE(review): confirm that this is what the interpreter expects.
+ */
+static int
+write_to_fout(void *context, unsigned char *buf, unsigned int len)
+{
+    const size_t items = fwrite(buf, (size_t)len, 1, fout);
+
+    return (int)items;
+}
+
+#if defined(VM_TIMEOUT) && (VM_TIMEOUT > 0)
+
+/* State shared between run_js() and the thread running the interpreter */
+struct args {
+    const char *filename; /* script to evaluate */
+    const char *dir; /* directory for the output file */
+    pthread_cond_t *cond; /* signalled when done is set */
+    pthread_mutex_t *mutex; /* protects done */
+    int done; /* set once the thread no longer needs to be waited for */
+    int result; /* CL_* status, read by run_js() after pthread_join() */
+};
+
+/*
+ * Tell run_js() that it need not wait any longer.  The flag is set and
+ * the condition signalled with the mutex held, so that the wakeup can't
+ * be lost if this happens before run_js() has started to wait.
+ */
+static void
+js_thread_done(struct args *args)
+{
+    int rc;
+
+    pthread_mutex_lock(args->mutex);
+    args->done = 1;
+    rc = pthread_cond_broadcast(args->cond);
+    pthread_mutex_unlock(args->mutex);
+
+    /* pthread functions return an error number, they don't set errno */
+    if(rc != 0)
+        cli_warnmsg("pthread_cond_broadcast: %s\n", strerror(rc));
+}
+
+/*
+ * Thread body: evaluate args->filename with the JS interpreter, sending
+ * the interpreter's output to a new temporary file in args->dir.
+ * a points to a struct args; the status is left in args->result:
+ *  CL_ETMPFILE if the output file can't be created
+ *  CL_EIO      if the thread is cancelled while evaluating the script
+ *  CL_SUCCESS  otherwise (even if the script itself failed)
+ * Always returns NULL.
+ */
+static void *
+js_thread(void *a)
+{
+    JSInterpPtr interp;
+    char *outputfilename;
+    struct args *args = (struct args *)a;
+    const char *dir = args->dir;
+    const char *filename = args->filename;
+    int otype, ostate;
+
+    cli_dbgmsg("run_js(%s)\n", filename);
+
+    outputfilename = cli_gentemp(dir);
+    if(outputfilename == NULL) {
+        args->result = CL_ETMPFILE;
+        js_thread_done(args);
+        return NULL;
+    }
+
+    fout = fopen(outputfilename, "wb");
+    if(fout == NULL) {
+        cli_warnmsg("Can't create %s\n", outputfilename);
+        free(outputfilename);
+        args->result = CL_ETMPFILE;
+        js_thread_done(args);
+        return NULL;
+    }
+
+    cli_dbgmsg("Redirecting JS VM stdout to %s\n", outputfilename);
+    free(outputfilename);
+
+    /*
+     * Run NGS on the file
+     */
+    interp = create_interp(write_to_fout);
+
+    args->result = CL_EIO; /* TODO: CL_TIMEOUT */
+
+    /* Allow run_js() to stop a runaway script at any point */
+    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &otype);
+
+    if(!js_eval_file(interp, filename)) {
+        cli_warnmsg("JS failed: %s\n", js_error_message(interp));
+        /*rc = CL_EIO;*/
+    }
+
+    /*
+     * The script has finished: a cancel arriving from now on must not
+     * interrupt the clean up, or the interpreter would be leaked.
+     * (A small window remains between js_eval_file() returning and
+     * this call.)
+     */
+    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &ostate);
+
+    js_thread_done(args);
+
+    js_destroy_interp(interp);
+
+    fclose(fout);
+
+    args->result = CL_SUCCESS;
+    return NULL;
+}
+
+/*
+ * Run the script in filename in its own thread, giving up on it if it
+ * has not finished within VM_TIMEOUT seconds.  The interpreter's
+ * output is saved in a temporary file in dir.
+ *
+ * Returns the status left by js_thread(), or CL_EIO if the thread
+ * can't be created.
+ */
+static int
+run_js(const char *filename, const char *dir)
+{
+    struct args args;
+    pthread_t tid;
+    struct timespec ts;
+    struct timeval tp;
+    pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+    pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+    int rc;
+
+    args.filename = filename;
+    args.dir = dir;
+    args.cond = &cond;
+    args.mutex = &mutex;
+    args.done = 0;
+    args.result = CL_EIO;
+
+    rc = pthread_create(&tid, NULL, js_thread, &args);
+    if(rc != 0) {
+        /* No thread: tid must not be passed to pthread_join() */
+        cli_warnmsg("Can't create javascript thread: %s\n", strerror(rc));
+        pthread_cond_destroy(&cond);
+        pthread_mutex_destroy(&mutex);
+        return CL_EIO;
+    }
+
+    gettimeofday(&tp, NULL);
+
+    ts.tv_sec = tp.tv_sec + VM_TIMEOUT;
+    ts.tv_nsec = tp.tv_usec * 1000;
+
+    pthread_mutex_lock(&mutex);
+    /*
+     * Wait on the flag rather than on the bare condition: the thread
+     * may already have finished, and waits can wake up spuriously
+     */
+    while(!args.done) {
+        rc = pthread_cond_timedwait(&cond, &mutex, &ts);
+        if(rc == 0)
+            continue;
+        if((rc == ETIMEDOUT) && !args.done) {
+            cli_warnmsg("Runaway javascript stopped after %d seconds\n",
+                VM_TIMEOUT);
+            /*pthread_kill(tid, SIGUSR1);*/
+            rc = pthread_cancel(tid);
+            if(rc != 0)
+                cli_warnmsg("pthread_cancel: %s\n", strerror(rc));
+        }
+        break;
+    }
+    pthread_mutex_unlock(&mutex);
+    pthread_join(tid, NULL);
+
+    pthread_cond_destroy(&cond);
+    pthread_mutex_destroy(&mutex);
+
+    return args.result;
+}
+#else
+/*
+ * Evaluate the script in filename with the JS interpreter, with no
+ * time limit.  Whatever the interpreter prints is saved in a new
+ * temporary file created in dir.
+ *
+ * Returns CL_ETMPFILE if the output file can't be created, otherwise
+ * CL_SUCCESS - a script that fails to evaluate is only warned about.
+ */
+static int
+run_js(const char *filename, const char *dir)
+{
+    JSInterpPtr vm;
+    char *outname;
+
+    cli_dbgmsg("run_js(%s)\n", filename);
+
+    if((outname = cli_gentemp(dir)) == NULL)
+        return CL_ETMPFILE;
+
+    if((fout = fopen(outname, "wb")) == NULL) {
+        cli_warnmsg("Can't create %s\n", outname);
+        free(outname);
+        return CL_ETMPFILE;
+    }
+
+    /* The name is only needed for the messages */
+    cli_dbgmsg("Redirecting JS VM stdout to %s\n", outname);
+    free(outname);
+
+    /* Run NGS on the file, its output goes through write_to_fout() */
+    vm = create_interp(write_to_fout);
+
+    if(!js_eval_file(vm, filename))
+        cli_warnmsg("JS failed: %s\n", js_error_message(vm));
+
+    js_destroy_interp(vm);
+    fclose(fout);
+
+    return CL_SUCCESS;
+}
+#endif
+
+/* Copied from pdf.c :-( */
+/*
+ * like cli_memstr - but returns the location of the match
+ * FIXME: need a case insensitive version
+ *
+ * Looks for the ns bytes at needle within the hs bytes at haystack.
+ * Neither buffer needs to be NUL terminated.
+ *
+ * Returns a pointer to the first match, or NULL if there is none or
+ * if the haystack is shorter than the needle.  haystack is returned
+ * if both pointers are the same.
+ */
+static const char *
+cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
+{
+    const char *pt, *hay;
+    size_t n;
+
+    if(haystack == needle)
+        return haystack;
+
+    if(hs < ns)
+        return NULL;
+
+    if(memcmp(haystack, needle, ns) == 0)
+        return haystack;
+
+    hay = haystack;
+    n = hs; /* bytes left, counted from hay */
+
+    while((pt = memchr(hay, needle[0], n)) != NULL) {
+        /*
+         * Take the difference of the pointers themselves: a pointer
+         * converted to int may be truncated
+         */
+        n -= (size_t)(pt - hay);
+        if(n < ns)
+            break;
+
+        if(memcmp(pt, needle, ns) == 0)
+            return pt;
+
+        /* No match at pt: carry on from the byte after it */
+        hay = pt + 1;
+        n--;
+    }
+
+    return NULL;
+}
+
+#else
+
+/*
+ * Version of cli_scanjs() used when HAVE_MMAP is not set: nothing is
+ * decoded, a warning is issued and the file is reported as CL_CLEAN.
+ * Neither dir nor desc is used.
+ */
+int
+cli_scanjs(const char *dir, int desc)
+{
+ cli_warnmsg("File not decoded - JS decoding needs mmap() (for now)\n");
+ return CL_CLEAN;
+}
+#endif /*HAVE_MMAP*/
+
+#else /*!CL_EXPERIMENTAL*/
+
+/*
+ * Version of cli_scanjs() used when CL_EXPERIMENTAL is not defined:
+ * a warning is issued and CL_EFORMAT is returned.
+ * Neither dir nor desc is used.
+ */
+int
+cli_scanjs(const char *dir, int desc)
+{
+ cli_warnmsg("JS decoding files not yet supported\n");
+ return CL_EFORMAT;
+}
+
+#endif /*CL_EXPERIMENTAL*/
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_line.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2004 Nigel Horne <njh at bandsman.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ * $Log: line.c,v $
+ * Revision 1.11 2007/02/12 20:46:08 njh
+ * Various tidy
+ *
+ * Revision 1.10 2006/04/09 19:59:27 kojm
+ * update GPL headers with new address for FSF
+ *
+ * Revision 1.9 2005/03/10 08:53:33 nigelhorne
+ * Tidy
+ *
+ * Revision 1.8 2005/03/01 11:38:11 nigelhorne
+ * Fix typo
+ *
+ * Revision 1.7 2004/12/08 20:07:23 nigelhorne
+ * Fix compilation error on Solaris
+ *
+ * Revision 1.6 2004/10/14 17:45:55 nigelhorne
+ * Try to reclaim some memory if it becomes low when decoding
+ *
+ * Revision 1.5 2004/09/30 08:58:56 nigelhorne
+ * Remove empty lines
+ *
+ * Revision 1.4 2004/09/21 14:55:26 nigelhorne
+ * Handle blank lines in text/plain messages
+ *
+ * Revision 1.3 2004/08/25 12:30:36 nigelhorne
+ * Use memcpy rather than strcpy
+ *
+ * Revision 1.2 2004/08/21 11:57:57 nigelhorne
+ * Use line.[ch]
+ *
+ * Revision 1.1 2004/08/20 11:58:20 nigelhorne
+ * First draft
+ *
+ */
+
+static char const rcsid[] = "$Id: line.c,v 1.11 2007/02/12 20:46:08 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef CL_DEBUG
+#define NDEBUG /* map CLAMAV debug onto standard */
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "line.h"
+#include "others.h"
+
+/*
+ * Create a line holding a copy of the NUL terminated string data.
+ * The first element of a line is its link count, which starts at 1;
+ * the text follows it.
+ *
+ * Returns the new line, or NULL if memory can't be allocated.
+ */
+line_t *
+lineCreate(const char *data)
+{
+    const size_t len = strlen(data);
+    /* one element for the link count, one for the terminating NUL */
+    line_t *line = (line_t *)cli_malloc(len + 2);
+
+    if(line != NULL) {
+        line[0] = (char)1;
+        memcpy(&line[1], data, len);
+        line[len + 1] = '\0';
+    }
+
+    return line;
+}
+
+/*
+ * Take another reference to line.
+ *
+ * Normally the link count is incremented and line itself is returned.
+ * If the count has already reached 255 a separate copy of the data is
+ * created with lineCreate() and returned instead (which may be NULL).
+ */
+line_t *
+lineLink(line_t *line)
+{
+    assert(line != NULL);
+
+    if((unsigned char)line[0] != (unsigned char)255) {
+        line[0]++;
+        return line;
+    }
+
+    cli_dbgmsg("lineLink: linkcount too large (%s)\n", lineGetData(line));
+    return lineCreate(lineGetData(line));
+}
+
+/*
+ * Drop one reference to line.
+ *
+ * Returns line if it is still referenced; otherwise the line is freed
+ * and NULL is returned.
+ */
+line_t *
+lineUnlink(line_t *line)
+{
+    line[0]--;
+
+    if(line[0] != 0)
+        return line;
+
+    /* That was the last reference */
+    free(line);
+    return NULL;
+}
+
+/*
+ * Return the text held in line, i.e. what follows the link count,
+ * or NULL if line is NULL.
+ */
+const char *
+lineGetData(const line_t *line)
+{
+    if(line == NULL)
+        return NULL;
+
+    return &line[1];
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_lockdb.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2006 Mark Pizzolato <clamav-devel at subscriptions.pizzolato.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+/*
+ * This is a problem, which from a purist point of view, best wants an
+ * RW locking mechanism.
+ * On Posix platforms, we leverage advisory locks provided by fcntl().
+ * Windows doesn't have a native interprocess RW exclusion mechanism,
+ * one could be constructed from the services available, but it is somewhat
+ * complicated. Meanwhile, we observe that in ClamAV, it is extremely rare
+ * that there will ever be an occasion when multiple processes will be
+ * reading the ClamAV database from a given directory at the same time, and in
+ * none of those possible cases would it matter if they serialized their
+ * accesses. So, a simple mutual exclusion mechanism will suffice for both
+ * the reader and writer locks on Windows.
+ */
+#ifdef _MSC_VER
+#include <windows.h>
+#endif
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <errno.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "lockdb.h"
+
+#ifdef CL_THREAD_SAFE
+#include <pthread.h>
+pthread_mutex_t lock_mutex = PTHREAD_MUTEX_INITIALIZER;
+#else
+#define pthread_mutex_lock(arg)
+#define pthread_mutex_unlock(arg)
+#endif
+
+#ifdef C_WINDOWS /* FIXME */
+#define DONT_LOCK_DBDIRS
+#endif
+
+/*
+ * One entry per database directory lock file, kept on the singly linked
+ * list that starts at dblocks.
+ */
+struct dblock {
+ /* next entry on the list, NULL at the end */
+ struct dblock *lock_link;
+ /* name of the lock, as built by cli_lockname() */
+ char lock_file[NAME_MAX];
+#ifndef C_WINDOWS
+ /* descriptor of the open lock file, -1 when it isn't open */
+ int lock_fd;
+#else
+ /* handle of the mutex used as the lock */
+ HANDLE lock_fd;
+#endif
+ /* -1: not locked, 0: read locked, non-zero: write locked */
+ int lock_type;
+};
+
+static struct dblock *dblocks = NULL;
+
+static void cli_lockname(char *lock_file, size_t lock_file_size, const char *dbdirpath);
+static int cli_lockdb(const char *dbdirpath, int wait, int writelock);
+
+#ifdef DONT_LOCK_DBDIRS
+
+/* DONT_LOCK_DBDIRS: no locking is done, always returns CL_SUCCESS */
+int cli_readlockdb(const char *dbdirpath, int wait)
+{
+ return CL_SUCCESS;
+}
+
+/* DONT_LOCK_DBDIRS: no locking is done, always returns CL_SUCCESS */
+int cli_writelockdb(const char *dbdirpath, int wait)
+{
+ return CL_SUCCESS;
+}
+
+/* DONT_LOCK_DBDIRS: there is nothing to unlock, always returns CL_SUCCESS */
+int cli_unlockdb(const char *dbdirpath)
+{
+ return CL_SUCCESS;
+}
+
+/* DONT_LOCK_DBDIRS: there is nothing to free, always returns CL_SUCCESS */
+int cli_freelocks(void)
+{
+ return CL_SUCCESS;
+}
+
+#else /* !DONT_LOCK_DBDIRS */
+
+/*
+ * Take a read lock on the database directory dbdirpath.
+ * wait is passed on to cli_lockdb(), whose result is returned.
+ */
+int cli_readlockdb(const char *dbdirpath, int wait)
+{
+ return cli_lockdb(dbdirpath, wait, 0);
+}
+
+/*
+ * Take a write lock on the database directory dbdirpath.
+ * wait is passed on to cli_lockdb(), whose result is returned.
+ */
+int cli_writelockdb(const char *dbdirpath, int wait)
+{
+ return cli_lockdb(dbdirpath, wait, 1);
+}
+
+/*
+ * Free the entries on the dblocks list that are not in use.
+ *
+ * Entries that are still locked and have their lock file open are
+ * kept (there might be some locks in use, eg: during a db reload, a
+ * failure can lead to cl_free being called); they end up on the list
+ * in reverse order.
+ *
+ * Always returns CL_SUCCESS.
+ */
+int cli_freelocks(void)
+{
+    struct dblock *cur, *following, *kept = NULL;
+
+    pthread_mutex_lock(&lock_mutex);
+    cur = dblocks;
+    while(cur != NULL) {
+        /* remember the successor: cur is either relinked or freed */
+        following = cur->lock_link;
+        if(cur->lock_type == -1 || cur->lock_fd == -1) {
+            free(cur);
+        } else {
+            cur->lock_link = kept;
+            kept = cur;
+        }
+        cur = following;
+    }
+    dblocks = kept;
+    pthread_mutex_unlock(&lock_mutex);
+
+    return CL_SUCCESS;
+}
+
+
+/*
+ * Release the lock held on the database directory dbdirpath.
+ *
+ * Returns CL_SUCCESS, or CL_ELOCKDB if the directory is not locked or
+ * the lock can't be released.  Except on Windows the lock file is
+ * closed and removed in both the success and the failure case.
+ */
+int cli_unlockdb(const char *dbdirpath)
+{
+    char lock_file[NAME_MAX];
+    struct dblock *lock;
+#ifndef C_WINDOWS
+    struct flock fl;
+#endif
+
+    cli_lockname(lock_file, sizeof(lock_file), dbdirpath);
+    pthread_mutex_lock(&lock_mutex);
+    for(lock=dblocks; lock; lock=lock->lock_link)
+        if(!strcmp(lock_file, lock->lock_file))
+            break;
+    if((!lock) || (lock->lock_type == -1)) {
+        cli_errmsg("Database Directory: %s not locked\n", dbdirpath);
+        pthread_mutex_unlock(&lock_mutex);
+        return CL_ELOCKDB;
+    }
+#ifndef C_WINDOWS
+    /* a zeroed struct flock describes the whole file */
+    memset(&fl, 0, sizeof(fl));
+    fl.l_type = F_UNLCK;
+    if(fcntl(lock->lock_fd, F_SETLK, &fl) == -1) {
+#else
+    if(!ReleaseMutex(lock->lock_fd)) {
+#endif
+        cli_errmsg("Error Unlocking Database Directory %s\n", dbdirpath);
+#ifndef C_WINDOWS
+        close(lock->lock_fd);
+        lock->lock_fd=-1;
+        unlink(lock->lock_file);
+#endif
+        /*
+         * Only drop the mutex once the entry, which is shared through
+         * the dblocks list, is no longer being modified.
+         * NOTE(review): lock_type is left untouched here, so the
+         * entry still reads as locked - confirm that is intended.
+         */
+        pthread_mutex_unlock(&lock_mutex);
+        return CL_ELOCKDB;
+    }
+    lock->lock_type = -1;
+#ifndef C_WINDOWS
+    close(lock->lock_fd);
+    lock->lock_fd=-1;
+    unlink(lock->lock_file);
+#endif
+    pthread_mutex_unlock(&lock_mutex);
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Lock the database directory dbdirpath.
+ *
+ * wait:      if non-zero block until the lock can be taken, otherwise
+ *            fail straight away if it is held elsewhere
+ * writelock: non-zero for a write lock, zero for a read lock
+ *
+ * Returns:
+ *  CL_SUCCESS  the lock has been taken
+ *  CL_EMEM     no memory for a new entry on the dblocks list
+ *  CL_ELOCKDB  the directory is already locked, here or elsewhere
+ *  CL_EIO      the lock file (or mutex) can't be created or opened,
+ *              or locking failed for some other reason
+ */
+static int cli_lockdb(const char *dbdirpath, int wait, int writelock)
+{
+    char lock_file[NAME_MAX];
+    struct dblock *lock;
+#ifndef C_WINDOWS
+    struct flock fl;
+    mode_t old_mask;
+    unsigned int existing = 0;
+    int lock_errno;
+#else
+    DWORD LastError;
+    SECURITY_ATTRIBUTES saAttr;
+    SECURITY_DESCRIPTOR sdDesc;
+#endif
+
+    cli_lockname(lock_file, sizeof(lock_file), dbdirpath);
+    pthread_mutex_lock(&lock_mutex);
+    for(lock=dblocks; lock; lock=lock->lock_link)
+        if(!strcmp(lock_file, lock->lock_file))
+            break;
+    if(!lock) {
+        /* First use of this directory: add an unlocked entry for it */
+        lock = cli_calloc(1, sizeof(*lock));
+        if(!lock) {
+            cli_errmsg("cli_lockdb(): Can't allocate lock structure to lock Database Directory: %s\n", dbdirpath);
+            pthread_mutex_unlock(&lock_mutex);
+            return CL_EMEM;
+        }
+        lock->lock_link = dblocks;
+        strcpy(lock->lock_file, lock_file);
+        lock->lock_fd = -1;
+        lock->lock_type = -1;
+        dblocks = lock;
+    }
+    if(lock->lock_type != -1) {
+        cli_dbgmsg("Database Directory: %s already %s locked\n", dbdirpath, (lock->lock_type? "write" : "read"));
+        pthread_mutex_unlock(&lock_mutex);
+        return CL_ELOCKDB;
+    }
+#ifndef C_WINDOWS
+    if(lock->lock_fd == -1) {
+        old_mask = umask(0);
+        if(-1 == (lock->lock_fd = open(lock->lock_file, O_RDWR|O_CREAT|O_TRUNC, S_IRWXU|S_IRWXG|S_IROTH))) {
+            /*
+             * A reader may fall back on opening an existing lock
+             * file read only
+             */
+            if((writelock) ||
+               (-1 == (lock->lock_fd = open(lock->lock_file, O_RDONLY)))) {
+                cli_dbgmsg("Can't %s Lock file for Database Directory: %s\n", (writelock ? "create" : "open"), dbdirpath);
+                umask(old_mask);
+                pthread_mutex_unlock(&lock_mutex);
+                return CL_EIO; /* or CL_EACCESS */
+            } else {
+                existing = 1;
+            }
+        }
+        umask(old_mask);
+    }
+#else
+    if(lock->lock_fd == -1) {
+        /* Create a security descriptor which allows any process to acquire the Mutex */
+        InitializeSecurityDescriptor(&sdDesc, SECURITY_DESCRIPTOR_REVISION);
+        SetSecurityDescriptorDacl(&sdDesc, TRUE, NULL, FALSE);
+        saAttr.nLength = sizeof(saAttr);
+        saAttr.bInheritHandle = FALSE;
+        saAttr.lpSecurityDescriptor = &sdDesc;
+        if(!(lock->lock_fd = CreateMutexA(&saAttr, TRUE, lock->lock_file))) {
+            if((GetLastError() != ERROR_ACCESS_DENIED) ||
+               (!(lock->lock_fd = OpenMutexA(MUTEX_MODIFY_STATE, FALSE, lock->lock_file)))) {
+                cli_dbgmsg("Can't Create Mutex Lock for Database Directory: %s\n", dbdirpath);
+                pthread_mutex_unlock(&lock_mutex);
+                return CL_EIO;
+            }
+            /* Opened a mutex that somebody else created */
+            LastError = ERROR_ALREADY_EXISTS;
+        } else {
+            /*
+             * Only ask the system when CreateMutexA() succeeded, so
+             * that the value set after OpenMutexA() is not lost
+             */
+            LastError = GetLastError();
+        }
+    } else {
+        LastError = ERROR_ALREADY_EXISTS;
+    }
+#endif
+    pthread_mutex_unlock(&lock_mutex);
+
+#ifndef C_WINDOWS
+    /* a zeroed struct flock describes the whole file */
+    memset(&fl, 0, sizeof(fl));
+    fl.l_type = (writelock ? F_WRLCK : F_RDLCK);
+    if(fcntl(lock->lock_fd, ((wait) ? F_SETLKW : F_SETLK), &fl) == -1) {
+        /* close() and unlink() may change errno: keep fcntl()'s value */
+        lock_errno = errno;
+        close(lock->lock_fd);
+        lock->lock_fd = -1;
+        if(lock_errno != EACCES && lock_errno != EAGAIN) {
+            if(!existing)
+                unlink(lock->lock_file);
+            cli_errmsg("Can't acquire %s lock: %s\n", writelock ? "write" : "read", strerror(lock_errno));
+            return CL_EIO;
+        }
+        /* EACCES or EAGAIN: the lock is held by somebody else */
+        return CL_ELOCKDB;
+    }
+#else
+    if(LastError == ERROR_ALREADY_EXISTS) {
+        if(WAIT_TIMEOUT == WaitForSingleObject(lock->lock_fd, ((wait) ? INFINITE : 0))) {
+            lock->lock_type = -1;
+            return CL_ELOCKDB;
+        }
+    }
+#endif
+    lock->lock_type = writelock;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Build the name of the lock for the database directory dbdirpath in
+ * lock_file, a buffer of lock_file_size bytes.
+ *
+ * Except on Windows the name is "<dbdirpath>/.dbLock".  On Windows it
+ * is "Global\ClamAVDB-<dbdirpath>", where the directory part has its
+ * backslashes turned into slashes, is converted to upper case and has
+ * a trailing slash removed.
+ * In both cases "//" and "/./" sequences within the name are reduced
+ * to a single "/", so that different spellings of the same directory
+ * give the same name.
+ */
+static void cli_lockname(char *lock_file, size_t lock_file_size, const char *dbdirpath)
+{
+    char *c;
+
+    lock_file[lock_file_size-1] = '\0';
+#ifndef C_WINDOWS
+    snprintf(lock_file, lock_file_size-1, "%s/.dbLock", dbdirpath);
+    for (c=lock_file; *c; ++c) {
+#else
+    snprintf(lock_file, lock_file_size-1, "Global\\ClamAVDB-%s", dbdirpath);
+    /* skip the fixed "Global\ClamAVDB-" prefix */
+    for (c=lock_file+16; *c; ++c) {
+#endif
+        switch (*c) {
+#ifdef C_WINDOWS
+        case '\\':
+            *c = '/';
+            /* fallthrough */
+#endif
+        case '/':
+            /*
+             * In both cases c is left on the '/' that is kept; the
+             * loop's ++c then moves on to the character after it
+             */
+            if(c!=lock_file && *(c-1) == '/') { /* compress imbedded // */
+                --c;
+                memmove(c, c+1,strlen(c+1)+1);
+            } else if(c > lock_file+1 && (*(c-2) == '/') && (*(c-1) == '.')) { /* compress imbedded /./ */
+                c -= 2;
+                memmove(c, c+2,strlen(c+2)+1);
+            }
+            break;
+#ifdef C_WINDOWS
+        default:
+            /*
+             * The <ctype.h> functions must be given a value that
+             * fits in an unsigned char, plain char may be negative
+             */
+            if(islower((unsigned char)*c)) /* Normalize to upper case */
+                *c = (char)toupper((unsigned char)*c);
+            break;
+#endif
+        }
+    }
+#ifdef C_WINDOWS
+    if('/' == lock_file[strlen(lock_file)-1]) /* Remove trailing / */
+        lock_file[strlen(lock_file)-1] = '\0';
+#endif
+}
+
+#endif /* DONT_LOCK_DBDIRS */
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-ac.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,985 @@
+/*
+ * Copyright (C) 2002 - 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "matcher.h"
+#include "matcher-ac.h"
+#include "filetypes.h"
+#include "cltypes.h"
+#include "str.h"
+
+uint8_t cli_ac_mindepth = AC_DEFAULT_MIN_DEPTH;
+uint8_t cli_ac_maxdepth = AC_DEFAULT_MAX_DEPTH;
+
+/*
+ * Add pattern to the Aho-Corasick trie of root.
+ *
+ * Only the leading part of the pattern is stored in the trie: it ends
+ * at the first element that contains a wildcard and is never longer
+ * than root->ac_maxdepth.  The number of elements used is recorded in
+ * pattern->depth.  The pattern itself is appended to root->ac_pattable
+ * and linked to the trie node where its leading part ends - or, if an
+ * identical pattern (same pattern, prefix and alternatives) is already
+ * on that node's list, to that pattern's next_same chain.
+ *
+ * Returns:
+ *  CL_SUCCESS   the pattern has been added
+ *  CL_EPATSHORT the usable part is shorter than root->ac_mindepth
+ *  CL_EMEM      out of memory
+ */
+int cli_ac_addpatt(struct cli_matcher *root, struct cli_ac_patt *pattern)
+{
+    struct cli_ac_node *pt, *next, **newtable;
+    struct cli_ac_patt *ph;
+    struct cli_ac_alt *a1, *a2;
+    /*
+     * i also counts alternatives, for which ac_free_alt() uses a
+     * 16 bit index: an 8 bit one could wrap round and never reach
+     * the limit
+     */
+    uint16_t i;
+    uint8_t match;
+    uint16_t len = MIN(root->ac_maxdepth, pattern->length);
+
+
+    /* The trie can't hold wildcards: stop in front of the first one */
+    for(i = 0; i < len; i++) {
+        if(pattern->pattern[i] & CLI_MATCH_WILDCARD) {
+            len = i;
+            break;
+        }
+    }
+
+    if(len < root->ac_mindepth)
+        return CL_EPATSHORT;
+
+    pt = root->ac_root;
+
+    /* Walk down the trie, creating the nodes that are missing */
+    for(i = 0; i < len; i++) {
+        if(!pt->trans) {
+            pt->trans = (struct cli_ac_node **) cli_calloc(256, sizeof(struct cli_ac_node *));
+            if(!pt->trans) {
+                cli_errmsg("cli_ac_addpatt: Can't allocate memory for pt->trans\n");
+                return CL_EMEM;
+            }
+        }
+
+        next = pt->trans[(unsigned char) (pattern->pattern[i] & 0xff)];
+
+        if(!next) {
+            next = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
+            if(!next) {
+                cli_errmsg("cli_ac_addpatt: Can't allocate memory for AC node\n");
+                return CL_EMEM;
+            }
+
+            /* Only the last node of the path starts without a table */
+            if(i != len - 1) {
+                next->trans = (struct cli_ac_node **) cli_calloc(256, sizeof(struct cli_ac_node *));
+                if(!next->trans) {
+                    cli_errmsg("cli_ac_addpatt: Can't allocate memory for next->trans\n");
+                    free(next);
+                    return CL_EMEM;
+                }
+            } else {
+                next->leaf = 1;
+            }
+
+            /* Every node is recorded in ac_nodetable */
+            root->ac_nodes++;
+            newtable = (struct cli_ac_node **) cli_realloc(root->ac_nodetable, root->ac_nodes * sizeof(struct cli_ac_node *));
+            if(!newtable) {
+                root->ac_nodes--;
+                cli_errmsg("cli_ac_addpatt: Can't realloc ac_nodetable\n");
+                if(next->trans)
+                    free(next->trans);
+                free(next);
+                return CL_EMEM;
+            }
+            newtable[root->ac_nodes - 1] = next;
+            root->ac_nodetable = newtable;
+
+            pt->trans[(unsigned char) (pattern->pattern[i] & 0xff)] = next;
+            pt->leaf = 0;
+        }
+
+        pt = next;
+    }
+
+    /*
+     * NOTE(review): if cli_realloc2() fails ac_patterns has already
+     * been incremented while ac_pattable is NULL - confirm that the
+     * callers cope with that.
+     */
+    root->ac_patterns++;
+    root->ac_pattable = (struct cli_ac_patt **) cli_realloc2(root->ac_pattable, root->ac_patterns * sizeof(struct cli_ac_patt *));
+    if(!root->ac_pattable) {
+        cli_errmsg("cli_ac_addpatt: Can't realloc ac_pattable\n");
+        return CL_EMEM;
+    }
+    root->ac_pattable[root->ac_patterns - 1] = pattern;
+
+    pt->final = 1;
+    pattern->depth = i; /* i == len here */
+
+    /* Look for an identical pattern on the list of this node */
+    ph = pt->list;
+    while(ph) {
+        if((ph->length == pattern->length) && (ph->prefix_length == pattern->prefix_length)) {
+            if(!memcmp(ph->pattern, pattern->pattern, ph->length * sizeof(uint16_t)) && !memcmp(ph->prefix, pattern->prefix, ph->prefix_length * sizeof(uint16_t))) {
+                if(!ph->alt && !pattern->alt) {
+                    match = 1;
+                } else if(ph->alt == pattern->alt) {
+                    /* Same number of alternatives: compare each of them */
+                    match = 1;
+                    for(i = 0; i < ph->alt; i++) {
+                        a1 = ph->alttable[i];
+                        a2 = pattern->alttable[i];
+
+                        if(a1->num != a2->num) {
+                            match = 0;
+                            break;
+                        }
+                        if(a1->chmode != a2->chmode) {
+                            match = 0;
+                            break;
+                        } else if(a1->chmode) {
+                            /* a set of num single characters */
+                            if(memcmp(a1->str, a2->str, a1->num)) {
+                                match = 0;
+                                break;
+                            }
+                        } else {
+                            /* a chain of strings, to be equal one by one */
+                            while(a1 && a2) {
+                                if((a1->len != a2->len) || memcmp(a1->str, a2->str, a1->len))
+                                    break;
+                                a1 = a1->next;
+                                a2 = a2->next;
+                            }
+                            if(a1 || a2) {
+                                match = 0;
+                                break;
+                            }
+                        }
+                    }
+                } else {
+                    match = 0;
+                }
+
+                if(match) {
+                    pattern->next_same = ph->next_same;
+                    ph->next_same = pattern;
+                    return CL_SUCCESS;
+                }
+            }
+        }
+        ph = ph->next;
+    }
+
+    /* Nothing identical: put the pattern at the head of the list */
+    pattern->next = pt->list;
+    pt->list = pattern;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Element of the queue of trie nodes handled by bfs_enqueue() and
+ * bfs_dequeue()
+ */
+struct bfs_list {
+ /* the queued node */
+ struct cli_ac_node *node;
+ /* element queued after this one, NULL for the last */
+ struct bfs_list *next;
+};
+
+/*
+ * Append the node n to the queue.  *bfs is the first element of the
+ * queue and *last its final one; both are NULL for an empty queue and
+ * are updated as needed.
+ *
+ * Returns CL_SUCCESS, or CL_EMEM if no memory is available.
+ */
+static int bfs_enqueue(struct bfs_list **bfs, struct bfs_list **last, struct cli_ac_node *n)
+{
+    struct bfs_list *item = (struct bfs_list *) cli_malloc(sizeof(struct bfs_list));
+
+    if(item == NULL) {
+        cli_errmsg("bfs_enqueue: Can't allocate memory for bfs_list\n");
+        return CL_EMEM;
+    }
+    item->node = n;
+    item->next = NULL;
+
+    /* The first element of an empty queue is also its head */
+    if(*last == NULL)
+        *bfs = item;
+    else
+        (*last)->next = item;
+    *last = item;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Remove the first element from the queue (see bfs_enqueue()) and
+ * free it.
+ *
+ * Returns the node which that element held, or NULL if the queue is
+ * empty.
+ */
+static struct cli_ac_node *bfs_dequeue(struct bfs_list **bfs, struct bfs_list **last)
+{
+    struct bfs_list *head = *bfs;
+    struct cli_ac_node *node;
+
+    if(head == NULL)
+        return NULL;
+
+    node = head->node;
+    *bfs = head->next;
+    /* Taking the only element leaves the queue without a tail */
+    if(*last == head)
+        *last = NULL;
+    free(head);
+
+    return node;
+}
+
+/*
+ * Compute the failure transitions of the trie of root, visiting the
+ * nodes breadth first.
+ *
+ * Missing transitions of the root node are made to lead back to the
+ * root.  Every other node gets a fail pointer, and the pattern list of
+ * the node which that pointer leads to is appended to its own list, so
+ * that patterns ending at the failure node are reported as well.
+ *
+ * Returns CL_SUCCESS, or the error returned by bfs_enqueue(), in which
+ * case whatever is still queued is released first.
+ */
+static int ac_maketrans(struct cli_matcher *root)
+{
+    struct bfs_list *bfs = NULL, *bfs_last = NULL;
+    struct cli_ac_node *ac_root = root->ac_root, *child, *node, *fail;
+    struct cli_ac_patt *patt;
+    int i, ret;
+
+
+    /* First level: the children of the root fail back to the root */
+    for(i = 0; i < 256; i++) {
+        node = ac_root->trans[i];
+        if(!node) {
+            ac_root->trans[i] = ac_root;
+        } else {
+            node->fail = ac_root;
+            if((ret = bfs_enqueue(&bfs, &bfs_last, node)) != 0)
+                goto drain;
+        }
+    }
+
+    while((node = bfs_dequeue(&bfs, &bfs_last))) {
+        if(node->leaf)
+            continue;
+
+        for(i = 0; i < 256; i++) {
+            child = node->trans[i];
+            if(child) {
+                /*
+                 * Follow the failure links of the parent until a
+                 * node with a transition on i is found
+                 */
+                fail = node->fail;
+                while(fail->leaf || !fail->trans[i])
+                    fail = fail->fail;
+
+                child->fail = fail->trans[i];
+
+                if(child->list) {
+                    patt = child->list;
+                    while(patt->next)
+                        patt = patt->next;
+
+                    patt->next = child->fail->list;
+                } else {
+                    child->list = child->fail->list;
+                }
+
+                if(child->list)
+                    child->final = 1;
+
+                if((ret = bfs_enqueue(&bfs, &bfs_last, child)) != 0)
+                    goto drain;
+            }
+        }
+    }
+
+    return CL_SUCCESS;
+
+drain:
+    /* Don't leak the elements that are still waiting in the queue */
+    while(bfs_dequeue(&bfs, &bfs_last) != NULL)
+        ;
+    return ret;
+}
+
+/*
+ * Complete the trie of root once all its patterns have been added.
+ *
+ * Returns CL_EMALFDB if root is NULL, CL_SUCCESS if the matcher has
+ * not been initialised (there is nothing to do), otherwise the result
+ * of ac_maketrans().
+ */
+int cli_ac_buildtrie(struct cli_matcher *root)
+{
+    if(root == NULL)
+        return CL_EMALFDB;
+
+    if(root->ac_root != NULL)
+        return ac_maketrans(root);
+
+    cli_dbgmsg("cli_ac_buildtrie: AC pattern matcher is not initialised\n");
+    return CL_SUCCESS;
+}
+
+/*
+ * Initialise the Aho-Corasick part of root: allocate the root node of
+ * the trie together with its transition table and record the depth
+ * limits applied by cli_ac_addpatt().
+ *
+ * Returns CL_SUCCESS, or CL_EMEM if memory can't be allocated, in
+ * which case root->ac_root is NULL.
+ */
+int cli_ac_init(struct cli_matcher *root, uint8_t mindepth, uint8_t maxdepth)
+{
+
+    root->ac_root = (struct cli_ac_node *) cli_calloc(1, sizeof(struct cli_ac_node));
+    if(!root->ac_root) {
+        cli_errmsg("cli_ac_init: Can't allocate memory for ac_root\n");
+        return CL_EMEM;
+    }
+
+    root->ac_root->trans = (struct cli_ac_node **) cli_calloc(256, sizeof(struct cli_ac_node *));
+    if(!root->ac_root->trans) {
+        cli_errmsg("cli_ac_init: Can't allocate memory for ac_root->trans\n");
+        free(root->ac_root);
+        /*
+         * Don't leave a pointer to freed memory behind: cli_ac_free()
+         * and cli_ac_buildtrie() both look at ac_root
+         */
+        root->ac_root = NULL;
+        return CL_EMEM;
+    }
+
+    root->ac_mindepth = mindepth;
+    root->ac_maxdepth = maxdepth;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Free the alternatives of the pattern p: each chain found in
+ * p->alttable (with the strings it holds) and the table itself.
+ * Nothing is done if the pattern has no alternatives.
+ */
+static void ac_free_alt(struct cli_ac_patt *p)
+{
+    uint16_t idx;
+    struct cli_ac_alt *cur, *doomed;
+
+    if(p->alt == 0)
+        return;
+
+    for(idx = 0; idx < p->alt; idx++) {
+        cur = p->alttable[idx];
+        while(cur != NULL) {
+            /* move on before the element is released */
+            doomed = cur;
+            cur = cur->next;
+            free(doomed->str); /* may be NULL */
+            free(doomed);
+        }
+    }
+    free(p->alttable);
+}
+
+/*
+ * Free everything held by the Aho-Corasick part of root: the patterns
+ * listed in ac_pattable, the nodes listed in ac_nodetable, both tables
+ * and the root node of the trie.
+ */
+void cli_ac_free(struct cli_matcher *root)
+{
+    uint32_t idx;
+    struct cli_ac_patt *p;
+    struct cli_ac_node *node;
+
+    for(idx = 0; idx < root->ac_patterns; idx++) {
+        p = root->ac_pattable[idx];
+
+        /* when there is a prefix it is freed in place of the pattern */
+        if(p->prefix != NULL)
+            free(p->prefix);
+        else
+            free(p->pattern);
+        free(p->virname);
+        free(p->offset); /* may be NULL */
+        if(p->alt)
+            ac_free_alt(p);
+        free(p);
+    }
+    free(root->ac_pattable); /* may be NULL */
+
+    for(idx = 0; idx < root->ac_nodes; idx++) {
+        node = root->ac_nodetable[idx];
+        /* the table of a leaf is not freed */
+        if(!node->leaf)
+            free(node->trans);
+        free(node);
+    }
+    free(root->ac_nodetable); /* may be NULL */
+
+    /* the root node is not listed in ac_nodetable */
+    if(root->ac_root != NULL) {
+        free(root->ac_root->trans);
+        free(root->ac_root);
+    }
+}
+
+/*
+ * FIXME: the current support for string alternatives uses a brute-force
+ * approach and doesn't perform any kind of verification and
+ * backtracking. This may easily lead to false negatives, eg. when
+ * an alternative contains strings of different lengths and
+ * more than one of them can match at the current position.
+ */
+
+/*
+ * AC_MATCH_CHAR(p, b): compare one pattern element p against one buffer
+ * byte b and make the enclosing function return 0 on a mismatch.
+ *
+ * The macro is not self-contained: it reads and writes the locals wc,
+ * found, alt, j, altcnt, bp, buffer, length and pattern of the function it
+ * is expanded in. The wildcard class is p & CLI_MATCH_WILDCARD:
+ * - CLI_MATCH_CHAR: the low byte of p must equal b
+ * - CLI_MATCH_IGNORE: always matches
+ * - CLI_MATCH_ALTERNATIVE: in character mode b must equal one byte of
+ * pattern->alttable[altcnt]->str; otherwise the bytes at buffer[bp] must
+ * equal one string of the chain, in which case bp is advanced by that
+ * string's length minus one. altcnt is incremented after a match.
+ * - CLI_MATCH_NIBBLE_HIGH / CLI_MATCH_NIBBLE_LOW: only the high / low
+ * four bits are compared
+ * Any other class is logged as an error and treated as a mismatch.
+ */
+#define AC_MATCH_CHAR(p,b) \
+ switch(wc = p & CLI_MATCH_WILDCARD) { \
+ case CLI_MATCH_CHAR: \
+ if((unsigned char) p != b) \
+ return 0; \
+ break; \
+ \
+ case CLI_MATCH_IGNORE: \
+ break; \
+ \
+ case CLI_MATCH_ALTERNATIVE: \
+ found = 0; \
+ alt = pattern->alttable[altcnt]; \
+ if(alt->chmode) { \
+ for(j = 0; j < alt->num; j++) { \
+ if(alt->str[j] == b) { \
+ found = 1; \
+ break; \
+ } \
+ } \
+ } else { \
+ while(alt) { \
+ if(bp + alt->len <= length) { \
+ if(!memcmp(&buffer[bp], alt->str, alt->len)) { \
+ found = 1; \
+ bp += alt->len - 1; \
+ break; \
+ } \
+ } \
+ alt = alt->next; \
+ } \
+ } \
+ if(!found) \
+ return 0; \
+ altcnt++; \
+ break; \
+ \
+ case CLI_MATCH_NIBBLE_HIGH: \
+ if((unsigned char) (p & 0x00f0) != (b & 0xf0)) \
+ return 0; \
+ break; \
+ \
+ case CLI_MATCH_NIBBLE_LOW: \
+ if((unsigned char) (p & 0x000f) != (b & 0x0f)) \
+ return 0; \
+ break; \
+ \
+ default: \
+ cli_errmsg("ac_findmatch: Unknown wildcard 0x%x\n", wc); \
+ return 0; \
+ }
+
+/*
+ * Verify a candidate match of pattern whose trie-matched part starts at
+ * buffer[offset].
+ *
+ * The first pattern->depth elements are skipped (the caller derives offset
+ * from pattern->depth); the remaining elements are compared with
+ * AC_MATCH_CHAR, then the optional prefix that lies directly before offset.
+ *
+ * buffer, length - data being scanned and its size
+ * offset - buffer position of the first pattern element
+ * pattern - signature to verify
+ * end - receives the buffer position reached after the main pattern
+ * loop; it is written before the prefix is checked
+ *
+ * Returns 1 on a match, 0 otherwise (AC_MATCH_CHAR returns 0 directly from
+ * this function on a mismatch).
+ */
+inline static int ac_findmatch(const unsigned char *buffer, uint32_t offset, uint32_t length, const struct cli_ac_patt *pattern, uint32_t *end)
+{
+ /* wc, j, found, alt, altcnt and bp are used by name inside AC_MATCH_CHAR */
+ uint32_t bp;
+ uint16_t wc, i, j, altcnt = pattern->alt_pattern;
+ uint8_t found;
+ struct cli_ac_alt *alt;
+
+
+ /* the pattern must fit behind offset and the prefix in front of it */
+ if((offset + pattern->length > length) || (pattern->prefix_length > offset))
+ return 0;
+
+ bp = offset + pattern->depth;
+
+ for(i = pattern->depth; i < pattern->length && bp < length; i++) {
+ AC_MATCH_CHAR(pattern->pattern[i],buffer[bp]);
+ bp++;
+ }
+ *end = bp;
+
+ if(pattern->prefix) {
+ /* alternatives are numbered from the start of the prefix */
+ altcnt = 0;
+ bp = offset - pattern->prefix_length;
+
+ for(i = 0; i < pattern->prefix_length; i++) {
+ AC_MATCH_CHAR(pattern->prefix[i],buffer[bp]);
+ bp++;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Prepare the per-scan state used to track multi-part signatures.
+ *
+ * data - state to initialise
+ * partsigs - number of multi-part signatures; one offmatrix slot is
+ * allocated (zeroed) for each
+ * tracklen - currently unused
+ *
+ * Returns CL_SUCCESS, CL_ENULLARG when data is NULL or CL_EMEM when the
+ * slot array cannot be allocated. data->offmatrix is NULL when partsigs
+ * is 0 or the allocation failed.
+ */
+int cli_ac_initdata(struct cli_ac_data *data, uint32_t partsigs, uint8_t tracklen)
+{
+
+ if(!data) {
+ cli_errmsg("cli_ac_initdata: data == NULL\n");
+ return CL_ENULLARG;
+ }
+
+ data->partsigs = partsigs;
+ /* never leave the pointer indeterminate, even when nothing is allocated */
+ data->offmatrix = NULL;
+
+ if(!partsigs)
+ return CL_SUCCESS;
+
+ data->offmatrix = (int32_t ***) cli_calloc(partsigs, sizeof(int32_t **));
+ if(!data->offmatrix) {
+ cli_errmsg("cli_ac_initdata: Can't allocate memory for data->offmatrix\n");
+ return CL_EMEM;
+ }
+
+ return CL_SUCCESS;
+}
+
+/*
+ * Release the offset matrices held in data. Nothing is done when data is
+ * NULL or was set up for zero multi-part signatures. Each used slot owns
+ * a row array whose first row points at the block that is freed with it.
+ */
+void cli_ac_freedata(struct cli_ac_data *data)
+{
+ uint32_t sig;
+
+
+ if(!data || !data->partsigs)
+ return;
+
+ for(sig = 0; sig < data->partsigs; sig++) {
+ if(!data->offmatrix[sig])
+ continue;
+
+ free(data->offmatrix[sig][0]);
+ free(data->offmatrix[sig]);
+ }
+ free(data->offmatrix);
+}
+
+/*
+ * Append a (type, offset) record to the end of *list. The element count
+ * is kept in the head node; once it has reached MAX_EMBEDDED_OBJ further
+ * records are silently dropped.
+ *
+ * Returns CL_SUCCESS, or CL_EMEM when the new node cannot be allocated.
+ */
+inline static int ac_addtype(struct cli_matched_type **list, cli_file_t type, off_t offset)
+{
+ struct cli_matched_type *fresh, *tail;
+
+
+ if(*list != NULL && (*list)->cnt >= MAX_EMBEDDED_OBJ)
+ return CL_SUCCESS;
+
+ fresh = cli_calloc(1, sizeof(struct cli_matched_type));
+ if(fresh == NULL) {
+ cli_errmsg("cli_ac_addtype: Can't allocate memory for new type node\n");
+ return CL_EMEM;
+ }
+
+ fresh->type = type;
+ fresh->offset = offset;
+
+ if(*list == NULL) {
+ *list = fresh;
+ } else {
+ for(tail = *list; tail->next; tail = tail->next)
+ ;
+ tail->next = fresh;
+ }
+
+ (*list)->cnt++;
+ return CL_SUCCESS;
+}
+
+/*
+ * Scan buffer with the Aho-Corasick matcher of root.
+ *
+ * buffer, length - data to scan
+ * virname - if non-NULL, receives the name of the matched signature
+ * root - matcher whose trie is walked
+ * mdata - per-scan state for multi-part signatures (must not be NULL)
+ * otfrec - when non-zero, file type signatures are evaluated and the
+ * recorded type is returned instead of CL_CLEAN
+ * offset - added to buffer positions to form the offsets that are
+ * validated and recorded
+ * ftype, fd - handed to cli_validatesig() for offset/target checks
+ * ftoffset - optional list receiving embedded object types and offsets
+ *
+ * Returns CL_VIRUS on a signature match, CL_ENULLARG when mdata is NULL,
+ * CL_EMEM on allocation failure, the recorded type when otfrec is set,
+ * otherwise CL_CLEAN. info.exeinfo.section, which cli_validatesig() may
+ * allocate, is released on every exit path after info has been set up.
+ */
+int cli_ac_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, struct cli_ac_data *mdata, uint8_t otfrec, uint32_t offset, cli_file_t ftype, int fd, struct cli_matched_type **ftoffset)
+{
+ struct cli_ac_node *current;
+ struct cli_ac_patt *patt, *pt;
+ uint32_t i, bp, realoff, matchend;
+ uint16_t j;
+ int32_t **offmatrix;
+ uint8_t found;
+ struct cli_target_info info;
+ int type = CL_CLEAN;
+
+
+ if(!root->ac_root)
+ return CL_CLEAN;
+
+ if(!mdata) {
+ cli_errmsg("cli_ac_scanbuff: mdata == NULL\n");
+ return CL_ENULLARG;
+ }
+
+ memset(&info, 0, sizeof(info));
+ current = root->ac_root;
+
+ for(i = 0; i < length; i++) {
+
+ /* follow failure links until a transition for this byte exists */
+ while(current->leaf || !current->trans[buffer[i]])
+ current = current->fail;
+
+ current = current->trans[buffer[i]];
+
+ if(current->final) {
+ patt = current->list;
+ while(patt) {
+ /* buffer position where the trie-matched part of patt started */
+ bp = i + 1 - patt->depth;
+ if(ac_findmatch(buffer, bp, length, patt, &matchend)) {
+ /* walk all patterns chained through next_same */
+ pt = patt;
+ while(pt) {
+ realoff = offset + bp - pt->prefix_length;
+
+ if((pt->offset || pt->target) && (!pt->sigid || pt->partno == 1)) {
+ if((fd == -1 && !ftype) || !cli_validatesig(ftype, pt->offset, realoff, &info, fd, pt->virname)) {
+ pt = pt->next_same;
+ continue;
+ }
+ }
+
+ if(pt->sigid) { /* it's a partial signature */
+
+ /* a later part only counts once the previous part has been seen */
+ if(pt->partno != 1 && (!mdata->offmatrix[pt->sigid - 1] || !mdata->offmatrix[pt->sigid - 1][pt->partno - 2][0])) {
+ pt = pt->next_same;
+ continue;
+ }
+
+ if(!mdata->offmatrix[pt->sigid - 1]) {
+ mdata->offmatrix[pt->sigid - 1] = cli_malloc(pt->parts * sizeof(int32_t *));
+ if(!mdata->offmatrix[pt->sigid - 1]) {
+ cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u]\n", pt->sigid - 1);
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+ return CL_EMEM;
+ }
+
+ /* one contiguous block holds all rows; row j starts at j * (AC_DEFAULT_TRACKLEN + 1) */
+ mdata->offmatrix[pt->sigid - 1][0] = cli_malloc(pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
+ if(!mdata->offmatrix[pt->sigid - 1][0]) {
+ cli_errmsg("cli_ac_scanbuff: Can't allocate memory for mdata->offmatrix[%u][0]\n", pt->sigid - 1);
+ free(mdata->offmatrix[pt->sigid - 1]);
+ mdata->offmatrix[pt->sigid - 1] = NULL;
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+ return CL_EMEM;
+ }
+ /* -1 marks an unused slot; element 0 of each row is its write index */
+ memset(mdata->offmatrix[pt->sigid - 1][0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
+ mdata->offmatrix[pt->sigid - 1][0][0] = 0;
+ for(j = 1; j < pt->parts; j++) {
+ mdata->offmatrix[pt->sigid - 1][j] = mdata->offmatrix[pt->sigid - 1][0] + j * (AC_DEFAULT_TRACKLEN + 1);
+ mdata->offmatrix[pt->sigid - 1][j][0] = 0;
+ }
+ }
+ offmatrix = mdata->offmatrix[pt->sigid - 1];
+
+ if(pt->partno != 1) {
+ /* look for a recorded end of the previous part within [mindist, maxdist] */
+ found = 0;
+ for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[pt->partno - 2][j] != -1; j++) {
+ found = 1;
+ if(pt->maxdist)
+ if(realoff - offmatrix[pt->partno - 2][j] > pt->maxdist)
+ found = 0;
+
+ if(found && pt->mindist)
+ if(realoff - offmatrix[pt->partno - 2][j] < pt->mindist)
+ found = 0;
+
+ if(found)
+ break;
+ }
+ }
+
+ if(pt->partno == 1 || (found && (pt->partno != pt->parts))) {
+ /* record where this part ended, cycling through the row's slots */
+ offmatrix[pt->partno - 1][0] %= AC_DEFAULT_TRACKLEN;
+ offmatrix[pt->partno - 1][0]++;
+ offmatrix[pt->partno - 1][offmatrix[pt->partno - 1][0]] = offset + matchend;
+
+ if(pt->partno == 1) /* save realoff for the first part */
+ offmatrix[pt->parts - 1][offmatrix[pt->partno - 1][0]] = realoff;
+ } else if(found && pt->partno == pt->parts) {
+ if(pt->type) {
+ if(otfrec) {
+ if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
+ cli_dbgmsg("Matched signature for file type %s\n", pt->virname);
+ type = pt->type;
+ if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) {
+ /* FIXME: we don't know which offset of the first part is the correct one */
+ for(j = 1; j <= AC_DEFAULT_TRACKLEN && offmatrix[0][j] != -1; j++) {
+ if(ac_addtype(ftoffset, type, offmatrix[pt->parts - 1][j])) {
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+ return CL_EMEM;
+ }
+ }
+ }
+
+ /* reset the tracking state of this signature */
+ memset(offmatrix[0], -1, pt->parts * (AC_DEFAULT_TRACKLEN + 1) * sizeof(int32_t));
+ for(j = 0; j < pt->parts; j++)
+ offmatrix[j][0] = 0;
+ }
+ }
+
+ } else { /* !pt->type */
+ if(virname)
+ *virname = pt->virname;
+
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+
+ return CL_VIRUS;
+ }
+ }
+
+ } else { /* old type signature */
+ if(pt->type) {
+ if(otfrec) {
+ if(pt->type > type || pt->type >= CL_TYPE_SFX || pt->type == CL_TYPE_MSEXE) {
+ cli_dbgmsg("Matched signature for file type %s at %u\n", pt->virname, realoff);
+ type = pt->type;
+ if(ftoffset && (!*ftoffset || (*ftoffset)->cnt < MAX_EMBEDDED_OBJ) && ((ftype == CL_TYPE_MSEXE && type >= CL_TYPE_SFX) || ((ftype == CL_TYPE_MSEXE || ftype == CL_TYPE_ZIP) && type == CL_TYPE_MSEXE))) {
+
+ if(ac_addtype(ftoffset, type, realoff)) {
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+ return CL_EMEM;
+ }
+ }
+ }
+ }
+ } else {
+ if(virname)
+ *virname = pt->virname;
+
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+ return CL_VIRUS;
+ }
+ }
+ pt = pt->next_same;
+ }
+ }
+ patt = patt->next;
+ }
+ }
+ }
+
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+
+ return otfrec ? type : CL_CLEAN;
+}
+
+/* FIXME: clean up the code */
+/*
+ * Parse a hexadecimal signature and add it to the Aho-Corasick matcher.
+ *
+ * root - matcher receiving the pattern
+ * virname - signature name; a trailing " (Clam)" is cut off
+ * hexsig - signature body; "(a|b|...)" groups are turned into
+ * alternative tables and replaced by "()" before conversion
+ * sigid, parts, partno, mindist, maxdist
+ * - multi-part signature bookkeeping, copied into the pattern
+ * type - file type for type signatures, copied into the pattern
+ * offset - optional offset string, duplicated when non-NULL
+ * target - target type, copied into the pattern
+ *
+ * When the first root->ac_maxdepth elements contain a wildcard or are all
+ * zero, a static subpattern is searched further inside the signature and
+ * everything before it becomes the pattern's prefix.
+ *
+ * Returns CL_SUCCESS, CL_EPATSHORT, CL_EMEM, CL_EMALFDB, or the error
+ * returned by cli_ac_addpatt(). All memory allocated here is released on
+ * every error path.
+ */
+int cli_ac_addsig(struct cli_matcher *root, const char *virname, const char *hexsig, uint32_t sigid, uint16_t parts, uint16_t partno, uint16_t type, uint32_t mindist, uint32_t maxdist, const char *offset, uint8_t target)
+{
+ struct cli_ac_patt *new;
+ char *pt, *hex = NULL;
+ uint16_t i, j, ppos = 0, pend;
+ uint8_t wprefix = 0, zprefix = 1, namelen, plen = 0;
+ struct cli_ac_alt *newalt, *altpt, **newtable;
+ int ret, error = CL_SUCCESS;
+
+
+ if(strlen(hexsig) / 2 < root->ac_mindepth)
+ return CL_EPATSHORT;
+
+ if((new = (struct cli_ac_patt *) cli_calloc(1, sizeof(struct cli_ac_patt))) == NULL)
+ return CL_EMEM;
+
+ new->type = type;
+ new->sigid = sigid;
+ new->parts = parts;
+ new->partno = partno;
+ new->mindist = mindist;
+ new->maxdist = maxdist;
+ new->target = target;
+
+ if(strchr(hexsig, '(')) {
+ char *hexcpy, *hexnew, *start, *h, *c;
+
+ if(!(hexcpy = cli_strdup(hexsig))) {
+ free(new);
+ return CL_EMEM;
+ }
+
+ /* hexnew receives the signature with every group collapsed to "()" */
+ if(!(hexnew = (char *) cli_calloc(strlen(hexsig) + 1, 1))) {
+ free(hexcpy);
+ free(new);
+ return CL_EMEM;
+ }
+
+ start = pt = hexcpy;
+ while((pt = strchr(start, '('))) {
+ *pt++ = 0;
+
+ if(!start) {
+ error = CL_EMALFDB;
+ break;
+ }
+
+ strcat(hexnew, start);
+ strcat(hexnew, "()");
+
+ if(!(start = strchr(pt, ')'))) {
+ error = CL_EMALFDB;
+ break;
+ }
+ *start++ = 0;
+
+ newalt = (struct cli_ac_alt *) cli_calloc(1, sizeof(struct cli_ac_alt));
+ if(!newalt) {
+ cli_errmsg("cli_ac_addsig: Can't allocate newalt\n");
+ error = CL_EMEM;
+ break;
+ }
+
+ new->alt++;
+ newtable = (struct cli_ac_alt **) cli_realloc(new->alttable, new->alt * sizeof(struct cli_ac_alt *));
+ if(!newtable) {
+ new->alt--;
+ free(newalt);
+ cli_errmsg("cli_ac_addsig: Can't realloc new->alttable\n");
+ error = CL_EMEM;
+ break;
+ }
+ newtable[new->alt - 1] = newalt;
+ new->alttable = newtable;
+
+ for(i = 0; i < strlen(pt); i++)
+ if(pt[i] == '|')
+ newalt->num++;
+
+ if(!newalt->num) {
+ error = CL_EMALFDB;
+ break;
+ } else
+ newalt->num++;
+
+ /* "xx|xx|...": every alternative is a single byte, store them in one array */
+ if(3 * newalt->num - 1 == (uint16_t) strlen(pt)) {
+ newalt->chmode = 1;
+ newalt->str = (unsigned char *) cli_malloc(newalt->num);
+ if(!newalt->str) {
+ cli_errmsg("cli_ac_addsig: Can't allocate newalt->str\n");
+ error = CL_EMEM;
+ break;
+ }
+ }
+
+ for(i = 0; i < newalt->num; i++) {
+ if(!(h = cli_strtok(pt, i, "|"))) {
+ error = CL_EMALFDB;
+ break;
+ }
+
+ if(!(c = cli_hex2str(h))) {
+ free(h);
+ error = CL_EMALFDB;
+ break;
+ }
+
+ if(newalt->chmode) {
+ newalt->str[i] = *c;
+ free(c);
+ } else {
+ if(i) {
+ /* string alternatives after the first are chained behind newalt */
+ altpt = newalt;
+ while(altpt->next)
+ altpt = altpt->next;
+
+ altpt->next = (struct cli_ac_alt *) cli_calloc(1, sizeof(struct cli_ac_alt));
+ if(!altpt->next) {
+ cli_errmsg("cli_ac_addsig: Can't allocate altpt->next\n");
+ error = CL_EMEM;
+ free(c);
+ free(h);
+ break;
+ }
+
+ altpt->next->str = (unsigned char *) c;
+ altpt->next->len = strlen(h) / 2;
+ } else {
+ newalt->str = (unsigned char *) c;
+ newalt->len = strlen(h) / 2;
+ }
+ }
+
+ free(h);
+ }
+
+ if(error)
+ break;
+ }
+
+ if(start)
+ strcat(hexnew, start);
+
+ hex = hexnew;
+ free(hexcpy);
+
+ if(error) {
+ /*
+ * hex is always allocated at this point, even when the failure
+ * happened before the first alternative was registered
+ * (new->alt == 0); ac_free_alt() is a no-op in that case.
+ */
+ free(hex);
+ ac_free_alt(new);
+ free(new);
+ return error;
+ }
+ }
+
+ if((new->pattern = cli_hex2ui(new->alt ? hex : hexsig)) == NULL) {
+ if(new->alt) {
+ free(hex);
+ ac_free_alt(new);
+ }
+ free(new);
+ return CL_EMALFDB;
+ }
+ new->length = strlen(new->alt ? hex : hexsig) / 2;
+ if(new->alt)
+ free(hex);
+
+ /* does the part that would go into the trie contain wildcards or only zeros? */
+ for(i = 0; i < root->ac_maxdepth && i < new->length; i++) {
+ if(new->pattern[i] & CLI_MATCH_WILDCARD) {
+ wprefix = 1;
+ break;
+ }
+ if(zprefix && new->pattern[i])
+ zprefix = 0;
+ }
+
+ if(wprefix || zprefix) {
+ /* search a wildcard-free run (position ppos, length plen) to use instead */
+ pend = new->length - root->ac_mindepth + 1;
+ for(i = 0; i < pend; i++) {
+ for(j = i; j < i + root->ac_maxdepth && j < new->length; j++) {
+ if(new->pattern[j] & CLI_MATCH_WILDCARD) {
+ break;
+ } else {
+ if(j - i + 1 >= plen) {
+ plen = j - i + 1;
+ ppos = i;
+ }
+ }
+ if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
+ break;
+ }
+ if(plen >= root->ac_maxdepth && (new->pattern[ppos] || new->pattern[ppos + 1]))
+ break;
+ }
+
+ if(plen < root->ac_mindepth) {
+ cli_errmsg("cli_ac_addsig: Can't find a static subpattern of length %u\n", root->ac_mindepth);
+ ac_free_alt(new);
+ free(new->pattern);
+ free(new);
+ return CL_EMALFDB;
+ }
+
+ /* prefix keeps the allocation; pattern now points into it */
+ new->prefix = new->pattern;
+ new->prefix_length = ppos;
+ new->pattern = &new->prefix[ppos];
+ new->length -= ppos;
+
+ /* number of alternatives that belong to the prefix */
+ for(i = 0; i < new->prefix_length; i++)
+ if((new->prefix[i] & CLI_MATCH_WILDCARD) == CLI_MATCH_ALTERNATIVE)
+ new->alt_pattern++;
+ }
+
+ if(new->length > root->maxpatlen)
+ root->maxpatlen = new->length;
+
+ if((pt = strstr(virname, " (Clam)")))
+ namelen = strlen(virname) - strlen(pt);
+ else
+ namelen = strlen(virname);
+
+ if(!namelen) {
+ cli_errmsg("cli_ac_addsig: No virus name\n");
+ if(new->prefix)
+ free(new->prefix);
+ else
+ free(new->pattern);
+ ac_free_alt(new);
+ free(new);
+ return CL_EMALFDB;
+ }
+
+ if((new->virname = cli_calloc(namelen + 1, sizeof(char))) == NULL) {
+ if(new->prefix)
+ free(new->prefix);
+ else
+ free(new->pattern);
+ ac_free_alt(new);
+ free(new);
+ return CL_EMEM;
+ }
+ /* the buffer is zeroed and one byte longer, so the copy stays terminated */
+ strncpy(new->virname, virname, namelen);
+
+ if(offset) {
+ new->offset = cli_strdup(offset);
+ if(!new->offset) {
+ if(new->prefix)
+ free(new->prefix);
+ else
+ free(new->pattern);
+ ac_free_alt(new);
+ free(new->virname);
+ free(new);
+ return CL_EMEM;
+ }
+ }
+
+ if((ret = cli_ac_addpatt(root, new))) {
+ if(new->prefix)
+ free(new->prefix);
+ else
+ free(new->pattern);
+ free(new->virname);
+ ac_free_alt(new);
+ if(new->offset)
+ free(new->offset);
+ free(new);
+ return ret;
+ }
+
+ return CL_SUCCESS;
+}
+
+/*
+ * Store the given minimum and maximum depth in the file-level variables
+ * cli_ac_mindepth and cli_ac_maxdepth.
+ */
+void cli_ac_setdepth(uint8_t mindepth, uint8_t maxdepth)
+{
+ cli_ac_maxdepth = maxdepth;
+ cli_ac_mindepth = mindepth;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher-bm.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2004 - 2005 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+
+#include "clamav.h"
+#include "memory.h"
+#include "others.h"
+#include "cltypes.h"
+#include "matcher.h"
+#include "matcher-bm.h"
+#include "filetypes.h"
+
+/* shortest pattern the Boyer-Moore matcher accepts */
+#define BM_MIN_LENGTH 3
+/* number of consecutive bytes hashed into one table index */
+#define BM_BLOCK_SIZE 3
+/* table index for a three byte block; arguments are parenthesised so that
+ * expressions such as HASH(x + 1, y, z) expand as intended */
+#define HASH(a,b,c) (211 * (a) + 37 * (b) + (c))
+
+/*
+ * Insert pattern into the Boyer-Moore tables of root.
+ *
+ * The pattern is filed under the hash of three consecutive pattern bytes.
+ * When BM_MIN_LENGTH equals BM_BLOCK_SIZE, the first position whose hash
+ * bucket is still empty is preferred; the bytes before that position then
+ * become the pattern's prefix.
+ *
+ * Returns CL_SUCCESS, or CL_EPATSHORT when the pattern is shorter than
+ * BM_MIN_LENGTH.
+ */
+int cli_bm_addpatt(struct cli_matcher *root, struct cli_bm_patt *pattern)
+{
+ uint16_t idx, i;
+ const unsigned char *pt = pattern->pattern;
+ struct cli_bm_patt *prev, *next = NULL;
+
+
+ if(pattern->length < BM_MIN_LENGTH) {
+ cli_errmsg("Signature for %s is too short\n", pattern->virname);
+ return CL_EPATSHORT;
+ }
+
+#if BM_MIN_LENGTH == BM_BLOCK_SIZE
+ /* try to load balance bm_suffix (at the cost of bm_shift) */
+ for(i = 0; i < pattern->length - BM_BLOCK_SIZE + 1; i++) {
+ idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
+ if(!root->bm_suffix[idx]) {
+ if(i) {
+ /* prefix keeps the original buffer start; pattern points i elements into it */
+ pattern->prefix = pattern->pattern;
+ pattern->prefix_length = i;
+ pattern->pattern = &pattern->pattern[i];
+ pattern->length -= i;
+ pt = pattern->pattern;
+ }
+ break;
+ }
+ }
+#endif
+
+ /* lower the shift of every block in the first BM_MIN_LENGTH bytes; idx keeps the last hash */
+ for(i = 0; i <= BM_MIN_LENGTH - BM_BLOCK_SIZE; i++) {
+ idx = HASH(pt[i], pt[i + 1], pt[i + 2]);
+ root->bm_shift[idx] = MIN(root->bm_shift[idx], BM_MIN_LENGTH - BM_BLOCK_SIZE - i);
+ }
+
+ /* find the insertion point: stop at the first entry whose first byte is not greater */
+ prev = next = root->bm_suffix[idx];
+ while(next) {
+ if(pt[0] >= next->pattern[0])
+ break;
+ prev = next;
+ next = next->next;
+ }
+
+ if(next == root->bm_suffix[idx]) {
+ /* new head (or first entry): take over the bucket's counter */
+ pattern->next = root->bm_suffix[idx];
+ if(root->bm_suffix[idx])
+ pattern->cnt = root->bm_suffix[idx]->cnt;
+ root->bm_suffix[idx] = pattern;
+ } else {
+ pattern->next = prev->next;
+ prev->next = pattern;
+ }
+ /* the head node counts the entries of its bucket */
+ root->bm_suffix[idx]->cnt++;
+
+ return CL_SUCCESS;
+}
+
+/*
+ * Allocate the Boyer-Moore shift and suffix tables of root, one entry per
+ * possible HASH() value, and set every shift to its initial value
+ * BM_MIN_LENGTH - BM_BLOCK_SIZE + 1.
+ *
+ * Returns CL_SUCCESS or CL_EMEM. On failure root->bm_shift is NULL, so
+ * cli_bm_free() and cli_bm_scanbuff(), which both test it, never use a
+ * freed table.
+ */
+int cli_bm_init(struct cli_matcher *root)
+{
+ uint16_t i, size = HASH(255, 255, 255) + 1;
+
+
+ if(!(root->bm_shift = (uint8_t *) cli_malloc(size * sizeof(uint8_t))))
+ return CL_EMEM;
+
+ if(!(root->bm_suffix = (struct cli_bm_patt **) cli_calloc(size, sizeof(struct cli_bm_patt *)))) {
+ free(root->bm_shift);
+ /* do not leave a dangling pointer behind */
+ root->bm_shift = NULL;
+ return CL_EMEM;
+ }
+
+ for(i = 0; i < size; i++)
+ root->bm_shift[i] = BM_MIN_LENGTH - BM_BLOCK_SIZE + 1;
+
+ return CL_SUCCESS;
+}
+
+/*
+ * Release the Boyer-Moore tables of root: the shift table, every pattern
+ * chained in the suffix table (with its buffers) and the suffix table.
+ */
+void cli_bm_free(struct cli_matcher *root)
+{
+ struct cli_bm_patt *cur, *doomed;
+ uint16_t slot, nslots = HASH(255, 255, 255) + 1;
+
+
+ if(root->bm_shift)
+ free(root->bm_shift);
+
+ if(!root->bm_suffix)
+ return;
+
+ for(slot = 0; slot < nslots; slot++) {
+ cur = root->bm_suffix[slot];
+ while(cur) {
+ doomed = cur;
+ cur = cur->next;
+
+ /* only one of the two buffers is released: prefix when set, else pattern */
+ if(!doomed->prefix)
+ free(doomed->pattern);
+ else
+ free(doomed->prefix);
+ if(doomed->virname)
+ free(doomed->virname);
+ if(doomed->offset)
+ free(doomed->offset);
+ free(doomed);
+ }
+ }
+ free(root->bm_suffix);
+}
+
+/*
+ * Scan buffer with the Boyer-Moore matcher of root.
+ *
+ * buffer, length - data to scan
+ * virname - if non-NULL, receives the name of the matched signature
+ * root - matcher providing bm_shift and bm_suffix
+ * offset - added to the buffer position when a signature with an
+ * offset or target restriction is validated
+ * ftype, fd - handed to cli_validatesig()
+ *
+ * Returns CL_VIRUS on a match, otherwise CL_CLEAN (also when the matcher
+ * has no shift table or the buffer is shorter than BM_MIN_LENGTH).
+ */
+int cli_bm_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cli_matcher *root, uint32_t offset, cli_file_t ftype, int fd)
+{
+ uint32_t i, j, off;
+ uint8_t found, pchain, shift;
+ uint16_t idx, idxchk;
+ struct cli_bm_patt *p;
+ const unsigned char *bp, *pt;
+ unsigned char prefix;
+ struct cli_target_info info;
+
+
+ if(!root->bm_shift)
+ return CL_CLEAN;
+
+ if(length < BM_MIN_LENGTH)
+ return CL_CLEAN;
+
+ memset(&info, 0, sizeof(info));
+
+ for(i = BM_MIN_LENGTH - BM_BLOCK_SIZE; i < length - BM_BLOCK_SIZE + 1; ) {
+ idx = HASH(buffer[i], buffer[i + 1], buffer[i + 2]);
+ shift = root->bm_shift[idx];
+
+ /* shift 0: some pattern starts with this block, check the bucket */
+ if(shift == 0) {
+ prefix = buffer[i - BM_MIN_LENGTH + BM_BLOCK_SIZE];
+ p = root->bm_suffix[idx];
+ pchain = 0;
+ while(p) {
+ /* skip entries with another first byte; stop once a run of matching ones has ended */
+ if(p->pattern[0] != prefix) {
+ if(pchain)
+ break;
+ p = p->next;
+ continue;
+ } else pchain = 1;
+
+ off = i - BM_MIN_LENGTH + BM_BLOCK_SIZE;
+ bp = buffer + off;
+
+ /* pattern must fit behind off and its prefix in front of it */
+ if((off + p->length > length) || (p->prefix_length > off)) {
+ p = p->next;
+ continue;
+ }
+
+ /* cheap pre-check of the last and the middle byte */
+ idxchk = MIN(p->length, length - off) - 1;
+ if(idxchk) {
+ if((bp[idxchk] != p->pattern[idxchk]) || (bp[idxchk / 2] != p->pattern[idxchk / 2])) {
+ p = p->next;
+ continue;
+ }
+ }
+
+ /* compare from the start of the prefix when there is one */
+ if(p->prefix_length) {
+ off -= p->prefix_length;
+ bp -= p->prefix_length;
+ pt = p->prefix;
+ } else {
+ pt = p->pattern;
+ }
+
+ found = 1;
+ for(j = 0; j < p->length + p->prefix_length && off < length; j++, off++) {
+ if(bp[j] != pt[j]) {
+ found = 0;
+ break;
+ }
+ }
+
+ if(found && p->length + p->prefix_length == j) {
+
+ if(p->target || p->offset) {
+ off = offset + i - p->prefix_length - BM_MIN_LENGTH + BM_BLOCK_SIZE;
+ if((fd == -1 && !ftype) || !cli_validatesig(ftype, p->offset, off, &info, fd, p->virname)) {
+ p = p->next;
+ continue;
+ }
+ }
+
+ if(virname)
+ *virname = p->virname;
+
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+
+ return CL_VIRUS;
+ }
+
+ p = p->next;
+ }
+
+ /* nothing matched here, advance by one byte */
+ shift = 1;
+ }
+
+ i += shift;
+ }
+
+ if(info.exeinfo.section)
+ free(info.exeinfo.section);
+
+ return CL_CLEAN;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_matcher.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,439 @@
+/*
+ * Copyright (C) 2002 - 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <string.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "matcher-ac.h"
+#include "matcher-bm.h"
+#include "md5.h"
+#include "filetypes.h"
+#include "matcher.h"
+#include "pe.h"
+#include "elf.h"
+#include "execs.h"
+#include "special.h"
+#include "str.h"
+#include "cltypes.h"
+
+/*
+ * File type handled by each target-specific matcher: entry i is compared
+ * with the scanned file type to select engine->root[i]. Entry 0 is not
+ * searched; root[0] holds the generic signatures.
+ */
+static cli_file_t targettab[CL_TARGET_TABLE_SIZE] = { 0, CL_TYPE_MSEXE, CL_TYPE_MSOLE2, CL_TYPE_HTML, CL_TYPE_MAIL, CL_TYPE_GRAPHICS, CL_TYPE_ELF };
+
+/*
+ * Scan a memory buffer, first with the matcher specific to ftype (if one
+ * exists) and then with the generic matcher. For each matcher the
+ * Boyer-Moore scan runs first unless the matcher is AC-only; the
+ * Aho-Corasick scan follows unless Boyer-Moore already reported CL_VIRUS.
+ *
+ * Returns CL_ENULLARG when engine is NULL, an error from
+ * cli_ac_initdata(), CL_VIRUS as soon as a matcher reports it, otherwise
+ * the result of the generic Aho-Corasick scan.
+ */
+int cli_scanbuff(const unsigned char *buffer, uint32_t length, const char **virname, const struct cl_engine *engine, cli_file_t ftype)
+{
+ int rc = CL_CLEAN;
+ unsigned int slot;
+ struct cli_ac_data acdata;
+ struct cli_matcher *generic, *specific = NULL;
+
+
+ if(engine == NULL) {
+ cli_errmsg("cli_scanbuff: engine == NULL\n");
+ return CL_ENULLARG;
+ }
+
+ generic = engine->root[0]; /* generic signatures */
+
+ /* look up the matcher dedicated to this file type, if any */
+ for(slot = 1; ftype && slot < CL_TARGET_TABLE_SIZE; slot++) {
+ if(targettab[slot] == ftype) {
+ specific = engine->root[slot];
+ break;
+ }
+ }
+
+ if(specific != NULL) {
+
+ rc = cli_ac_initdata(&acdata, specific->ac_partsigs, AC_DEFAULT_TRACKLEN);
+ if(rc)
+ return rc;
+
+ if(!specific->ac_only)
+ rc = cli_bm_scanbuff(buffer, length, virname, specific, 0, ftype, -1);
+ if(specific->ac_only || rc != CL_VIRUS)
+ rc = cli_ac_scanbuff(buffer, length, virname, specific, &acdata, 0, 0, ftype, -1, NULL);
+
+ cli_ac_freedata(&acdata);
+
+ if(rc == CL_VIRUS)
+ return rc;
+ }
+
+ rc = cli_ac_initdata(&acdata, generic->ac_partsigs, AC_DEFAULT_TRACKLEN);
+ if(rc)
+ return rc;
+
+ if(!generic->ac_only)
+ rc = cli_bm_scanbuff(buffer, length, virname, generic, 0, ftype, -1);
+ if(generic->ac_only || rc != CL_VIRUS)
+ rc = cli_ac_scanbuff(buffer, length, virname, generic, &acdata, 0, 0, ftype, -1, NULL);
+
+ cli_ac_freedata(&acdata);
+
+ return rc;
+}
+
+/*
+ * Look up a 16 byte MD5 digest in the engine's hash list. The bucket is
+ * selected by the first digest byte.
+ *
+ * Returns the matching node, or NULL when the digest is not listed.
+ */
+struct cli_md5_node *cli_vermd5(const unsigned char *md5, const struct cl_engine *engine)
+{
+ struct cli_md5_node *node;
+
+
+ for(node = engine->md5_hlist[md5[0] & 0xff]; node != NULL; node = node->next) {
+ if(memcmp(node->md5, md5, 16) == 0)
+ return node;
+ }
+
+ return NULL;
+}
+
+/*
+ * Translate a signature offset string into a file offset.
+ *
+ * Forms handled: a plain number, "EP+n" / "EP-n" (relative to
+ * info->exeinfo.ep), "Sx+n" (relative to the raw offset of section x),
+ * "SL+n" (relative to the raw offset of the last section) and "EOF-n"
+ * (relative to the file size). The number following a ',' is stored in
+ * *maxshift.
+ *
+ * offstr - offset string
+ * info - cache for executable header data (status: 0 = not parsed yet,
+ * 1 = parsed, -1 = failed) and the file size
+ * fd - descriptor used to read the header or stat the file
+ * ftype - selects the header parser (CL_TYPE_MSEXE or CL_TYPE_ELF)
+ * ret - set to 0 on success and -1 when no offset could be calculated
+ * maxshift - written only when offstr contains a ','
+ *
+ * Returns the calculated offset, or 0 together with *ret == -1 on failure.
+ */
+off_t cli_caloff(const char *offstr, struct cli_target_info *info, int fd, cli_file_t ftype, int *ret, unsigned int *maxshift)
+{
+ int (*einfo)(int, struct cli_exe_info *) = NULL;
+ unsigned int n, val;
+ const char *pt;
+ off_t pos, offset;
+
+
+ *ret = 0;
+
+ /* entry point and section based offsets need the executable header */
+ if(!strncmp(offstr, "EP", 2) || offstr[0] == 'S') {
+
+ if(info->status == -1) {
+ *ret = -1;
+ return 0;
+
+ } else if(!info->status) {
+
+ if(ftype == CL_TYPE_MSEXE)
+ einfo = cli_peheader;
+ else if(ftype == CL_TYPE_ELF)
+ einfo = cli_elfheader;
+
+ if(einfo) {
+ /* remember the current position so it can be restored after parsing */
+ if((pos = lseek(fd, 0, SEEK_CUR)) == -1) {
+ cli_dbgmsg("Invalid descriptor\n");
+ info->status = *ret = -1;
+ return 0;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+ if(einfo(fd, &info->exeinfo)) {
+ lseek(fd, pos, SEEK_SET);
+ info->status = *ret = -1;
+ return 0;
+ }
+ lseek(fd, pos, SEEK_SET);
+ info->status = 1;
+ }
+ }
+ }
+
+ if((pt = strchr(offstr, ',')))
+ *maxshift = atoi(++pt);
+
+ if(isdigit(offstr[0])) {
+ return atoi(offstr);
+
+ } else if(info->status == 1 && (!strncmp(offstr, "EP+", 3) || !strncmp(offstr, "EP-", 3))) {
+
+ if(offstr[2] == '+')
+ return info->exeinfo.ep + atoi(offstr + 3);
+ else
+ return info->exeinfo.ep - atoi(offstr + 3);
+
+ } else if(info->status == 1 && offstr[0] == 'S') {
+
+ /* NOTE(review): indexes section[nsections - 1]; assumes the header parser never reports 0 sections - confirm */
+ if(!strncmp(offstr, "SL", 2) && info->exeinfo.section[info->exeinfo.nsections - 1].rsz) {
+
+ if(sscanf(offstr, "SL+%u", &val) != 1) {
+ *ret = -1;
+ return 0;
+ }
+
+ offset = val + info->exeinfo.section[info->exeinfo.nsections - 1].raw;
+
+ } else {
+
+ if(sscanf(offstr, "S%u+%u", &n, &val) != 2) {
+ *ret = -1;
+ return 0;
+ }
+
+ /* reject unknown sections and sections with a zero rsz */
+ if(n >= info->exeinfo.nsections || !info->exeinfo.section[n].rsz) {
+ *ret = -1;
+ return 0;
+ }
+
+ offset = val + info->exeinfo.section[n].raw;
+ }
+
+ return offset;
+
+ } else if(!strncmp(offstr, "EOF-", 4)) {
+ struct stat sb;
+
+ /* the file size is looked up once and cached in info */
+ if(!info->fsize) {
+ if(fstat(fd, &sb) == -1) {
+ info->status = *ret = -1;
+ return 0;
+ }
+ info->fsize = sb.st_size;
+ }
+
+ return info->fsize - atoi(offstr + 4);
+ }
+
+ *ret = -1;
+ return 0;
+}
+
+/*
+ * Check whether the file behind fd is listed as a known false positive.
+ *
+ * The MD5 digest of the file is looked up in the engine's hash list; a
+ * node flagged as fp whose recorded size equals the file size means the
+ * detection must be discarded.
+ *
+ * Returns 1 when the file is a listed false positive and 0 otherwise.
+ * Failures (digest cannot be computed, fstat() fails) also return 0: both
+ * callers treat any non-zero value as "false positive", so an error must
+ * never turn a detection into CL_CLEAN. The digest buffer is released on
+ * every path.
+ */
+static int cli_checkfp(int fd, const struct cl_engine *engine)
+{
+ struct cli_md5_node *md5_node;
+ unsigned char *digest;
+ struct stat sb;
+
+
+ if(!engine->md5_hlist)
+ return 0;
+
+ if(!(digest = cli_md5digest(fd))) {
+ cli_errmsg("cli_checkfp(): Can't generate MD5 checksum\n");
+ return 0;
+ }
+
+ if((md5_node = cli_vermd5(digest, engine)) && md5_node->fp) {
+
+ if(fstat(fd, &sb)) {
+ cli_errmsg("cli_checkfp(): fstat() failed\n");
+ free(digest);
+ return 0;
+ }
+
+ if((unsigned int) sb.st_size != md5_node->size) {
+ cli_warnmsg("Detected false positive MD5 match. Please report.\n");
+ } else {
+ cli_dbgmsg("Eliminated false positive match (fp sig: %s)\n", md5_node->virname);
+ free(digest);
+ return 1;
+ }
+ }
+
+ free(digest);
+
+ return 0;
+}
+
+/*
+ * Check whether a signature found at file offset fileoff satisfies its
+ * offset restriction.
+ *
+ * ftype - file type, handed to cli_caloff()
+ * offstr - offset string of the signature; NULL means unrestricted
+ * fileoff - offset at which the signature was found
+ * info - target information cache used by cli_caloff()
+ * desc - file descriptor; -1 disables the check
+ * virname - signature name, used in debug messages only
+ *
+ * Returns 1 when the match is acceptable (also when offstr is NULL or
+ * desc is -1) and 0 when the expected offset cannot be calculated or
+ * fileoff lies outside [offset, offset + maxshift].
+ */
+int cli_validatesig(cli_file_t ftype, const char *offstr, off_t fileoff, struct cli_target_info *info, int desc, const char *virname)
+{
+ off_t offset;
+ int ret;
+ unsigned int maxshift = 0;
+
+
+ if(offstr && desc != -1) {
+ offset = cli_caloff(offstr, info, desc, ftype, &ret, &maxshift);
+
+ if(ret == -1) {
+ cli_dbgmsg("cli_validatesig: Can't calculate offset for signature %s\n", virname);
+ return 0;
+ }
+
+ /* off_t need not have the width of unsigned long: cast explicitly for %lu */
+ if(maxshift) {
+ if((fileoff < offset) || (fileoff > offset + (off_t) maxshift)) {
+ cli_dbgmsg("Signature offset: %lu, expected: [%lu..%lu] (%s)\n", (unsigned long) fileoff, (unsigned long) offset, (unsigned long) (offset + (off_t) maxshift), virname);
+ return 0;
+ }
+ } else if(fileoff != offset) {
+ cli_dbgmsg("Signature offset: %lu, expected: %lu (%s)\n", (unsigned long) fileoff, (unsigned long) offset, virname);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Scan the file behind a descriptor in blocks of SCANBUFF bytes.
+ *
+ * desc - descriptor to read from
+ * ctx - scan context (engine, virname pointer, scanned counter)
+ * otfrec - when non-zero, file type signatures are evaluated and the
+ * highest type value reported is returned
+ * ftype - file type; selects the type-specific matcher
+ * ftonly - when non-zero only the type-specific matcher is used and no
+ * MD5 check is made
+ * ftoffset - optional list receiving embedded object types and offsets
+ *
+ * The last maxpatlen bytes of a full block are copied in front of the
+ * next one so that patterns spanning two blocks are still found.
+ *
+ * Returns CL_VIRUS on a detection that is not a listed false positive,
+ * CL_ENULLARG, CL_EMEM or CL_EIO on errors, the recorded type when otfrec
+ * is set, otherwise CL_CLEAN. The scan buffer and the matcher data are
+ * released on every exit path.
+ */
+int cli_scandesc(int desc, cli_ctx *ctx, uint8_t otfrec, cli_file_t ftype, uint8_t ftonly, struct cli_matched_type **ftoffset)
+{
+ unsigned char *buffer, *buff, *endbl, *upt;
+ int ret = CL_CLEAN, type = CL_CLEAN, i, bytes;
+ uint32_t buffersize, length, maxpatlen, shift = 0, offset = 0;
+ struct cli_ac_data gdata, tdata;
+ cli_md5_ctx md5ctx;
+ unsigned char digest[16];
+ struct cli_md5_node *md5_node;
+ struct cli_matcher *groot = NULL, *troot = NULL;
+
+
+ if(!ctx->engine) {
+ cli_errmsg("cli_scandesc: engine == NULL\n");
+ return CL_ENULLARG;
+ }
+
+ if(!ftonly)
+ groot = ctx->engine->root[0]; /* generic signatures */
+
+ if(ftype) {
+ for(i = 1; i < CL_TARGET_TABLE_SIZE; i++) {
+ if(targettab[i] == ftype) {
+ troot = ctx->engine->root[i];
+ break;
+ }
+ }
+ }
+
+ if(ftonly) {
+ if(!troot)
+ return CL_CLEAN;
+
+ maxpatlen = troot->maxpatlen;
+ } else {
+ if(troot)
+ maxpatlen = MAX(troot->maxpatlen, groot->maxpatlen);
+ else
+ maxpatlen = groot->maxpatlen;
+ }
+
+ /* prepare the buffer */
+ buffersize = maxpatlen + SCANBUFF;
+ if(!(buffer = (unsigned char *) cli_calloc(buffersize, sizeof(unsigned char)))) {
+ cli_dbgmsg("cli_scandesc(): unable to cli_calloc(%u)\n", buffersize);
+ return CL_EMEM;
+ }
+
+ if(!ftonly && (ret = cli_ac_initdata(&gdata, groot->ac_partsigs, AC_DEFAULT_TRACKLEN))) {
+ free(buffer);
+ return ret;
+ }
+
+ if(troot) {
+ if((ret = cli_ac_initdata(&tdata, troot->ac_partsigs, AC_DEFAULT_TRACKLEN))) {
+ /* gdata was set up successfully above, release it together with the buffer */
+ if(!ftonly)
+ cli_ac_freedata(&gdata);
+ free(buffer);
+ return ret;
+ }
+ }
+
+ if(!ftonly && ctx->engine->md5_hlist)
+ cli_md5_init(&md5ctx);
+
+ buff = buffer;
+ buff += maxpatlen; /* pointer to read data block */
+ endbl = buff + SCANBUFF - maxpatlen; /* pointer to the last block
+ * length of maxpatlen
+ */
+
+ /* upt is where scanning starts: buff for the first block, buffer afterwards */
+ upt = buff;
+ while((bytes = cli_readn(desc, buff + shift, SCANBUFF - shift)) > 0) {
+
+ if(ctx->scanned)
+ *ctx->scanned += bytes / CL_COUNT_PRECISION;
+
+ length = shift + bytes;
+ if(upt == buffer)
+ length += maxpatlen;
+
+ if(troot) {
+ if(troot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, troot, offset, ftype, desc)) != CL_VIRUS)
+ ret = cli_ac_scanbuff(upt, length, ctx->virname, troot, &tdata, otfrec, offset, ftype, desc, ftoffset);
+
+ if(ret == CL_VIRUS) {
+ free(buffer);
+ if(!ftonly)
+ cli_ac_freedata(&gdata);
+ cli_ac_freedata(&tdata);
+
+ /* rewind so that cli_checkfp() digests the whole file */
+ lseek(desc, 0, SEEK_SET);
+ if(cli_checkfp(desc, ctx->engine))
+ return CL_CLEAN;
+ else
+ return CL_VIRUS;
+ }
+ }
+
+ if(!ftonly) {
+ if(groot->ac_only || (ret = cli_bm_scanbuff(upt, length, ctx->virname, groot, offset, ftype, desc)) != CL_VIRUS)
+ ret = cli_ac_scanbuff(upt, length, ctx->virname, groot, &gdata, otfrec, offset, ftype, desc, ftoffset);
+
+ if(ret == CL_VIRUS) {
+ free(buffer);
+ cli_ac_freedata(&gdata);
+ if(troot)
+ cli_ac_freedata(&tdata);
+ lseek(desc, 0, SEEK_SET);
+ if(cli_checkfp(desc, ctx->engine))
+ return CL_CLEAN;
+ else
+ return CL_VIRUS;
+
+ } else if(otfrec && ret >= CL_TYPENO) {
+ if(ret > type)
+ type = ret;
+ }
+
+ if(ctx->engine->md5_hlist)
+ cli_md5_update(&md5ctx, buff + shift, bytes);
+ }
+
+ if(bytes + shift == SCANBUFF) {
+ /* block is full: keep its tail in front of the next block */
+ memmove(buffer, endbl, maxpatlen);
+ offset += SCANBUFF;
+
+ if(upt == buff) {
+ upt = buffer;
+ offset -= maxpatlen;
+ }
+
+ shift = 0;
+
+ } else {
+ /* short read: the next read continues filling the same block */
+ shift += bytes;
+ }
+
+ }
+
+ free(buffer);
+ if(!ftonly)
+ cli_ac_freedata(&gdata);
+ if(troot)
+ cli_ac_freedata(&tdata);
+
+ if(!ftonly && ctx->engine->md5_hlist) {
+ cli_md5_final(digest, &md5ctx);
+
+ /* MD5 signatures not flagged as fp identify malware by checksum */
+ if((md5_node = cli_vermd5(digest, ctx->engine)) && !md5_node->fp) {
+ struct stat sb;
+
+ if(fstat(desc, &sb))
+ return CL_EIO;
+
+ if((unsigned int) sb.st_size != md5_node->size) {
+ cli_warnmsg("Detected false positive MD5 match. Please report.\n");
+ } else {
+ if(ctx->virname)
+ *ctx->virname = md5_node->virname;
+
+ return CL_VIRUS;
+ }
+ }
+ }
+
+ return otfrec ? type : CL_CLEAN;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mbox.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,5220 @@
+/*
+ * Copyright (C) 2002-2006 Nigel Horne <njh at bandsman.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+static char const rcsid[] = "$Id: mbox.c,v 1.381 2007/02/15 12:26:44 njh Exp $";
+
+#ifdef _MSC_VER
+#include <winsock.h> /* only needed in CL_EXPERIMENTAL */
+#endif
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef CL_DEBUG
+#define NDEBUG /* map CLAMAV debug onto standard */
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT /* for Solaris 2.8 */
+#endif
+#endif
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include <ctype.h>
+#include <time.h>
+#include <fcntl.h>
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include "clamav.h"
+#ifndef C_WINDOWS
+#include <dirent.h>
+#endif
+#include <limits.h>
+#include <signal.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+#include <stddef.h>
+#endif
+
+#ifdef CL_THREAD_SAFE
+#include <pthread.h>
+#endif
+
+#include "others.h"
+#include "str.h"
+#include "filetypes.h"
+#include "mbox.h"
+#include "dconf.h"
+
+#define DCONF_PHISHING mctx->ctx->dconf->phishing
+
+#ifdef CL_DEBUG
+
+#if defined(C_LINUX) || defined(C_CYGWIN)
+#include <features.h>
+#endif
+
+#if __GLIBC__ == 2 && __GLIBC_MINOR__ >= 1
+#define HAVE_BACKTRACE
+#endif
+#endif
+
+#ifdef HAVE_BACKTRACE
+#include <execinfo.h>
+#include <syslog.h>
+
+static void sigsegv(int sig);
+static void print_trace(int use_syslog);
+
+/*#define SAVE_TMP /* Save the file being worked on in tmp */
+#endif
+
+#if defined(NO_STRTOK_R) || !defined(CL_THREAD_SAFE)
+#undef strtok_r
+#undef __strtok_r
+#define strtok_r(a,b,c) strtok(a,b)
+#endif
+
+#ifdef HAVE_STDBOOL_H
+#ifdef C_BEOS
+#include "SupportDefs.h"
+#else
+#include <stdbool.h>
+#endif
+#else
+#ifdef FALSE
+typedef unsigned char bool;
+#else
+typedef enum { FALSE = 0, TRUE = 1 } bool;
+#endif
+#endif
+
+/*
+ * Outcome of handling one message body; this is the return type of
+ * parseEmailBody() and is also reported through do_multipart() and
+ * checkURLs() via their mbox_status *rc argument.
+ */
+typedef enum {
+ FAIL, /* cli_parse_mbox() resets the message and moves on to the next one */
+ OK,
+ OK_ATTACHMENTS_NOT_SAVED,
+ VIRUS, /* cli_parse_mbox() turns this into CL_VIRUS and stops scanning */
+ MAXREC, /* presumably a recursion limit was reached - TODO confirm */
+ MAXFILES /* presumably a file-count limit was reached - TODO confirm */
+} mbox_status;
+
+#ifndef isblank
+#define isblank(c) (((c) == ' ') || ((c) == '\t'))
+#endif
+
+#define SAVE_TO_DISC /* multipart/message are saved in a temporary file */
+
+#define FOLLOWURLS 5 /*
+ * Maximum number of URLs scanned in a message
+ * part. Helps to prevent Dialer.gen-45 and
+ * Trojan.WinREG.Zapchast which are often
+ * dispatched by emails which point to it. If
+ * not defined, don't check any URLs
+ * It is also used to indicate the number of
+ * 301/302 redirects we wish to follow
+ */
+
+#include "htmlnorm.h"
+
+#include "phishcheck.h"
+
+#ifndef C_WINDOWS
+#include <netdb.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#ifndef C_BEOS
+#include <net/if.h>
+#include <arpa/inet.h>
+#endif
+#endif
+
+#ifndef C_WINDOWS
+#define closesocket(s) close(s)
+#define SOCKET int
+#endif
+
+#include <fcntl.h>
+#ifndef C_WINDOWS
+#include <sys/time.h>
+#endif
+
+#ifndef HAVE_IN_PORT_T
+typedef unsigned short in_port_t;
+#endif
+
+#ifndef HAVE_IN_ADDR_T
+typedef unsigned int in_addr_t;
+#endif
+
+#if (!defined(EALREADY)) && (defined(WSAEALREADY))
+#define EALREADY WSAEALREADY
+#endif
+#if (!defined(EINPROGRESS)) && (defined(WSAEINPROGRESS))
+#define EINPROGRESS WSAEINPROGRESS
+#endif
+#if (!defined(EISCONN)) && (defined(WSAEISCONN))
+#define EISCONN WSAEISCONN
+#endif
+
+/* Needs HAVE_STRCASESTR test in configure */
+#ifndef C_LINUX
+#define strcasestr(h, n) strstr(h, n) /* This will cause isBounceMessage() to match too much */
+#endif
+
+/*
+ * Define this to handle messages covered by section 7.3.2 of RFC1341.
+ * This is experimental code so it is up to YOU to (1) ensure it's secure
+ * (2) periodically trim the directory of old files
+ *
+ * If you use the load balancing feature of clamav-milter to run clamd on
+ * more than one machine you must make sure that .../partial is on a shared
+ * network filesystem
+ */
+#ifndef C_WINDOWS /* TODO: when opendir() is done */
+#define PARTIAL_DIR
+#endif
+
+/*#define NEW_WORLD*/
+
+/*#define SCAN_UNENCODED_BOUNCES *//*
+ * Slows things down a lot and only catches unencoded copies
+ * of EICAR within bounces, which don't matter
+ */
+
+/*
+ * State shared by the mbox parsing routines. cli_parse_mbox() fills one of
+ * these in and hands its address to parseEmailBody().
+ */
+typedef struct mbox_ctx {
+ const char *dir; /* the dir argument given to cli_parse_mbox() */
+ unsigned int files; /* number of files extracted */
+ const table_t *rfc821Table; /* header table set up by initialiseTables() */
+ const table_t *subtypeTable; /* MIME subtype table set up by initialiseTables() */
+ cli_ctx *ctx; /* the caller's scan context */
+} mbox_ctx;
+
+static int cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx);
+static message *parseEmailFile(FILE *fin, const table_t *rfc821Table, const char *firstLine, const char *dir);
+static message *parseEmailHeaders(message *m, const table_t *rfc821Table);
+static int parseEmailHeader(message *m, const char *line, const table_t *rfc821Table);
+static mbox_status parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level);
+static int boundaryStart(const char *line, const char *boundary);
+static int boundaryEnd(const char *line, const char *boundary);
+static int initialiseTables(table_t **rfc821Table, table_t **subtypeTable);
+static int getTextPart(message *const messages[], size_t size);
+static size_t strip(char *buf, int len);
+static int parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg);
+static int saveTextPart(mbox_ctx *mctx, message *m, int destroy_text);
+static char *rfc2047(const char *in);
+static char *rfc822comments(const char *in, char *out);
+#ifdef PARTIAL_DIR
+static int rfc1341(message *m, const char *dir);
+#endif
+static bool usefulHeader(int commandNumber, const char *cmd);
+static char *getline_from_mbox(char *buffer, size_t len, FILE *fin);
+static bool isBounceStart(const char *line);
+static bool exportBinhexMessage(mbox_ctx *mctx, message *m);
+static int exportBounceMessage(mbox_ctx *ctx, text *start);
+static message *do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level);
+static int count_quotes(const char *buf);
+static bool next_is_folded_header(const text *t);
+static bool newline_in_header(const char *line);
+
+static blob *getHrefs(message *m, tag_arguments_t *hrefs);
+static void hrefs_done(blob *b, tag_arguments_t *hrefs);
+static void checkURLs(message *m, mbox_ctx *mctx, mbox_status *rc, int is_html);
+static void do_checkURLs(const char *dir, tag_arguments_t *hrefs);
+
+#if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
+/*
+ * Argument bundle for getURL(), which receives a pointer to one of these
+ * (as a void * in the CL_THREAD_SAFE build).
+ * NOTE(review): the per-field notes are inferred from the names only, the
+ * code that uses them is not visible here - confirm against getURL().
+ */
+struct arg {
+ char *url; /* presumably the URL to fetch */
+ const char *dir; /* presumably the directory the download is saved in */
+ char *filename; /* presumably the name of the saved file */
+ int depth; /* presumably the redirect depth, cf. FOLLOWURLS */
+};
+#define URL_TIMEOUT 5 /* Allow 5 seconds to connect */
+#ifdef CL_THREAD_SAFE
+static void *getURL(void *a);
+#else
+static void *getURL(struct arg *arg);
+#endif
+static int nonblock_connect(const char *url, SOCKET sock, const struct sockaddr *addr);
+static int connect_error(const char *url, SOCKET sock);
+static int my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len);
+
+#define NONBLOCK_SELECT_MAX_FAILURES 3
+#define NONBLOCK_MAX_ATTEMPTS 10
+
+#endif
+
+/* Maximum line length according to RFC821 */
+#define RFC2821LENGTH 1000
+
+/* Hashcodes for our hash tables */
+#define CONTENT_TYPE 1
+#define CONTENT_TRANSFER_ENCODING 2
+#define CONTENT_DISPOSITION 3
+
+/* Mime sub types */
+#define PLAIN 1
+#define ENRICHED 2
+#define HTML 3
+#define RICHTEXT 4
+#define MIXED 5
+#define ALTERNATIVE 6 /* RFC1521*/
+#define DIGEST 7
+#define SIGNED 8
+#define PARALLEL 9
+#define RELATED 10 /* RFC2387 */
+#define REPORT 11 /* RFC1892 */
+#define APPLEDOUBLE 12 /* Handling of this in only noddy for now */
+#define FAX MIXED /*
+ * RFC3458
+ * Drafts stated to treat it as mixed if it is
+ * not known. This disappeared in the final
+ * version (except when talking about
+ * voice-message), but it is good enough for us
+ * since we do no validation of coversheet
+ * presence etc. (which also has disappeared
+ * in the final version)
+ */
+#define ENCRYPTED 13 /*
+ * e.g. RFC2015
+ * Content-Type: multipart/encrypted;
+ * boundary="nextPart1383049.XCRrrar2yq";
+ * protocol="application/pgp-encrypted"
+ */
+#define X_BFILE RELATED /*
+ * BeOS, expects two parts: the file and its
+ * attributes. The attributes part comes as
+ * Content-Type: application/x-be_attribute
+ * name="foo"
+ * I can't find where it is defined, any
+ * pointers would be appreciated. For now
+ * we treat it as multipart/related
+ */
+#define KNOWBOT 14 /* Unknown and undocumented format? */
+
+/*
+ * One (name, code) pair. Each array below is closed by an entry whose key is
+ * NULL; code walking an array stops there.
+ */
+struct tableinit {
+	const char *key;
+	int value;
+};
+
+/* The message headers that are given a hash code of their own */
+static const struct tableinit rfc821headers[] = {
+	/* TODO: make these regular expressions */
+	{ "Content-Type", CONTENT_TYPE },
+	{ "Content-Transfer-Encoding", CONTENT_TRANSFER_ENCODING },
+	{ "Content-Disposition", CONTENT_DISPOSITION },
+	{ NULL, 0 }
+};
+
+/* The MIME subtypes that are recognised, see RFC2045 */
+static const struct tableinit mimeSubtypes[] = {
+	/* subtypes of Text */
+	{ "plain", PLAIN },
+	{ "enriched", ENRICHED },
+	{ "html", HTML },
+	{ "richtext", RICHTEXT },
+	/* subtypes of Multipart */
+	{ "mixed", MIXED },
+	{ "alternative", ALTERNATIVE },
+	{ "digest", DIGEST },
+	{ "signed", SIGNED },
+	{ "parallel", PARALLEL },
+	{ "related", RELATED },
+	{ "report", REPORT },
+	{ "appledouble", APPLEDOUBLE },
+	{ "fax-message", FAX },
+	{ "encrypted", ENCRYPTED },
+	{ "x-bfile", X_BFILE }, /* BeOS */
+	{ "knowbot", KNOWBOT }, /* ??? */
+	{ "knowbot-metadata", KNOWBOT }, /* ??? */
+	{ "knowbot-code", KNOWBOT }, /* ??? */
+	{ "knowbot-state", KNOWBOT }, /* ??? */
+	{ NULL, 0 }
+};
+
+#ifdef CL_THREAD_SAFE
+static pthread_mutex_t tables_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#ifdef NEW_WORLD
+
+#include "matcher.h"
+
+#undef PARTIAL_DIR
+
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#else /*HAVE_MMAP*/
+#undef NEW_WORLD
+#endif
+#endif
+
+#ifdef NEW_WORLD
+/*
+ * Files larger than this are scanned with the old method, should be
+ * StreamMaxLength, I guess
+ * If NW_MAX_FILE_SIZE is not defined, all files go through the
+ * new method. This definition is for machines very tight on RAM, or
+ * with large StreamMaxLength values
+ */
+#define MAX_ALLOCATION 134217728 /* see libclamav/others.c */
+#define NW_MAX_FILE_SIZE MAX_ALLOCATION
+
+/*
+ * Singly linked list of encoded regions found in the mailbox image.
+ * cli_mbox() appends one node per keyword hit and later decodes each node
+ * according to its decoder field.
+ */
+struct scanlist {
+ const char *start; /* first byte after the encoding keyword */
+ size_t size; /* bytes from start up to the next keyword, or to the end of the image */
+ encoding_type decoder; /* only BASE64 and QUOTEDPRINTABLE for now */
+ struct scanlist *next; /* NULL on the last node */
+};
+
+/*
+ * Index of every keyword occurrence in the mailbox image. create_map()
+ * builds the list through add_to_map(), find_in_map() searches it and
+ * free_map() releases it. 'map' is the head of the list and 'tail' the node
+ * most recently appended by add_to_map().
+ */
+static struct map {
+ const char *offset; /* sorted */
+ const char *word; /* the create_map() keyword found at offset */
+ struct map *next;
+} *map, *tail;
+
+static int save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len);
+static void create_map(const char *begin, const char *end);
+static void add_to_map(const char *offset, const char *word);
+static const char *find_in_map(const char *offset, const char *word);
+static void free_map(void);
+
+/*
+ * Release everything cli_mbox() holds: the keyword map, every node of the
+ * scan list (which may be NULL) and the mailbox image, which is either a
+ * heap copy (wasAlloced != 0) or the mmap'ed region
+ */
+static void
+nw_cleanup(struct scanlist *list, char *start, size_t size, int wasAlloced)
+{
+	free_map();
+
+	while(list) {
+		struct scanlist *n = list->next;
+
+		free(list);
+		list = n;
+	}
+
+	if(wasAlloced)
+		free(start);
+	else
+		munmap(start, size);
+}
+
+/*
+ * Return non-zero if the len bytes of lit are found at p. last is the final
+ * valid byte of the buffer and the caller guarantees p <= last, nothing
+ * beyond last is ever read
+ */
+static int
+nw_match(const char *p, const char *last, const char *lit, size_t len)
+{
+	if((size_t)(last - p) + 1 < len)
+		return 0;
+
+	return memcmp(p, lit, len) == 0;
+}
+
+/*
+ * This could be the future. Instead of parsing and decoding it just decodes.
+ *
+ * USE IT AT YOUR PERIL, a large number of viruses are not detected with this
+ * method, possibly because the decoded files must be exact and not have
+ * extra data at the start or end, which this code will produce.
+ *
+ * Currently only supports base64 and quoted-printable
+ *
+ * You may also see a lot of warnings. For the moment it falls back to old
+ * world mode if it doesn't know what to do - that'll be removed.
+ * The code is untidy...
+ *
+ * FIXME: Some mailbox scans are slower with this method. I suspect that it's
+ * because the scan can proceed to the end of the file rather than the end
+ * of the attachment which can mean that later emails are scanned many times
+ *
+ * FIXME: quoted printable doesn't know when to stop, so size related virus
+ * matching breaks
+ *
+ * TODO: Fall through to cli_parse_mbox() too often
+ *
+ * TODO: Add support for systems without mmap()
+ *
+ * TODO: partial_dir fall through
+ *
+ * FIXME: Some EICAR gets through
+ *
+ * dir:	directory in which the decoded parts are saved, must not be NULL
+ * desc:	descriptor of the mailbox, it is fstat'ed and mmap'ed
+ * ctx:	scan context, passed on to the message/fileblob/save_text code
+ *
+ * Returns CL_ENULLARG if dir is NULL, CL_EOPEN if fstat() fails, CL_CLEAN
+ * for an empty file, CL_EMEM if mmap() or an allocation fails, otherwise
+ * the value of save_text() or, whenever this code gives up, the value of
+ * cli_parse_mbox()
+ */
+int
+cli_mbox(const char *dir, int desc, cli_ctx *ctx)
+{
+	char *start, *ptr, *line;
+	const char *last, *p, *q;
+	size_t size;
+	struct stat statb;
+	message *m;
+	fileblob *fb;
+	int ret = CL_CLEAN;
+	int wasAlloced;
+	struct scanlist *scanlist, *scanelem;
+
+	if(dir == NULL) {
+		cli_warnmsg("cli_mbox called with NULL dir\n");
+		return CL_ENULLARG;
+	}
+	if(fstat(desc, &statb) < 0)
+		return CL_EOPEN;
+
+	size = statb.st_size;
+
+	if(size == 0)
+		return CL_CLEAN;
+
+#ifdef NW_MAX_FILE_SIZE
+	if(size > NW_MAX_FILE_SIZE)
+		return cli_parse_mbox(dir, desc, ctx);
+#endif
+
+	/*cli_warnmsg("NEW_WORLD is new code - use at your own risk.\n");*/
+#ifdef PARTIAL_DIR
+	cli_warnmsg("PARTIAL_DIR doesn't work in the NEW_WORLD yet\n");
+#endif
+
+	start = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+	if(start == MAP_FAILED)
+		return CL_EMEM;
+
+	cli_dbgmsg("mmap'ed mbox\n");
+
+	/* work on a heap copy when there is room for one, else on the mapping */
+	ptr = cli_malloc(size);
+	if(ptr) {
+		memcpy(ptr, start, size);
+		munmap(start, size);
+		start = ptr;
+		wasAlloced = 1;
+	} else
+		wasAlloced = 0;
+
+	/* last points to the last *valid* address in the array */
+	last = &start[size - 1];
+
+	create_map(start, last);
+
+	scanelem = scanlist = NULL;
+	q = start;
+	/*
+	 * FIXME: mismatch of const char * and char * here and in later calls
+	 * to find_in_map()
+	 */
+	while((p = find_in_map(q, "base64")) != NULL) {
+		struct scanlist *node;
+
+		cli_dbgmsg("Found base64\n");
+		node = cli_malloc(sizeof(struct scanlist));
+		if(node == NULL) {
+			/* out of memory: drop what has been built so far */
+			nw_cleanup(scanlist, start, size, wasAlloced);
+			return CL_EMEM;
+		}
+		node->next = NULL;
+		if(scanelem)
+			scanelem->next = node;
+		else
+			scanlist = node;
+		scanelem = node;
+
+		scanelem->decoder = BASE64;
+		q = scanelem->start = &p[6];
+		if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
+		   ((p = find_in_map(q, "base64")) != NULL) ||
+		   ((p = find_in_map(q, "quoted-printable")) != NULL)) {
+			scanelem->size = (size_t)(p - q);
+			q = p;
+		} else {
+			scanelem->size = (size_t)(last - scanelem->start) + 1;
+			break;
+		}
+		cli_dbgmsg("base64: last %u q %u\n", (unsigned int)last, (unsigned int)q);
+		assert(scanelem->size <= size);
+	}
+
+	q = start;
+	while((p = find_in_map(q, "quoted-printable")) != NULL) {
+		if(p != q)
+			switch(p[-1]) {
+				case ' ':
+				case ':':
+				case '=':	/* wrong but allow it */
+					break;
+				default:
+					q = &p[16];
+					cli_dbgmsg("Ignore quoted-printable false positive\n");
+					continue;	/* false positive */
+			}
+
+		cli_dbgmsg("Found quoted-printable\n");
+#ifdef notdef
+		/*
+		 * The problem with quoted printable is recognising when to stop
+		 * parsing
+		 */
+		if(scanelem) {
+			scanelem->next = cli_malloc(sizeof(struct scanlist));
+			scanelem = scanelem->next;
+		} else
+			scanlist = scanelem = cli_malloc(sizeof(struct scanlist));
+		scanelem->next = NULL;
+		scanelem->decoder = QUOTEDPRINTABLE;
+		q = scanelem->start = &p[16];
+		cli_dbgmsg("qp: last %u q %u\n", (unsigned int)last, (unsigned int)q);
+		if(((p = find_in_map(q, "\nFrom ")) != NULL) ||
+		   ((p = find_in_map(q, "quoted-printable")) != NULL) ||
+		   ((p = find_in_map(q, "base64")) != NULL)) {
+			scanelem->size = (size_t)(p - q);
+			q = p;
+			cli_dbgmsg("qp: scanelem->size = %u\n", scanelem->size);
+		} else {
+			scanelem->size = (size_t)(last - scanelem->start) + 1;
+			break;
+		}
+		assert(scanelem->size <= size);
+#else
+		/*
+		 * Quoted-printable is left to the old parser. Any base64 nodes
+		 * collected above have to be released as well
+		 */
+		nw_cleanup(scanlist, start, size, wasAlloced);
+		return cli_parse_mbox(dir, desc, ctx);
+#endif
+	}
+
+	if(scanlist == NULL) {
+		const struct tableinit *tableinit;
+		bool anyHeadersFound = FALSE;
+		bool hasuuencode = FALSE;
+		cli_file_t type;
+
+		/*
+		 * FIXME: message: There could of course be no decoder needed...
+		 * NOTE(review): create_map() only records its own keyword
+		 * list, which holds none of the rfc821headers keys, so as
+		 * written this lookup cannot succeed
+		 */
+		for(tableinit = rfc821headers; tableinit->key; tableinit++)
+			if(find_in_map(start, tableinit->key)) {
+				anyHeadersFound = TRUE;
+				break;
+			}
+
+		if((!anyHeadersFound) &&
+		   ((p = find_in_map(start, "\nbegin ")) != NULL) &&
+		   (isuuencodebegin(++p)))
+			/* uuencoded part */
+			hasuuencode = TRUE;
+		else {
+			cli_dbgmsg("Nothing encoded, looking for a text part to save\n");
+			ret = save_text(ctx, dir, start, size);
+			if(ret != CL_EFORMAT) {
+				nw_cleanup(NULL, start, size, wasAlloced);
+				return ret;
+			}
+			/*
+			 * The image is still needed by cli_filetype() below,
+			 * so it must not be released yet
+			 */
+			ret = CL_CLEAN;
+		}
+
+		type = cli_filetype(start, size);
+
+		/* the size test stops strncmp() running off a short image */
+		if((type == CL_TYPE_UNKNOWN_TEXT) && (size >= 31) &&
+		   (strncmp(start, "Microsoft Mail Internet Headers", 31) == 0))
+			type = CL_TYPE_MAIL;
+
+		nw_cleanup(NULL, start, size, wasAlloced);
+
+		if(anyHeadersFound || hasuuencode) {
+			/* TODO: reduce the number of falls through here */
+			if(hasuuencode)
+				/* TODO: fast track visa */
+				cli_warnmsg("New world - fall back to old uudecoder\n");
+			else
+				cli_warnmsg("cli_mbox: unknown encoder, type %d\n", type);
+			if(type == CL_TYPE_MAIL)
+				return cli_parse_mbox(dir, desc, ctx);
+			cli_dbgmsg("Unknown filetype %d, return CLEAN\n", type);
+			return CL_CLEAN;
+		}
+
+#if 0	/* I don't believe this is needed any more */
+		/*
+		 * The message could be a plain text phish
+		 * FIXME: Can't get to the option whether we are looking for
+		 *	phishes or not, so assume we are, this slows things a
+		 *	lot
+		 * Should be
+		 *	if((type == CL_TYPE_MAIL) && (!(no-phishing))
+		 */
+		if(type == CL_TYPE_MAIL)
+			return cli_parse_mbox(dir, desc, ctx);
+#endif
+		cli_dbgmsg("cli_mbox: I believe it's plain text (type == %d) which must be clean\n",
+			type);
+		return CL_CLEAN;
+	}
+#if 0
+	if(wasAlloced) {
+		const char *max = NULL;
+
+		for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
+			const char *end = &scanelem->start[scanelem->size];
+
+			if(end > max)
+				max = end;
+		}
+
+		if(max < last)
+			printf("could free %d bytes\n", (int)(last - max));
+	}
+#endif
+
+	for(scanelem = scanlist; scanelem; scanelem = scanelem->next) {
+		if(scanelem->decoder == BASE64) {
+			const char *b64start = scanelem->start;
+			/* one past the final byte of this element */
+			const char *b64end = &scanelem->start[scanelem->size];
+			size_t b64size = scanelem->size;
+
+			cli_dbgmsg("b64size = %lu\n", (unsigned long)b64size);
+			/* skip the remainder of the line that held the keyword */
+			while((b64start <= last) && (*b64start != '\n') && (*b64start != '\r'))
+				b64start++;
+			/*
+			 * Look for the end of the headers
+			 */
+			while(b64start < last) {
+				if(*b64start == ';')
+					b64start++;
+				else if(nw_match(b64start, last, "\n\n", 2) ||
+					nw_match(b64start, last, "\r\r", 2)) {
+					b64start += 2;
+					break;
+				} else if(nw_match(b64start, last, "\r\n\r\n", 4)) {
+					b64start += 4;
+					break;
+				} else if(nw_match(b64start, last, "\n \n", 3)) {
+					/*
+					 * Some viruses are broken and have
+					 * one space character at the end of
+					 * the headers
+					 */
+					b64start += 3;
+					break;
+				} else if(nw_match(b64start, last, "\r\n \r\n", 5)) {
+					/*
+					 * Some viruses are broken and have
+					 * one space character at the end of
+					 * the headers
+					 */
+					b64start += 5;
+					break;
+				}
+				b64start++;
+			}
+
+			/*
+			 * Work out what is left of the element from the
+			 * pointers. Counting down an unsigned size in step
+			 * with the pointer wraps round to a huge value when
+			 * the headers finish beyond the end of the element
+			 */
+			b64size = (b64start < b64end) ? (size_t)(b64end - b64start) : 0;
+
+			/* move on to the first character that looks like base64 */
+			while((b64size > 0) && (!isalnum((unsigned char)*b64start)) && (*b64start != '/')) {
+				b64start++;
+				b64size--;
+			}
+
+			if(b64size > 0L) {
+				int lastline;
+				char *tmpfilename;
+				unsigned char *uptr;
+
+				cli_dbgmsg("cli_mbox: decoding %ld base64 bytes\n", (long)b64size);
+				if((fb = fileblobCreate()) == NULL) {
+					nw_cleanup(scanlist, start, size, wasAlloced);
+					return CL_EMEM;
+				}
+
+				tmpfilename = cli_gentemp(dir);
+				if(tmpfilename == NULL) {
+					nw_cleanup(scanlist, start, size, wasAlloced);
+					fileblobDestroy(fb);
+
+					return CL_EMEM;
+				}
+				fileblobSetFilename(fb, dir, tmpfilename);
+				free(tmpfilename);
+
+				line = NULL;
+
+				m = messageCreate();
+				if(m == NULL) {
+					nw_cleanup(scanlist, start, size, wasAlloced);
+					fileblobDestroy(fb);
+
+					return CL_EMEM;
+				}
+				messageSetEncoding(m, "base64");
+
+				messageSetCTX(m, ctx);
+				fileblobSetCTX(fb, ctx);
+
+				lastline = 0;
+				do {
+					int length = 0, datalen;
+					char *newline, *equal;
+					unsigned char *bigbuf, *data;
+					unsigned char smallbuf[1024];
+					const char *cptr;
+
+					/*printf("%ld: ", b64size); fflush(stdout);*/
+
+					/* measure the current line */
+					for(cptr = b64start; b64size && (*cptr != '\n') && (*cptr != '\r'); cptr++) {
+						length++;
+						--b64size;
+					}
+
+					/*printf("%d: ", length); fflush(stdout);*/
+
+					newline = cli_realloc(line, length + 1);
+					if(newline == NULL)
+						break;
+					line = newline;
+
+					memcpy(line, b64start, length);
+					line[length] = '\0';
+
+					/* '=' is padding, so this is the final line */
+					equal = strchr(line, '=');
+					if(equal) {
+						lastline++;
+						*equal = '\0';
+					}
+					/*puts(line);*/
+
+#if 0
+					if(messageAddStr(m, line) < 0)
+						break;
+#endif
+					if(length >= (int)sizeof(smallbuf)) {
+						datalen = length + 2;
+						data = bigbuf = cli_malloc(datalen);
+						if(data == NULL)
+							break;
+					} else {
+						bigbuf = NULL;
+						data = smallbuf;
+						/*
+						 * the size of the array, not of
+						 * the pointer that refers to it
+						 */
+						datalen = sizeof(smallbuf) - 1;
+					}
+					uptr = decodeLine(m, BASE64, line, data, datalen);
+
+					if(uptr == NULL) {
+						if(bigbuf)
+							free(bigbuf);
+						break;
+					}
+					/*cli_dbgmsg("base64: write %u bytes\n", (size_t)(uptr - data));*/
+					datalen = fileblobAddData(fb, data, (size_t)(uptr - data));
+					if(bigbuf)
+						free(bigbuf);
+
+					if(datalen < 0)
+						break;
+					/*
+					 * NOTE(review): ret is not changed
+					 * here, confirm how a hit is meant to
+					 * reach the caller
+					 */
+					if(fileblobContainsVirus(fb))
+						break;
+
+					/* step over the end of line */
+					if((b64size > 0) && (*cptr == '\r')) {
+						b64start = ++cptr;
+						--b64size;
+					}
+					if((b64size > 0) && (*cptr == '\n')) {
+						b64start = ++cptr;
+						--b64size;
+					}
+					if(lastline)
+						break;
+				} while(b64size > 0L);
+
+				if(m->base64chars) {
+					unsigned char data[4];
+
+					uptr = base64Flush(m, data);
+					if(uptr) {
+						/*cli_dbgmsg("base64: flush %u bytes\n", (size_t)(uptr - data));*/
+						(void)fileblobAddData(fb, data, (size_t)(uptr - data));
+					}
+				}
+				/* fb was checked when it was created */
+				fileblobDestroy(fb);
+
+				messageDestroy(m);
+				free(line);
+			}
+		} else if(scanelem->decoder == QUOTEDPRINTABLE) {
+			const char *quotedstart = scanelem->start;
+			/* one past the final byte of this element */
+			const char *quotedend = &scanelem->start[scanelem->size];
+			size_t quotedsize = scanelem->size;
+
+			cli_dbgmsg("quotedsize = %lu\n", (unsigned long)quotedsize);
+			while((quotedstart <= last) && (*quotedstart != '\n'))
+				quotedstart++;
+			/*
+			 * Look for the end of the headers
+			 */
+			while(quotedstart < last) {
+				if(*quotedstart == ';')
+					quotedstart++;
+				else if((*quotedstart == '\n') || (*quotedstart == '\r')) {
+					quotedstart++;
+					if((*quotedstart == '\n') || (*quotedstart == '\r')) {
+						quotedstart++;
+						break;
+					}
+				}
+				quotedstart++;
+			}
+
+			/* see the base64 case for why this is not counted down */
+			quotedsize = (quotedstart < quotedend) ? (size_t)(quotedend - quotedstart) : 0;
+
+			while((quotedsize > 0) && (!isalnum((unsigned char)*quotedstart))) {
+				quotedstart++;
+				quotedsize--;
+			}
+
+			if(quotedsize > 0L) {
+				cli_dbgmsg("cli_mbox: decoding %ld quoted-printable bytes\n", (long)quotedsize);
+
+				m = messageCreate();
+				if(m == NULL) {
+					nw_cleanup(scanlist, start, size, wasAlloced);
+					return CL_EMEM;
+				}
+				messageSetEncoding(m, "quoted-printable");
+				messageSetCTX(m, ctx);
+
+				line = NULL;
+
+				do {
+					int length = 0;
+					char *newline;
+					const char *cptr;
+
+					/*printf("%ld: ", quotedsize); fflush(stdout);*/
+
+					for(cptr = quotedstart; quotedsize && (*cptr != '\n') && (*cptr != '\r'); cptr++) {
+						length++;
+						--quotedsize;
+					}
+
+					/*printf("%d: ", length); fflush(stdout);*/
+
+					newline = cli_realloc(line, length + 1);
+					if(newline == NULL)
+						break;
+					line = newline;
+
+					memcpy(line, quotedstart, length);
+					line[length] = '\0';
+
+					/*puts(line);*/
+
+					if(messageAddStr(m, line) < 0)
+						break;
+
+					if((quotedsize > 0) && (*cptr == '\r')) {
+						quotedstart = ++cptr;
+						--quotedsize;
+					}
+					if((quotedsize > 0) && (*cptr == '\n')) {
+						quotedstart = ++cptr;
+						--quotedsize;
+					}
+				} while(quotedsize > 0L);
+
+				free(line);
+				fb = messageToFileblob(m, dir, 1);
+				messageDestroy(m);
+
+				if(fb)
+					fileblobDestroy(fb);
+				else
+					ret = -1;
+			}
+		}
+	}
+
+	/*
+	 * There could be a phish in the plain text part, so save that
+	 * FIXME: Can't get to the option whether we are looking for
+	 *	phishes or not, so assume we are, this slows things a
+	 *	lot
+	 * Should be
+	 *	if((type == CL_TYPE_MAIL) && (!(no-phishing))
+	 */
+	ret = save_text(ctx, dir, start, size);
+
+	nw_cleanup(scanlist, start, size, wasAlloced);
+
+	/*
+	 * FIXME: Need to run cl_scandir() here and return that value
+	 */
+	cli_dbgmsg("cli_mbox: ret = %d\n", ret);
+	if(ret != CL_EFORMAT)
+		return ret;
+
+	cli_warnmsg("New world - don't know what to do - fall back to old world\n");
+	/* Fall back for now */
+	lseek(desc, 0L, SEEK_SET);
+	return cli_parse_mbox(dir, desc, ctx);
+}
+
+/*
+ * Save a text part - it could contain phish or jscript
+ *
+ * The part starts at the first blank line recorded in the map at or after
+ * start. It is scanned with cli_scanbuff() and, if that finds nothing,
+ * written to a new temporary file in dir.
+ *
+ * ctx:	scan context, supplies the engine, virname and scanned counter
+ * dir:	directory in which the temporary file is created
+ * start:	first byte of the mailbox image
+ * len:	number of bytes in the image
+ *
+ * Returns CL_EFORMAT if no blank line is found or the part is shorter than
+ * 5 bytes, CL_VIRUS if cli_scanbuff() reports one, CL_EMEM or CL_ETMPFILE
+ * if the fileblob or its name can't be created, otherwise CL_SUCCESS
+ */
+static int
+save_text(cli_ctx *ctx, const char *dir, const char *start, size_t len)
+{
+	const char *p, *q;
+	fileblob *fb;
+	char *tmpfilename;
+
+	if(((p = find_in_map(start, "\n\n")) == NULL) &&
+	   ((p = find_in_map(start, "\r\n\r\n")) == NULL)) {
+		cli_dbgmsg("No text part found to save\n");
+		return CL_EFORMAT;
+	}
+
+	/*
+	 * The map stores the keyword as "quoted-printable", looking for it
+	 * with an underscore could never succeed
+	 */
+	if(((q = find_in_map(start, "base64")) == NULL) &&
+	   ((q = find_in_map(start, "quoted-printable")) == NULL)) {
+		cli_dbgmsg("It's all plain text!\n");
+		if(*p == '\r')
+			p += 4;
+		else
+			p += 2;
+		len -= (size_t)(p - start);
+	} else if(((q = find_in_map(p, "\nFrom ")) == NULL) &&
+		  ((q = find_in_map(p, "base64")) == NULL) &&
+		  ((q = find_in_map(p, "quoted-printable")) == NULL)) {
+		cli_dbgmsg("Can't find end of plain text - assume it's all\n");
+		/*
+		 * len counts from start but the part begins at p, without
+		 * this the reads below run past the end of the image
+		 */
+		len -= (size_t)(p - start);
+	} else
+		len = (size_t)(q - p);
+
+	if(len < 5) {
+		cli_dbgmsg("save_text: Too small\n");
+		return CL_EFORMAT;
+	}
+	if(ctx->scanned)
+		*ctx->scanned += len / CL_COUNT_PRECISION;
+
+	/*
+	 * This doesn't work, cli_scanbuff isn't designed to be used
+	 * in this way. It gets the "filetype" wrong and then
+	 * doesn't scan correctly
+	 */
+	if(cli_scanbuff((char *)p, len, ctx->virname, ctx->engine, CL_TYPE_UNKNOWN_DATA) == CL_VIRUS) {
+		/* virname is optional, other code tests it before use */
+		cli_dbgmsg("save_text: found %s\n",
+			ctx->virname ? *ctx->virname : "(unknown)");
+		return CL_VIRUS;
+	}
+
+	fb = fileblobCreate();
+	if(fb == NULL)
+		return CL_EMEM;
+
+	tmpfilename = cli_gentemp(dir);
+
+	if(tmpfilename == NULL) {
+		fileblobDestroy(fb);
+		return CL_ETMPFILE;
+	}
+	cli_dbgmsg("save plain bit to %s, %u bytes\n",
+		tmpfilename, (unsigned int)len);
+
+	fileblobSetFilename(fb, dir, tmpfilename);
+	free(tmpfilename);
+
+	(void)fileblobAddData(fb, (const unsigned char *)p, len);
+	fileblobDestroy(fb);
+	return CL_SUCCESS;
+}
+
+/*
+ * Build the keyword map for the image that runs from begin to end.
+ * Every position before end is tried against each keyword in turn, case
+ * being ignored, and the first keyword that matches is recorded with
+ * add_to_map(). A keyword is only tried where (end - position) is at least
+ * its length. A map left over from an earlier call is freed first, with a
+ * warning.
+ */
+static void
+create_map(const char *begin, const char *end)
+{
+	const struct wordlist {
+		const char *word;
+		int len;
+	} wordlist[] = {
+		{ "base64", 6 },
+		{ "quoted-printable", 16 },
+		{ "\nbegin ", 7 },
+		{ "\nFrom ", 6 },
+		{ "\n\n", 2 },
+		{ "\r\n\r\n", 4 },
+		{ NULL, 0 }
+	};
+	const char *cursor;
+
+	if(map) {
+		cli_warnmsg("create_map called without free_map\n");
+		free_map();
+	}
+
+	for(cursor = begin; cursor < end; cursor++) {
+		const struct wordlist *entry = wordlist;
+
+		while(entry->word != NULL) {
+			/* is there room for this keyword, and is it here? */
+			if(((end - cursor) >= entry->len) &&
+			   (strncasecmp(cursor, entry->word, entry->len) == 0)) {
+				add_to_map(cursor, entry->word);
+				break;
+			}
+			entry++;
+		}
+	}
+}
+
+/*
+ * Append an entry to the end of the map.
+ * To sort map, assume 'offset' is presented in sorted order
+ *
+ * offset:	where the keyword was found
+ * word:	the keyword, the pointer is stored and not copied
+ *
+ * If no memory can be had the entry is dropped and the map is left exactly
+ * as it was; writing through the NULL from cli_malloc() would crash and
+ * would also lose the tail of the list
+ */
+static void
+add_to_map(const char *offset, const char *word)
+{
+	struct map *node = cli_malloc(sizeof(struct map));
+
+	if(node == NULL)
+		return;
+
+	node->offset = offset;
+	node->word = word;
+	node->next = NULL;
+
+	if(map)
+		tail->next = node;
+	else
+		map = node;
+	tail = node;
+}
+
+/*
+ * Walk the map from its head and return the offset of the first entry that
+ * lies at or after 'offset' and whose keyword equals 'word', ignoring case.
+ * Returns NULL if there is no such entry.
+ */
+static const char *
+find_in_map(const char *offset, const char *word)
+{
+	const struct map *node = map;
+
+	while(node != NULL) {
+		if((node->offset >= offset) && (strcasecmp(word, node->word) == 0))
+			return node->offset;
+		node = node->next;
+	}
+
+	return NULL;
+}
+
+/*
+ * Free every entry of the map and leave 'map' NULL. 'tail' is not touched;
+ * add_to_map() only reads it while 'map' is non-NULL.
+ */
+static void
+free_map(void)
+{
+	struct map *node, *following;
+
+	for(node = map; node != NULL; node = following) {
+		following = node->next;
+		free(node);
+	}
+	map = NULL;
+}
+
+#else /*!NEW_WORLD*/
+/*
+ * Entry point when NEW_WORLD is not defined: check dir and pass everything
+ * on to cli_parse_mbox().
+ * Returns CL_ENULLARG if dir is NULL, otherwise the value returned by
+ * cli_parse_mbox().
+ */
+int
+cli_mbox(const char *dir, int desc, cli_ctx *ctx)
+{
+	if(dir != NULL)
+		return cli_parse_mbox(dir, desc, ctx);
+
+	cli_warnmsg("cli_mbox called with NULL dir\n");
+	return CL_ENULLARG;
+}
+#endif
+
+/*
+ * TODO: when signal handling is added, need to remove temp files when a
+ * signal is received
+ * TODO: add option to scan in memory not via temp files, perhaps with a
+ * named pipe or memory mapped file, though this won't work on big e-mails
+ * containing many levels of encapsulated messages - it'd just take too much
+ * RAM
+ * TODO: parse .msg format files
+ * TODO: fully handle AppleDouble format, see
+ * http://www.lazerware.com/formats/Specs/AppleSingle_AppleDouble.pdf
+ * TODO: ensure parseEmailHeaders is always called before parseEmailBody
+ * TODO: create parseEmail which calls parseEmailHeaders then parseEmailBody
+ * TODO: Handle unexpected NUL bytes in header lines which stop strcmp()s:
+ * e.g. \0Content-Type: application/binary;
+ */
+static int
+cli_parse_mbox(const char *dir, int desc, cli_ctx *ctx)
+{
+ int retcode, i;
+ message *body;
+ FILE *fd;
+ char buffer[RFC2821LENGTH + 1];
+ mbox_ctx mctx;
+#ifdef HAVE_BACKTRACE
+ void (*segv)(int);
+#endif
+ static table_t *rfc821, *subtype;
+#ifdef SAVE_TMP
+ char tmpfilename[16];
+ int tmpfd;
+#endif
+
+#ifdef NEW_WORLD
+ cli_dbgmsg("fall back to old world\n");
+#else
+ cli_dbgmsg("in mbox()\n");
+#endif
+
+ i = dup(desc);
+ if((fd = fdopen(i, "rb")) == NULL) {
+ cli_errmsg("Can't open descriptor %d\n", desc);
+ close(i);
+ return CL_EOPEN;
+ }
+ rewind(fd); /* bug 240 */
+#ifdef SAVE_TMP
+ /*
+ * Copy the incoming mail for debugging, so that if it falls over
+ * we have a copy of the offending email. This is debugging code
+ * that you shouldn't of course install in a live environment. I am
+ * not interested in hearing about security issues with this section
+ * of the parser.
+ */
+ strcpy(tmpfilename, "/tmp/mboxXXXXXX");
+ tmpfd = mkstemp(tmpfilename);
+ if(tmpfd < 0) {
+ perror(tmpfilename);
+ cli_errmsg("Can't make debugging file\n");
+ } else {
+ FILE *tmpfp = fdopen(tmpfd, "w");
+
+ if(tmpfp) {
+ while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL)
+ fputs(buffer, tmpfp);
+ fclose(tmpfp);
+ rewind(fd);
+ } else
+ cli_errmsg("Can't fdopen debugging file\n");
+ }
+#endif
+ if(fgets(buffer, sizeof(buffer) - 1, fd) == NULL) {
+ /* empty message */
+ fclose(fd);
+#ifdef SAVE_TMP
+ unlink(tmpfilename);
+#endif
+ return CL_CLEAN;
+ }
+#ifdef CL_THREAD_SAFE
+ pthread_mutex_lock(&tables_mutex);
+#endif
+ if(rfc821 == NULL) {
+ assert(subtype == NULL);
+
+ if(initialiseTables(&rfc821, &subtype) < 0) {
+ rfc821 = NULL;
+ subtype = NULL;
+#ifdef CL_THREAD_SAFE
+ pthread_mutex_unlock(&tables_mutex);
+#endif
+ fclose(fd);
+#ifdef SAVE_TMP
+ unlink(tmpfilename);
+#endif
+ return CL_EMEM;
+ }
+ }
+#ifdef CL_THREAD_SAFE
+ pthread_mutex_unlock(&tables_mutex);
+#endif
+
+#ifdef HAVE_BACKTRACE
+ segv = signal(SIGSEGV, sigsegv);
+#endif
+
+ retcode = CL_SUCCESS;
+ body = NULL;
+
+ mctx.dir = dir;
+ mctx.rfc821Table = rfc821;
+ mctx.subtypeTable = subtype;
+ mctx.ctx = ctx;
+ mctx.files = 0;
+
+ /*
+ * Is it a UNIX style mbox with more than one
+ * mail message, or just a single mail message?
+ *
+ * TODO: It would be better if we called cli_scandir here rather than
+ * in cli_scanmail. Then we could improve the way mailboxes with more
+ * than one message is handled, e.g. giving a better indication of
+ * which message within the mailbox is infected
+ */
+ /*if((strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
+ if(strncmp(buffer, "From ", 5) == 0) {
+ /*
+ * Have been asked to check a UNIX style mbox file, which
+ * may contain more than one e-mail message to decode
+ *
+ * It would be far better for scanners.c to do this splitting
+ * and do this
+ * FOR EACH mail in the mailbox
+ * DO
+ * pass this mail to cli_mbox --
+ * scan this file
+ * IF this file has a virus quit
+ * THEN
+ * return CL_VIRUS
+ * FI
+ * END
+ * This would remove a problem with this code that it can
+ * fill up the tmp directory before it starts scanning
+ */
+ bool lastLineWasEmpty;
+ int messagenumber;
+ message *m = messageCreate();
+
+ if(m == NULL) {
+ fclose(fd);
+#ifdef HAVE_BACKTRACE
+ signal(SIGSEGV, segv);
+#endif
+#ifdef SAVE_TMP
+ unlink(tmpfilename);
+#endif
+ return CL_EMEM;
+ }
+
+ lastLineWasEmpty = FALSE;
+ messagenumber = 1;
+ messageSetCTX(m, ctx);
+
+ do {
+ cli_chomp(buffer);
+ /*if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0) && isalnum(buffer[5])) {*/
+ if(lastLineWasEmpty && (strncmp(buffer, "From ", 5) == 0)) {
+ cli_dbgmsg("Deal with message number %d\n", messagenumber++);
+ /*
+ * End of a message in the mail box
+ */
+ body = parseEmailHeaders(m, rfc821);
+ if(body == NULL) {
+ messageReset(m);
+ continue;
+ }
+ messageSetCTX(body, ctx);
+ messageDestroy(m);
+ if(messageGetBody(body)) {
+ mbox_status rc = parseEmailBody(body, NULL, &mctx, 0);
+ if(rc == FAIL) {
+ messageReset(body);
+ m = body;
+ continue;
+ } else if(rc == VIRUS) {
+ cli_dbgmsg("Message number %d is infected\n",
+ messagenumber);
+ retcode = CL_VIRUS;
+ m = NULL;
+ break;
+ }
+ }
+ /*
+ * Starting a new message, throw away all the
+ * information about the old one. It would
+ * be best to be able to scan this message
+ * now, but cli_scanfile needs arguments
+ * that haven't been passed here so it can't be
+ * called
+ */
+ m = body;
+ messageReset(body);
+ messageSetCTX(body, ctx);
+
+ cli_dbgmsg("Finished processing message\n");
+ } else
+ lastLineWasEmpty = (bool)(buffer[0] == '\0');
+
+ if(isuuencodebegin(buffer)) {
+ /*
+ * Fast track visa to uudecode.
+ * TODO: binhex, yenc
+ */
+ if(uudecodeFile(m, buffer, dir, fd) < 0)
+ if(messageAddStr(m, buffer) < 0)
+ break;
+ } else
+ /* at this point, the \n has been removed */
+ if(messageAddStr(m, buffer) < 0)
+ break;
+ } while(fgets(buffer, sizeof(buffer) - 1, fd) != NULL);
+
+ fclose(fd);
+
+ if(retcode == CL_SUCCESS) {
+ cli_dbgmsg("Extract attachments from email %d\n", messagenumber);
+ body = parseEmailHeaders(m, rfc821);
+ }
+ if(m)
+ messageDestroy(m);
+ } else {
+ /*
+ * It's a single message, parse the headers then the body
+ */
+ if(strncmp(buffer, "P I ", 4) == 0)
+ /*
+ * CommuniGate Pro format: ignore headers until
+ * blank line
+ */
+ while((fgets(buffer, sizeof(buffer) - 1, fd) != NULL) &&
+ (strchr("\r\n", buffer[0]) == NULL))
+ ;
+ /*
+ * Ignore any blank lines at the top of the message
+ */
+ while(strchr("\r\n", buffer[0]) &&
+ (getline_from_mbox(buffer, sizeof(buffer) - 1, fd) != NULL))
+ ;
+
+ buffer[sizeof(buffer) - 1] = '\0';
+
+ body = parseEmailFile(fd, rfc821, buffer, dir);
+ fclose(fd);
+ }
+
+ if(body) {
+ /*
+ * Write out the last entry in the mailbox
+ */
+ if((retcode == CL_SUCCESS) && messageGetBody(body)) {
+ messageSetCTX(body, ctx);
+ switch(parseEmailBody(body, NULL, &mctx, 0)) {
+ case FAIL:
+ /*
+ * beware: cli_magic_scandesc(),
+ * changes this into CL_CLEAN, so only
+ * use it to inform the higher levels
+ * that we couldn't decode it because
+ * it isn't an mbox, not to signal
+ * decoding errors on what *is* a valid
+ * mbox
+ */
+ retcode = CL_EFORMAT;
+ break;
+ case MAXREC:
+ retcode = CL_EMAXREC;
+ break;
+ case MAXFILES:
+ retcode = CL_EMAXFILES;
+ break;
+ case VIRUS:
+ retcode = CL_VIRUS;
+ break;
+ }
+ }
+
+ /*
+ * Tidy up and quit
+ */
+ messageDestroy(body);
+ }
+
+ if((retcode == CL_CLEAN) && ctx->found_possibly_unwanted && (*ctx->virname == NULL)) {
+ *ctx->virname = "Phishing.Heuristics.Email";
+ ctx->found_possibly_unwanted = 0;
+ retcode = CL_VIRUS;
+ }
+
+ cli_dbgmsg("cli_mbox returning %d\n", retcode);
+
+#ifdef HAVE_BACKTRACE
+ signal(SIGSEGV, segv);
+#endif
+
+#ifdef SAVE_TMP
+ unlink(tmpfilename);
+#endif
+ return retcode;
+}
+
+/*
+ * Read in an email message from fin, parse it, and return the message
+ *
+ * fin:	stream positioned just after firstLine
+ * rfc821:	table used to look up header names (see tableFind())
+ * firstLine:	the first line of the message, already read by the caller
+ * dir:	directory handed on to uudecodeFile() for uuencoded bodies
+ *
+ * Only the Content-Transfer-Encoding, Content-Disposition and Content-Type
+ * headers are collected (with their continuation lines) and passed to
+ * parseEmailHeader(); other headers are only used to decide whether the
+ * input looks like an e-mail at all. Body lines are appended to the
+ * returned message with messageAddStr().
+ *
+ * Returns a message created with messageCreate(), which the caller
+ * releases with messageDestroy(), or NULL if the message could not be
+ * created or no recognisable headers were found.
+ *
+ * FIXME: files full of new lines and nothing else are
+ *	handled ungracefully...
+ */
+static message *
+parseEmailFile(FILE *fin, const table_t *rfc821, const char *firstLine, const char *dir)
+{
+	bool inHeader = TRUE;
+	bool bodyIsEmpty = TRUE;
+	bool lastWasBlank = FALSE, lastBodyLineWasBlank = FALSE;
+	message *ret;
+	bool anyHeadersFound = FALSE;
+	int commandNumber = -1;
+	char *fullline = NULL, *boundary = NULL;
+	char buffer[RFC2821LENGTH + 1];
+
+	cli_dbgmsg("parseEmailFile\n");
+
+	ret = messageCreate();
+	if(ret == NULL)
+		return NULL;
+
+	/*
+	 * Bounded copy: firstLine comes from the caller's buffer and must
+	 * never be allowed to overrun ours
+	 */
+	strncpy(buffer, firstLine, sizeof(buffer) - 1);
+	buffer[sizeof(buffer) - 1] = '\0';
+
+	do {
+		const char *line;
+
+		(void)cli_chomp(buffer);
+
+		/* line is NULL for an empty line, otherwise it is buffer */
+		if(buffer[0] == '\0')
+			line = NULL;
+		else
+			line = buffer;
+
+		/*
+		 * Don't blank lines which are only spaces from headers,
+		 * otherwise they'll be treated as the end of header marker
+		 */
+		if(lastWasBlank) {
+			lastWasBlank = FALSE;
+			if(boundaryStart(buffer, boundary)) {
+				cli_dbgmsg("Found a header line with space that should be blank\n");
+				inHeader = FALSE;
+			}
+		}
+		if(inHeader) {
+			cli_dbgmsg("parseEmailFile: check '%s' fullline %p\n",
+				buffer, fullline);
+			/*
+			 * Ensure wide characters are handled where
+			 * sizeof(char) > 1
+			 */
+			if(line && isspace(line[0] & 0xFF)) {
+				char copy[sizeof(buffer)];
+
+				strcpy(copy, buffer);
+				strstrip(copy);
+				if(copy[0] == '\0') {
+					/*
+					 * The header line contains only white
+					 * space. This is not the end of the
+					 * headers according to RFC2822, but
+					 * some MUAs will handle it as though
+					 * it were, and virus writers exploit
+					 * this bug. We can't just break from
+					 * the loop here since that would allow
+					 * other exploits such as inserting a
+					 * white space line before the
+					 * content-type line. So we just have
+					 * to make a best guess. Sigh.
+					 */
+					if(fullline) {
+						if(parseEmailHeader(ret, fullline, rfc821) < 0)
+							continue;
+
+						free(fullline);
+						fullline = NULL;
+					}
+					if(boundary ||
+					   ((boundary = (char *)messageFindArgument(ret, "boundary")) != NULL)) {
+						lastWasBlank = TRUE;
+						continue;
+					}
+				}
+			}
+			if((line == NULL) && (fullline == NULL)) {	/* empty line */
+				/*
+				 * A blank line signifies the end of
+				 * the header and the start of the text
+				 */
+				if(!anyHeadersFound)
+					/* Ignore the junk at the top */
+					continue;
+
+				cli_dbgmsg("End of header information\n");
+				inHeader = FALSE;
+				bodyIsEmpty = TRUE;
+			} else {
+				char *ptr;
+				int lookahead;
+				size_t linelen;
+
+				if(fullline == NULL) {
+					char cmd[RFC2821LENGTH + 1], out[RFC2821LENGTH + 1];
+
+					/*
+					 * Continuation of line we're ignoring?
+					 * (cast: <ctype.h> needs an unsigned
+					 * char value)
+					 */
+					if(isblank((unsigned char)line[0]))
+						continue;
+
+					/*
+					 * Is this a header we're interested in?
+					 */
+					if((strchr(line, ':') == NULL) ||
+					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
+						if(strncmp(line, "From ", 5) == 0)
+							anyHeadersFound = TRUE;
+						continue;
+					}
+
+					ptr = rfc822comments(cmd, out);
+					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
+
+					switch(commandNumber) {
+						case CONTENT_TRANSFER_ENCODING:
+						case CONTENT_DISPOSITION:
+						case CONTENT_TYPE:
+							anyHeadersFound = TRUE;
+							break;
+						default:
+							if(!anyHeadersFound)
+								anyHeadersFound = usefulHeader(commandNumber, cmd);
+							continue;
+					}
+					fullline = cli_strdup(line);
+					if(fullline == NULL)
+						/*
+						 * Out of memory: drop this
+						 * header, as is done when
+						 * cli_realloc() fails below
+						 */
+						continue;
+				} else if(line != NULL) {
+					/*
+					 * Size the new buffer from the string
+					 * actually held, so that an earlier
+					 * failure or a shortening by
+					 * rfc822comments() can't leave us with
+					 * a wrong length
+					 */
+					ptr = cli_realloc(fullline,
+						strlen(fullline) + strlen(line) + 1);
+					if(ptr == NULL)
+						continue;
+					fullline = ptr;
+					strcat(fullline, line);
+				}
+
+				assert(fullline != NULL);
+
+				lookahead = getc(fin);
+				if(lookahead != EOF) {
+					ungetc(lookahead, fin);
+
+					/*
+					 * Section B.2 of RFC822 says TAB or
+					 * SPACE means a continuation of the
+					 * previous entry.
+					 *
+					 * Add all the arguments on the line
+					 */
+					if(isblank(lookahead))
+						continue;
+				}
+
+				/*
+				 * Handle broken headers, where the next
+				 * line isn't indented by whitespace
+				 */
+				linelen = strlen(fullline);
+				if((linelen > 0) && (fullline[linelen - 1] == ';'))
+					/* Add arguments to this line */
+					continue;
+
+				/* An open quote means the value isn't complete */
+				if(line && (count_quotes(fullline) & 1))
+					continue;
+
+				ptr = rfc822comments(fullline, NULL);
+				if(ptr) {
+					free(fullline);
+					fullline = ptr;
+				}
+
+				/* On failure keep fullline so more can be added */
+				if(parseEmailHeader(ret, fullline, rfc821) < 0)
+					continue;
+
+				free(fullline);
+				fullline = NULL;
+			}
+		} else if(line && isuuencodebegin(line)) {
+			/*
+			 * Fast track visa to uudecode.
+			 * TODO: binhex, yenc
+			 */
+			bodyIsEmpty = FALSE;
+			if(uudecodeFile(ret, line, dir, fin) < 0)
+				if(messageAddStr(ret, line) < 0)
+					break;
+		} else {
+			if(line == NULL) {
+				/*
+				 * Although this would save time and RAM, some
+				 * phish signatures have been built which need
+				 * the blank lines
+				 */
+				if(lastBodyLineWasBlank &&
+				   (messageGetMimeType(ret) != TEXT)) {
+					cli_dbgmsg("Ignoring consecutive blank lines in the body\n");
+					continue;
+				}
+				lastBodyLineWasBlank = TRUE;
+			} else {
+				if(bodyIsEmpty) {
+					/*
+					 * Broken message: new line in the
+					 * middle of the headers, so the first
+					 * line of the body is in fact
+					 * the last lines of the header
+					 */
+					if(newline_in_header(line))
+						continue;
+					bodyIsEmpty = FALSE;
+				}
+				lastBodyLineWasBlank = FALSE;
+			}
+
+			if(messageAddStr(ret, line) < 0)
+				break;
+		}
+	} while(getline_from_mbox(buffer, sizeof(buffer) - 1, fin) != NULL);
+
+	if(boundary)
+		free(boundary);
+
+	if(fullline) {
+		/* A header was still being collected when the input ended */
+		if(*fullline) {
+			switch(commandNumber) {
+				case CONTENT_TRANSFER_ENCODING:
+				case CONTENT_DISPOSITION:
+				case CONTENT_TYPE:
+					cli_dbgmsg("parseEmailFile: Fullline unparsed '%s'\n", fullline);
+					break;
+				default:
+					break;
+			}
+		}
+		free(fullline);
+	}
+
+	if(!anyHeadersFound) {
+		/*
+		 * False positive in believing we have an e-mail when we don't
+		 */
+		messageDestroy(ret);
+		cli_dbgmsg("parseEmailFile: no headers found, assuming it isn't an email\n");
+		return NULL;
+	}
+
+	cli_dbgmsg("parseEmailFile: return\n");
+
+	return ret;
+}
+
+/*
+ * The given message contains a raw e-mail.
+ *
+ * Walks the text of m: the Content-Transfer-Encoding, Content-Disposition
+ * and Content-Type headers (with their folded continuation lines) are
+ * passed to parseEmailHeader() to set up a new message, then the text
+ * from the first body line onwards is moved into it with
+ * messageMoveText().
+ *
+ * Returns the message's body with the correct arguments set, empties the
+ * given message's contents (note that it isn't destroyed). Returns NULL
+ * if m is NULL, if the new message can't be created, or if no
+ * recognisable headers were found.
+ *
+ * TODO: remove the duplication with parseEmailFile
+ */
+static message *
+parseEmailHeaders(message *m, const table_t *rfc821)
+{
+	bool inHeader = TRUE;
+	bool bodyIsEmpty = TRUE;
+	text *t;
+	message *ret;
+	bool anyHeadersFound = FALSE;
+	int commandNumber = -1;
+	char *fullline = NULL;
+
+	cli_dbgmsg("parseEmailHeaders\n");
+
+	if(m == NULL)
+		return NULL;
+
+	ret = messageCreate();
+	if(ret == NULL)
+		/* Out of memory, as checked in parseEmailFile() */
+		return NULL;
+
+	for(t = messageGetBody(m); t; t = t->t_next) {
+		const char *line;
+
+		/* A text node without a line stands for an empty line */
+		if(t->t_line)
+			line = lineGetData(t->t_line);
+		else
+			line = NULL;
+
+		if(inHeader) {
+			cli_dbgmsg("parseEmailHeaders: check '%s'\n",
+				line ? line : "");
+			if(line == NULL) {
+				/*
+				 * A blank line signifies the end of
+				 * the header and the start of the text
+				 */
+				cli_dbgmsg("End of header information\n");
+				if(!anyHeadersFound) {
+					cli_dbgmsg("Nothing interesting in the header\n");
+					break;
+				}
+				inHeader = FALSE;
+				bodyIsEmpty = TRUE;
+			} else {
+				char *ptr;
+
+				if(fullline == NULL) {
+					char cmd[RFC2821LENGTH + 1];
+
+					/*
+					 * Continuation of line we're ignoring?
+					 * (cast: <ctype.h> needs an unsigned
+					 * char value)
+					 */
+					if(isblank((unsigned char)line[0]))
+						continue;
+
+					/*
+					 * Is this a header we're interested in?
+					 */
+					if((strchr(line, ':') == NULL) ||
+					   (cli_strtokbuf(line, 0, ":", cmd) == NULL)) {
+						if(strncmp(line, "From ", 5) == 0)
+							anyHeadersFound = TRUE;
+						continue;
+					}
+
+					ptr = rfc822comments(cmd, NULL);
+					commandNumber = tableFind(rfc821, ptr ? ptr : cmd);
+					if(ptr)
+						free(ptr);
+
+					switch(commandNumber) {
+						case CONTENT_TRANSFER_ENCODING:
+						case CONTENT_DISPOSITION:
+						case CONTENT_TYPE:
+							anyHeadersFound = TRUE;
+							break;
+						default:
+							if(!anyHeadersFound)
+								anyHeadersFound = usefulHeader(commandNumber, cmd);
+							continue;
+					}
+					fullline = cli_strdup(line);
+					if(fullline == NULL)
+						/*
+						 * Out of memory: drop this
+						 * header, as is done when
+						 * cli_realloc() fails below
+						 */
+						continue;
+				} else {
+					/*
+					 * Size the new buffer from the string
+					 * actually held, so a failed realloc
+					 * doesn't inflate later requests
+					 */
+					ptr = cli_realloc(fullline,
+						strlen(fullline) + strlen(line) + 1);
+					if(ptr == NULL)
+						continue;
+					fullline = ptr;
+					strcat(fullline, line);
+				}
+				assert(fullline != NULL);
+
+				if(next_is_folded_header(t))
+					/* Add arguments to this line */
+					continue;
+
+				/* The header text now lives in fullline */
+				lineUnlink(t->t_line);
+				t->t_line = NULL;
+
+				/* An open quote means the value isn't complete */
+				if(count_quotes(fullline) & 1)
+					continue;
+
+				ptr = rfc822comments(fullline, NULL);
+				if(ptr) {
+					free(fullline);
+					fullline = ptr;
+				}
+
+				/* On failure keep fullline so more can be added */
+				if(parseEmailHeader(ret, fullline, rfc821) < 0)
+					continue;
+
+				free(fullline);
+				fullline = NULL;
+			}
+		} else {
+			if(bodyIsEmpty) {
+				if(line == NULL)
+					/* throw away leading blank lines */
+					continue;
+				/*
+				 * Broken message: new line in the
+				 * middle of the headers, so the first
+				 * line of the body is in fact
+				 * the last lines of the header
+				 */
+				if(newline_in_header(line))
+					continue;
+				bodyIsEmpty = FALSE;
+			}
+			/*if(t->t_line && isuuencodebegin(t->t_line))
+				puts("FIXME: add fast visa here");*/
+			cli_dbgmsg("parseEmailHeaders: inished with headers, moving body\n");
+			messageMoveText(ret, t, m);
+			break;
+		}
+	}
+
+	if(fullline) {
+		/* A header was still being collected when the text ended */
+		if(*fullline) {
+			switch(commandNumber) {
+				case CONTENT_TRANSFER_ENCODING:
+				case CONTENT_DISPOSITION:
+				case CONTENT_TYPE:
+					cli_dbgmsg("parseEmailHeaders: Fullline unparsed '%s'\n", fullline);
+					break;
+				default:
+					break;
+			}
+		}
+		free(fullline);
+	}
+
+	if(!anyHeadersFound) {
+		/*
+		 * False positive in believing we have an e-mail when we don't
+		 */
+		messageDestroy(ret);
+		cli_dbgmsg("parseEmailHeaders: no headers found, assuming it isn't an email\n");
+		return NULL;
+	}
+
+	cli_dbgmsg("parseEmailHeaders: return\n");
+
+	return ret;
+}
+
+/*
+ * Handle a header line of an email message
+ *
+ * m:		message whose MIME settings are updated via parseMimeHeader()
+ * line:	the complete (unfolded) header line, e.g.
+ *		"Content-Type: multipart/mixed;"
+ * rfc821:	table of header names, passed on to parseMimeHeader()
+ *
+ * Returns the result of parseMimeHeader(), or -1 if the line holds no
+ * separator, has an empty name or no argument, or memory runs out.
+ */
+static int
+parseEmailHeader(message *m, const char *line, const table_t *rfc821)
+{
+	int ret;
+#ifdef CL_THREAD_SAFE
+	char *strptr;
+#endif
+	const char *separater;
+	char *cmd, *copy, tokenseparater[2];
+
+	cli_dbgmsg("parseEmailHeader '%s'\n", line);
+
+	/*
+	 * In RFC822 the separator between the key and the value is a colon,
+	 *	e.g.	Content-Transfer-Encoding: base64
+	 * However some MUAs are lax about this and virus writers exploit
+	 * this hole, so we need to check all known possibilities, in order
+	 * of preference
+	 */
+	for(separater = ":= "; *separater; separater++)
+		if(strchr(line, *separater) != NULL)
+			break;
+
+	if(*separater == '\0')
+		return -1;
+
+	copy = rfc2047(line);
+	if(copy == NULL) {
+		/* an RFC checker would return -1 here */
+		copy = cli_strdup(line);
+		if(copy == NULL)
+			/*
+			 * Out of memory: strtok() must not be started on
+			 * a NULL string
+			 */
+			return -1;
+	}
+
+	tokenseparater[0] = *separater;
+	tokenseparater[1] = '\0';
+
+	ret = -1;
+
+	/* Split the private copy in place: name first, then the rest */
+#ifdef CL_THREAD_SAFE
+	cmd = strtok_r(copy, tokenseparater, &strptr);
+#else
+	cmd = strtok(copy, tokenseparater);
+#endif
+
+	if(cmd && (strstrip(cmd) > 0)) {
+		/* An empty delimiter set yields everything that is left */
+#ifdef CL_THREAD_SAFE
+		char *arg = strtok_r(NULL, "", &strptr);
+#else
+		char *arg = strtok(NULL, "");
+#endif
+
+		if(arg)
+			/*
+			 * Found a header such as
+			 *	Content-Type: multipart/mixed;
+			 * set arg to be
+			 *	"multipart/mixed" and cmd to
+			 *	be "Content-Type"
+			 */
+			ret = parseMimeHeader(m, cmd, rfc821, arg);
+	}
+	free(copy);
+	return ret;
+}
+
+/*
+ * This is a recursive routine.
+ * FIXME: We are not passed &mrec so we can't check against MAX_MAIL_RECURSION
+ *
+ * This function parses the body of mainMessage and saves its attachments in dir
+ *
+ * mainMessage is the buffer to be parsed, it contains an e-mail's body, without
+ * any headers. First time of calling it'll be
+ * the whole message. Later it'll be parts of a multipart message
+ * textIn is the plain text message being built up so far
+ */
+static mbox_status
+parseEmailBody(message *messageIn, text *textIn, mbox_ctx *mctx, unsigned int recursion_level)
+{
+ mbox_status rc;
+ text *aText = textIn;
+ message *mainMessage = messageIn;
+ fileblob *fb;
+ bool infected = FALSE;
+ const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
+ const struct cl_limits *limits = mctx->ctx->limits;
+
+ cli_dbgmsg("in parseEmailBody, %u files saved so far\n",
+ mctx->files);
+
+ if(limits) {
+ if(limits->maxmailrec) {
+ const cli_ctx *ctx = mctx->ctx; /* needed for BLOCKMAX :-( */
+
+ /*
+ * This is approximate
+ */
+ if(recursion_level > limits->maxmailrec) {
+
+ cli_warnmsg("parseEmailBody: hit maximum recursion level (%u)\n", recursion_level);
+ if(BLOCKMAX) {
+ if(ctx->virname)
+ *ctx->virname = "MIME.RecursionLimit";
+ return VIRUS;
+ } else
+ return MAXREC;
+ }
+ }
+ if(limits->maxfiles && (mctx->files >= limits->maxfiles)) {
+ /*
+ * FIXME: This is only approx - it may have already
+ * been exceeded
+ */
+ cli_dbgmsg("parseEmailBody: number of files exceeded %u\n", limits->maxfiles);
+ return MAXFILES;
+ }
+ }
+
+ rc = OK;
+
+ /* Anything left to be parsed? */
+ if(mainMessage && (messageGetBody(mainMessage) != NULL)) {
+ mime_type mimeType;
+ int subtype, inhead, htmltextPart, inMimeHead, i;
+ const char *mimeSubtype;
+ char *protocol, *boundary;
+ const text *t_line;
+ /*bool isAlternative;*/
+ message *aMessage;
+ int multiparts = 0;
+ message **messages = NULL; /* parts of a multipart message */
+
+ cli_dbgmsg("Parsing mail file\n");
+
+ mimeType = messageGetMimeType(mainMessage);
+ mimeSubtype = messageGetMimeSubtype(mainMessage);
+
+ /* pre-process */
+ subtype = tableFind(mctx->subtypeTable, mimeSubtype);
+ if((mimeType == TEXT) && (subtype == PLAIN)) {
+ /*
+ * This is effectively no encoding, notice that we
+ * don't check that charset is us-ascii
+ */
+ cli_dbgmsg("text/plain: Assume no attachements\n");
+ mimeType = NOMIME;
+ messageSetMimeSubtype(mainMessage, "");
+ } else if((mimeType == MESSAGE) &&
+ (strcasecmp(mimeSubtype, "rfc822-headers") == 0)) {
+ /*
+ * RFC1892/RFC3462: section 2 text/rfc822-headers
+ * incorrectly sent as message/rfc822-headers
+ *
+ * Parse as text/plain, i.e. no mime
+ */
+ cli_dbgmsg("Changing message/rfc822-headers to text/rfc822-headers\n");
+ mimeType = NOMIME;
+ messageSetMimeSubtype(mainMessage, "");
+ } else
+ cli_dbgmsg("mimeType = %d\n", (int)mimeType);
+
+ switch(mimeType) {
+ case NOMIME:
+ cli_dbgmsg("Not a mime encoded message\n");
+ aText = textAddMessage(aText, mainMessage);
+ if(!doPhishingScan)
+ break;
+ /*
+ * Fall through: some phishing mails claim they are
+ * text/plain, when they are in fact html
+ */
+ case TEXT:
+ /* text/plain has been preprocessed as no encoding */
+ if(((mctx->ctx->options&CL_SCAN_MAILURL) && (subtype == HTML)) || doPhishingScan) {
+ /*
+ * It would be better to save and scan the
+ * file and only checkURLs if it's found to be
+ * clean
+ */
+ checkURLs(mainMessage, mctx, &rc, (subtype == HTML));
+ /*
+ * There might be html sent without subtype
+ * html too, so scan them for phishing
+ */
+ if(rc == VIRUS)
+ infected = TRUE;
+ }
+ break;
+ case MULTIPART:
+ cli_dbgmsg("Content-type 'multipart' handler\n");
+ boundary = messageFindArgument(mainMessage, "boundary");
+
+ if(boundary == NULL) {
+ cli_warnmsg("Multipart/%s MIME message contains no boundary header\n",
+ mimeSubtype);
+ /* Broken e-mail message */
+ mimeType = NOMIME;
+ /*
+ * The break means that we will still
+ * check if the file contains a uuencoded file
+ */
+ break;
+ }
+
+ /* Perhaps it should assume mixed? */
+ if(mimeSubtype[0] == '\0') {
+ cli_warnmsg("Multipart has no subtype assuming alternative\n");
+ mimeSubtype = "alternative";
+ messageSetMimeSubtype(mainMessage, "alternative");
+ }
+
+ /*
+ * Get to the start of the first message
+ */
+ t_line = messageGetBody(mainMessage);
+
+ if(t_line == NULL) {
+ cli_warnmsg("Multipart MIME message has no body\n");
+ free((char *)boundary);
+ mimeType = NOMIME;
+ break;
+ }
+
+ do
+ if(t_line->t_line) {
+ if(boundaryStart(lineGetData(t_line->t_line), boundary))
+ break;
+ /*
+ * Found a binhex file before
+ * the first multipart
+ * TODO: check yEnc
+ */
+ if(binhexBegin(mainMessage) == t_line) {
+ if(exportBinhexMessage(mctx, mainMessage)) {
+ /* virus found */
+ rc = VIRUS;
+ infected = TRUE;
+ break;
+ }
+ } else if(t_line->t_next &&
+ (encodingLine(mainMessage) == t_line->t_next)) {
+ /*
+ * We look for the next line
+ * since later on we'll skip
+ * over the important line when
+ * we think it's a blank line
+ * at the top of the message -
+ * which it would have been in
+ * an RFC compliant world
+ */
+ cli_dbgmsg("Found MIME attachment before the first MIME section \"%s\"\n",
+ lineGetData(t_line->t_next->t_line));
+ if(messageGetEncoding(mainMessage) == NOENCODING)
+ break;
+ }
+ }
+ while((t_line = t_line->t_next) != NULL);
+
+ if(t_line == NULL) {
+ cli_dbgmsg("Multipart MIME message contains no boundary lines (%s)\n",
+ boundary);
+ /*
+ * Free added by Thomas Lamy
+ * <Thomas.Lamy at in-online.net>
+ */
+ free((char *)boundary);
+ mimeType = NOMIME;
+ /*
+ * The break means that we will still
+ * check if the file contains a yEnc/binhex file
+ */
+ break;
+ }
+ /*
+ * Build up a table of all of the parts of this
+ * multipart message. Remember, each part may itself
+ * be a multipart message.
+ */
+ inhead = 1;
+ inMimeHead = 0;
+
+ /*
+ * Re-read this variable in case mimeSubtype has changed
+ */
+ subtype = tableFind(mctx->subtypeTable, mimeSubtype);
+
+ /*
+ * Parse the mainMessage object and create an array
+ * of objects called messages, one for each of the
+ * multiparts that mainMessage contains.
+ *
+ * This looks like parseEmailHeaders() - maybe there's
+ * some duplication of code to be cleaned up
+ *
+ * We may need to create an array rather than just
+ * save each part as it is found because not all
+ * elements will need scanning, and we don't yet know
+ * which of those elements it will be, except in
+ * the case of mixed, when all parts need to be scanned.
+ */
+ for(multiparts = 0; t_line && !infected; multiparts++) {
+ int lines = 0;
+ message **m;
+ mbox_status old_rc;
+
+ m = cli_realloc(messages, ((multiparts + 1) * sizeof(message *)));
+ if(m == NULL)
+ break;
+ messages = m;
+
+ aMessage = messages[multiparts] = messageCreate();
+ if(aMessage == NULL) {
+ multiparts--;
+ continue;
+ }
+ messageSetCTX(aMessage, mctx->ctx);
+
+ cli_dbgmsg("Now read in part %d\n", multiparts);
+
+ /*
+ * Ignore blank lines. There shouldn't be ANY
+ * but some viruses insert them
+ */
+ while((t_line = t_line->t_next) != NULL)
+ if(t_line->t_line &&
+ /*(cli_chomp(t_line->t_text) > 0))*/
+ (strlen(lineGetData(t_line->t_line)) > 0))
+ break;
+
+ if(t_line == NULL) {
+ cli_dbgmsg("Empty part\n");
+ /*
+ * Remove this part unless there's
+ * a binhex portion somewhere in
+ * the complete message that we may
+ * throw away by mistake if the MIME
+ * encoding information is incorrect
+ */
+ if(mainMessage &&
+ (binhexBegin(mainMessage) == NULL)) {
+ messageDestroy(aMessage);
+ --multiparts;
+ }
+ continue;
+ }
+
+ do {
+ const char *line = lineGetData(t_line->t_line);
+
+ /*cli_dbgmsg("multipart %d: inMimeHead %d inhead %d boundary '%s' line '%s' next '%s'\n",
+ multiparts, inMimeHead, inhead, boundary, line,
+ t_line->t_next && t_line->t_next->t_line ? lineGetData(t_line->t_next->t_line) : "(null)");*/
+
+ if(inMimeHead) { /* continuation line */
+ if(line == NULL) {
+ /*inhead =*/ inMimeHead = 0;
+ continue;
+ }
+ /*
+ * Handle continuation lines
+ * because the previous line
+ * ended with a ; or this line
+ * starts with a white space
+ */
+ cli_dbgmsg("Multipart %d: About to add mime Argument '%s'\n",
+ multiparts, line);
+ /*
+ * Handle the case when it
+ * isn't really a continuation
+ * line:
+ * Content-Type: application/octet-stream;
+ * Content-Transfer-Encoding: base64
+ */
+ parseEmailHeader(aMessage, line, mctx->rfc821Table);
+
+ while(isspace((int)*line))
+ line++;
+
+ if(*line == '\0') {
+ inhead = inMimeHead = 0;
+ continue;
+ }
+ inMimeHead = FALSE;
+ messageAddArgument(aMessage, line);
+ } else if(inhead) { /* handling normal headers */
+ /*int quotes;*/
+ char *fullline, *ptr;
+
+ if(line == NULL) {
+ /*
+ * empty line, should the end of the headers,
+ * but some base64 decoders, e.g. uudeview, are broken
+ * and will handle this type of entry, decoding the
+ * base64 content...
+ * Content-Type: application/octet-stream; name=text.zip
+ * Content-Transfer-Encoding: base64
+ * Content-Disposition: attachment; filename="text.zip"
+ *
+ * Content-Disposition: attachment;
+ * filename=text.zip
+ * Content-Type: application/octet-stream;
+ * name=text.zip
+ * Content-Transfer-Encoding: base64
+ *
+ * UEsDBAoAAAAAAACgPjJ2RHw676gAAO+oAABEAAAAbWFpbF90ZXh0LWluZm8udHh0ICAgICAgICAg
+ */
+ const text *next = t_line->t_next;
+
+ if(next && next->t_line) {
+ const char *data = lineGetData(next->t_line);
+
+ if((messageGetEncoding(aMessage) == NOENCODING) &&
+ (messageGetMimeType(aMessage) == APPLICATION) &&
+ strstr(data, "base64")) {
+ /*
+ * Handle this nightmare (note the blank
+ * line in the header and the incorrect
+ * content-transfer-encoding header)
+ *
+ * Content-Type: application/octet-stream; name="zipped_files.EXEX-Spanska: Yes
+ *
+ * r-Encoding: base64
+ * Content-Disposition: attachment; filename="zipped_files.EXE"
+ */
+ messageSetEncoding(aMessage, "base64");
+ cli_dbgmsg("Ignoring fake end of headers\n");
+ continue;
+ }
+ if((strncmp(data, "Content", 7) == 0) ||
+ (strncmp(data, "filename=", 9) == 0)) {
+ cli_dbgmsg("Ignoring fake end of headers\n");
+ continue;
+ }
+ }
+ cli_dbgmsg("Multipart %d: End of header information\n",
+ multiparts);
+ inhead = 0;
+ continue;
+ }
+ if(isspace((int)*line)) {
+ /*
+ * The first line is
+ * continuation line.
+ * This is tricky
+ * to handle, but
+ * all we can do is our
+ * best
+ */
+ cli_dbgmsg("Part %d starts with a continuation line\n",
+ multiparts);
+ messageAddArgument(aMessage, line);
+ /*
+ * Give it a default
+ * MIME type since
+ * that may be the
+ * missing line
+ *
+ * Choose application to
+ * force a save
+ */
+ if(messageGetMimeType(aMessage) == NOMIME)
+ messageSetMimeType(aMessage, "application");
+ continue;
+ }
+
+ inMimeHead = FALSE;
+
+ assert(strlen(line) <= RFC2821LENGTH);
+
+ fullline = rfc822comments(line, NULL);
+ if(fullline == NULL)
+ fullline = cli_strdup(line);
+
+ /*quotes = count_quotes(fullline);*/
+
+ /*
+ * Fold next lines to the end of this
+ * if they start with a white space
+ * or if this line has an odd number of quotes:
+ * Content-Type: application/octet-stream; name="foo
+ * "
+ */
+ while(t_line && next_is_folded_header(t_line)) {
+ const char *data;
+
+ t_line = t_line->t_next;
+
+ data = lineGetData(t_line->t_line);
+
+ if(data[1] == '\0') {
+ /*
+ * Broken message: the
+ * blank line at the end
+ * of the headers isn't blank -
+ * it contains a space
+ */
+ cli_dbgmsg("Multipart %d: headers not terminated by blank line\n",
+ multiparts);
+ inhead = FALSE;
+ break;
+ }
+
+ ptr = cli_realloc(fullline,
+ strlen(fullline) + strlen(data) + 1);
+
+ if(ptr == NULL)
+ break;
+
+ fullline = ptr;
+ strcat(fullline, data);
+
+ /*quotes = count_quotes(data);*/
+ }
+
+ cli_dbgmsg("Multipart %d: About to parse folded header '%s'\n",
+ multiparts, fullline);
+
+ parseEmailHeader(aMessage, fullline, mctx->rfc821Table);
+ free(fullline);
+ } else if(boundaryEnd(line, boundary)) {
+ /*
+ * Some viruses put information
+ * *after* the end of message,
+ * which presumably some broken
+ * mail clients find, so we
+ * can't assume that this
+ * is the end of the message
+ */
+ /* t_line = NULL;*/
+ break;
+ } else if(boundaryStart(line, boundary)) {
+ inhead = 1;
+ break;
+ } else {
+ if(messageAddLine(aMessage, t_line->t_line) < 0)
+ break;
+ lines++;
+ }
+ } while((t_line = t_line->t_next) != NULL);
+
+ cli_dbgmsg("Part %d has %d lines, rc = %d\n",
+ multiparts, lines, (int)rc);
+
+ /*
+ * Only save in the array of messages if some
+ * decision will be taken on whether to scan.
+ * If all parts will be scanned then save to
+ * file straight away
+ */
+ switch(subtype) {
+ case MIXED:
+ case ALTERNATIVE:
+ case REPORT:
+ case DIGEST:
+ case APPLEDOUBLE:
+ case KNOWBOT:
+ case -1:
+ old_rc = rc;
+ mainMessage = do_multipart(mainMessage,
+ messages, multiparts,
+ &rc, mctx, messageIn,
+ &aText, recursion_level);
+ if((rc == OK_ATTACHMENTS_NOT_SAVED) && (old_rc == OK))
+ rc = OK;
+ if(messages[multiparts]) {
+ messageDestroy(messages[multiparts]);
+ messages[multiparts] = NULL;
+ }
+ --multiparts;
+ if(rc == VIRUS)
+ infected = TRUE;
+ break;
+ }
+ }
+
+ free((char *)boundary);
+
+ /*
+ * Preprocess. Anything special to be done before
+ * we handle the multiparts?
+ */
+ switch(subtype) {
+ case KNOWBOT:
+ /* TODO */
+ cli_dbgmsg("multipart/knowbot parsed as multipart/mixed for now\n");
+ mimeSubtype = "mixed";
+ break;
+ case -1:
+ /*
+ * According to section 7.2.6 of
+ * RFC1521, unrecognised multiparts
+ * should be treated as multipart/mixed.
+ */
+ cli_dbgmsg("Unsupported multipart format `%s', parsed as mixed\n", mimeSubtype);
+ mimeSubtype = "mixed";
+ break;
+ }
+
+ /*
+ * We've finished message we're parsing
+ */
+ if(mainMessage && (mainMessage != messageIn)) {
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ }
+
+ cli_dbgmsg("The message has %d parts\n", multiparts);
+
+ if(infected || ((multiparts == 0) && (aText == NULL))) {
+ if(messages) {
+ for(i = 0; i < multiparts; i++)
+ if(messages[i])
+ messageDestroy(messages[i]);
+ free(messages);
+ }
+ if(aText && (textIn == NULL))
+ textDestroy(aText);
+
+ /*
+ * Nothing to do
+ */
+ switch(rc) {
+ case VIRUS: return VIRUS;
+ case MAXREC: return MAXREC;
+ default: return OK_ATTACHMENTS_NOT_SAVED;
+ }
+ }
+
+ cli_dbgmsg("Find out the multipart type (%s)\n", mimeSubtype);
+
+ /*
+ * We now have all the parts of the multipart message
+ * in the messages array:
+ * message *messages[multiparts]
+ * Let's decide what to do with them all
+ */
+ switch(tableFind(mctx->subtypeTable, mimeSubtype)) {
+ case RELATED:
+ cli_dbgmsg("Multipart related handler\n");
+ /*
+ * Have a look to see if there's HTML code
+ * which will need scanning
+ */
+ aMessage = NULL;
+ assert(multiparts > 0);
+
+ htmltextPart = getTextPart(messages, multiparts);
+
+ if(htmltextPart >= 0) {
+ if(messageGetBody(messages[htmltextPart]))
+ aText = textAddMessage(aText, messages[htmltextPart]);
+ } else
+ /*
+ * There isn't an HTML bit. If there's a
+ * multipart bit, it'll may be in there
+ * somewhere
+ */
+ for(i = 0; i < multiparts; i++)
+ if(messageGetMimeType(messages[i]) == MULTIPART) {
+ aMessage = messages[i];
+ htmltextPart = i;
+ break;
+ }
+
+ if(htmltextPart == -1)
+ cli_dbgmsg("No HTML code found to be scanned\n");
+ else {
+ rc = parseEmailBody(aMessage, aText, mctx, recursion_level + 1);
+ if((rc == OK) && aMessage) {
+ assert(aMessage == messages[htmltextPart]);
+ messageDestroy(aMessage);
+ messages[htmltextPart] = NULL;
+ } else if(rc == VIRUS) {
+ infected = TRUE;
+ break;
+ }
+ }
+
+ /*
+ * Fixed based on an idea from Stephen White <stephen at earth.li>
+ * The message is confused about the difference
+ * between alternative and related. Badtrans.B
+ * suffers from this problem.
+ *
+ * Fall through in this case:
+ * Content-Type: multipart/related;
+ * type="multipart/alternative"
+ */
+ /*
+ * Changed to always fall through based on
+ * an idea from Michael Dankov <misha at btrc.ru>
+ * that some viruses are completely confused
+ * about the difference between related
+ * and mixed
+ */
+ /*cptr = messageFindArgument(mainMessage, "type");
+ if(cptr == NULL)
+ break;
+ isAlternative = (bool)(strcasecmp(cptr, "multipart/alternative") == 0);
+ free((char *)cptr);
+ if(!isAlternative)
+ break;*/
+ case DIGEST:
+ /*
+ * According to section 5.1.5 RFC2046, the
+ * default mime type of multipart/digest parts
+ * is message/rfc822
+ *
+ * We consider them as alternative, wrong in
+ * the strictest sense since they aren't
+ * alternatives - all parts a valid - but it's
+ * OK for our needs since it means each part
+ * will be scanned
+ */
+ case ALTERNATIVE:
+ cli_dbgmsg("Multipart alternative handler\n");
+
+ /*
+ * Fall through - some clients are broken and
+ * say alternative instead of mixed. The Klez
+ * virus is broken that way, and anyway we
+ * wish to scan all of the alternatives
+ */
+ case REPORT:
+ /*
+ * According to section 1 of RFC1892, the
+ * syntax of multipart/report is the same
+ * as multipart/mixed. There are some required
+ * parameters, but there's no need for us to
+ * verify that they exist
+ */
+ case MIXED:
+ case APPLEDOUBLE: /* not really supported */
+ /*
+ * Look for attachments
+ *
+ * Not all formats are supported. If an
+ * unsupported format turns out to be
+ * common enough to implement, it is a simple
+ * matter to add it
+ */
+ if(aText) {
+ if(mainMessage && (mainMessage != messageIn))
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ }
+
+ cli_dbgmsg("Mixed message with %d parts\n", multiparts);
+ for(i = 0; i < multiparts; i++) {
+ mainMessage = do_multipart(mainMessage,
+ messages, i, &rc, mctx,
+ messageIn, &aText, recursion_level + 1);
+ if(rc == VIRUS) {
+ infected = TRUE;
+ break;
+ }
+ if(rc == MAXREC)
+ break;
+ }
+
+ /* rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1); */
+ break;
+ case SIGNED:
+ case PARALLEL:
+ /*
+ * If we're here it could be because we have a
+ * multipart/mixed message, consisting of a
+ * message followed by an attachment. That
+ * message itself is a multipart/alternative
+ * message and we need to dig out the plain
+ * text part of that alternative
+ */
+ htmltextPart = getTextPart(messages, multiparts);
+ if(htmltextPart == -1)
+ htmltextPart = 0;
+
+ rc = parseEmailBody(messages[htmltextPart], aText, mctx, recursion_level + 1);
+ break;
+ case ENCRYPTED:
+ rc = FAIL; /* Not yet handled */
+ protocol = (char *)messageFindArgument(mainMessage, "protocol");
+ if(protocol) {
+ if(strcasecmp(protocol, "application/pgp-encrypted") == 0) {
+ /* RFC2015 */
+ cli_warnmsg("PGP encoded attachment not scanned\n");
+ rc = OK_ATTACHMENTS_NOT_SAVED;
+ } else
+ cli_warnmsg("Unknown encryption protocol '%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", protocol);
+ free(protocol);
+ } else
+ cli_dbgmsg("Encryption method missing protocol name\n");
+
+ break;
+ default:
+ assert(0);
+ }
+
+ if(mainMessage && (mainMessage != messageIn))
+ messageDestroy(mainMessage);
+
+ if(aText && (textIn == NULL)) {
+ if((!infected) && (fb = fileblobCreate()) != NULL) {
+ cli_dbgmsg("Save non mime and/or text/plain part\n");
+ fileblobSetFilename(fb, mctx->dir, "textpart");
+ /*fileblobAddData(fb, "Received: by clamd (textpart)\n", 30);*/
+ fileblobSetCTX(fb, mctx->ctx);
+ (void)textToFileblob(aText, fb, 1);
+
+ fileblobDestroy(fb);
+ mctx->files++;
+ }
+ textDestroy(aText);
+ }
+
+ for(i = 0; i < multiparts; i++)
+ if(messages[i])
+ messageDestroy(messages[i]);
+
+ if(messages)
+ free(messages);
+
+ return rc;
+
+ case MESSAGE:
+ /*
+ * Check for forbidden encodings
+ */
+ switch(messageGetEncoding(mainMessage)) {
+ case NOENCODING:
+ case EIGHTBIT:
+ case BINARY:
+ break;
+ default:
+ cli_warnmsg("MIME type 'message' cannot be decoded\n");
+ break;
+ }
+ rc = FAIL;
+ if((strcasecmp(mimeSubtype, "rfc822") == 0) ||
+ (strcasecmp(mimeSubtype, "delivery-status") == 0)) {
+ message *m = parseEmailHeaders(mainMessage, mctx->rfc821Table);
+ if(m) {
+ cli_dbgmsg("Decode rfc822\n");
+
+ messageSetCTX(m, mctx->ctx);
+
+ if(mainMessage && (mainMessage != messageIn)) {
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ } else
+ messageReset(mainMessage);
+ if(messageGetBody(m))
+ rc = parseEmailBody(m, NULL, mctx, recursion_level + 1);
+
+ messageDestroy(m);
+ }
+ break;
+ } else if(strcasecmp(mimeSubtype, "disposition-notification") == 0) {
+ /* RFC 2298 - handle like a normal email */
+ rc = OK;
+ break;
+ } else if(strcasecmp(mimeSubtype, "partial") == 0) {
+#ifdef PARTIAL_DIR
+ /* RFC1341 message split over many emails */
+ if(rfc1341(mainMessage, mctx->dir) >= 0)
+ rc = OK;
+#else
+ cli_warnmsg("Partial message received from MUA/MTA - message cannot be scanned\n");
+#endif
+ } else if(strcasecmp(mimeSubtype, "external-body") == 0)
+ /* TODO */
+ cli_warnmsg("Attempt to send Content-type message/external-body trapped");
+ else
+ cli_warnmsg("Unsupported message format `%s' - if you believe this file contains a virus, submit it to www.clamav.net\n", mimeSubtype);
+
+
+ if(mainMessage && (mainMessage != messageIn))
+ messageDestroy(mainMessage);
+ if(messages)
+ free(messages);
+ return rc;
+
+ default:
+ cli_warnmsg("Message received with unknown mime encoding - assume application");
+ /*
+ * Some Yahoo emails attach as
+ * Content-Type: X-unknown/unknown;
+ * instead of
+ * Content-Type: application/unknown;
+ * so let's try our best to salvage something
+ */
+ case APPLICATION:
+ /*cptr = messageGetMimeSubtype(mainMessage);
+
+ if((strcasecmp(cptr, "octet-stream") == 0) ||
+ (strcasecmp(cptr, "x-msdownload") == 0)) {*/
+ {
+ fb = messageToFileblob(mainMessage, mctx->dir, 1);
+
+ if(fb) {
+ cli_dbgmsg("Saving main message as attachment\n");
+ if(fileblobScanAndDestroy(fb) == CL_VIRUS)
+ rc = VIRUS;
+ mctx->files++;
+ if(mainMessage != messageIn) {
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ } else
+ messageReset(mainMessage);
+ }
+ } /*else
+ cli_warnmsg("Discarded application not sent as attachment\n");*/
+ break;
+
+ case AUDIO:
+ case VIDEO:
+ case IMAGE:
+ break;
+ }
+
+ if(messages) {
+ /* "can't happen" */
+ cli_warnmsg("messages != NULL, report to http://bugs.clamav.net\n");
+ free(messages);
+ }
+ }
+
+ if(aText && (textIn == NULL)) {
+ /* Look for a bounce in the text (non mime encoded) portion */
+ const text *t;
+ /* isBounceStart() is expensive, reduce the number of calls */
+ bool lookahead_definately_is_bounce = FALSE;
+
+ for(t = aText; t && (rc != VIRUS); t = t->t_next) {
+ const line_t *l = t->t_line;
+ const text *lookahead, *topofbounce;
+ const char *s;
+ bool inheader;
+
+ if(l == NULL) {
+ /* assert(lookahead_definately_is_bounce == FALSE) */
+ continue;
+ }
+
+ if(lookahead_definately_is_bounce)
+ lookahead_definately_is_bounce = FALSE;
+ else if(!isBounceStart(lineGetData(l)))
+ continue;
+
+ lookahead = t->t_next;
+ if(lookahead) {
+ if(isBounceStart(lineGetData(lookahead->t_line))) {
+ lookahead_definately_is_bounce = TRUE;
+ /* don't save worthless header lines */
+ continue;
+ }
+ } else /* don't save a single liner */
+ break;
+
+ /*
+ * We've found what looks like the start of a bounce
+ * message. Only bother saving if it really is a bounce
+ * message, this helps to speed up scanning of ping-pong
+ * messages that have lots of bounces within bounces in
+ * them
+ */
+ for(; lookahead; lookahead = lookahead->t_next) {
+ l = lookahead->t_line;
+
+ if(l == NULL)
+ break;
+ s = lineGetData(l);
+ if(strncasecmp(s, "Content-Type:", 13) == 0) {
+ /*
+ * Don't bother with text/plain or
+ * text/html
+ */
+ if(strcasestr(s, "text/plain") != NULL)
+ /*
+ * Don't bother to save the
+ * unuseful part, read past
+ * the headers then we'll go
+ * on to look for the next
+ * bounce message
+ */
+ continue;
+ if((!doPhishingScan) &&
+ (strcasestr(s, "text/html") != NULL))
+ continue;
+ break;
+ }
+ }
+
+ if(lookahead && (lookahead->t_line == NULL)) {
+ cli_dbgmsg("Non mime part bounce message is not mime encoded, so it will not be scanned\n");
+ t = lookahead;
+ /* look for next bounce message */
+ continue;
+ }
+
+ /*
+ * Prescan the bounce message to see if there's likely
+ * to be anything nasty.
+ * This algorithm is hand crafted and may be breakable
+ * so all submissions are welcome. It's best NOT to
+ * remove this however you may be tempted, because it
+ * significantly speeds up the scanning of multiple
+ * bounces (i.e. bounces within many bounces)
+ */
+ for(; lookahead; lookahead = lookahead->t_next) {
+ l = lookahead->t_line;
+
+ if(l) {
+ s = lineGetData(l);
+ if((strncasecmp(s, "Content-Type:", 13) == 0) &&
+ (strstr(s, "multipart/") == NULL) &&
+ (strstr(s, "message/rfc822") == NULL) &&
+ (strstr(s, "text/plain") == NULL))
+ break;
+ }
+ }
+ if(lookahead == NULL) {
+ cli_dbgmsg("cli_mbox: I believe it's plain text which must be clean\n");
+ /* nothing here, move along please */
+ break;
+ }
+ if((fb = fileblobCreate()) == NULL)
+ break;
+ cli_dbgmsg("Save non mime part bounce message\n");
+ fileblobSetFilename(fb, mctx->dir, "bounce");
+ fileblobAddData(fb, (const unsigned char *)"Received: by clamd (bounce)\n", 28);
+ fileblobSetCTX(fb, mctx->ctx);
+
+ inheader = TRUE;
+ topofbounce = NULL;
+ do {
+ l = t->t_line;
+
+ if(l == NULL) {
+ if(inheader) {
+ inheader = FALSE;
+ topofbounce = t;
+ }
+ } else {
+ s = lineGetData(l);
+ fileblobAddData(fb, (const unsigned char *)s, strlen(s));
+ }
+ fileblobAddData(fb, (const unsigned char *)"\n", 1);
+ lookahead = t->t_next;
+ if(lookahead == NULL)
+ break;
+ t = lookahead;
+ l = t->t_line;
+ if((!inheader) && l) {
+ s = lineGetData(l);
+ if(isBounceStart(s)) {
+ cli_dbgmsg("Found the start of another bounce candidate (%s)\n", s);
+ lookahead_definately_is_bounce = TRUE;
+ break;
+ }
+ }
+ } while(!fileblobInfected(fb));
+
+ if(fileblobScanAndDestroy(fb) == CL_VIRUS)
+ rc = VIRUS;
+ mctx->files++;
+
+ if(topofbounce)
+ t = topofbounce;
+ }
+ textDestroy(aText);
+ aText = NULL;
+ }
+
+ /*
+ * No attachments - scan the text portions, often files
+ * are hidden in HTML code
+ */
+ if(mainMessage && (rc != VIRUS)) {
+ text *t_line;
+
+ /*
+ * Look for uu-encoded main file
+ */
+ if((encodingLine(mainMessage) != NULL) &&
+ ((t_line = bounceBegin(mainMessage)) != NULL))
+ rc = (exportBounceMessage(mctx, t_line) == CL_VIRUS) ? VIRUS : OK;
+ else {
+ bool saveIt;
+
+ if(messageGetMimeType(mainMessage) == MESSAGE)
+ /*
+ * Quick peek, if the encapsulated
+ * message has no
+ * content encoding statement don't
+ * bother saving to scan, it's safe
+ */
+ saveIt = (bool)(encodingLine(mainMessage) != NULL);
+ else if((t_line = encodingLine(mainMessage)) != NULL) {
+ /*
+ * Some bounces include the message
+ * body without the headers.
+ * FIXME: Unfortunately this generates a
+ * lot of false positives that a bounce
+ * has been found when it hasn't.
+ */
+ if((fb = fileblobCreate()) != NULL) {
+ cli_dbgmsg("Found a bounce message with no header at '%s'\n",
+ lineGetData(t_line->t_line));
+ fileblobSetFilename(fb, mctx->dir, "bounce");
+ fileblobAddData(fb,
+ (const unsigned char *)"Received: by clamd (bounce)\n",
+ 28);
+
+ fileblobSetCTX(fb, mctx->ctx);
+ if(fileblobScanAndDestroy(textToFileblob(t_line, fb, 1)) == CL_VIRUS)
+ rc = VIRUS;
+ mctx->files++;
+ }
+ saveIt = FALSE;
+ } else
+ /*
+ * Save the entire text portion,
+ * since it it may be an HTML file with
+ * a JavaScript virus or a phish
+ */
+ saveIt = TRUE;
+
+ if(saveIt) {
+ cli_dbgmsg("Saving text part to scan, rc = %d\n",
+ (int)rc);
+ if(saveTextPart(mctx, mainMessage, 1) == CL_VIRUS)
+ rc = VIRUS;
+
+ if(mainMessage != messageIn) {
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ } else
+ messageReset(mainMessage);
+ }
+ }
+ } /*else
+ rc = OK_ATTACHMENTS_NOT_SAVED; /* nothing saved */
+
+ if(mainMessage && (mainMessage != messageIn))
+ messageDestroy(mainMessage);
+
+ if((rc != FAIL) && infected)
+ rc = VIRUS;
+
+ cli_dbgmsg("parseEmailBody() returning %d\n", (int)rc);
+
+ return rc;
+}
+
+/*
+ * Is the current line the start of a new section?
+ *
+ * New sections start with --boundary
+ */
+ static int
+ boundaryStart(const char *line, const char *boundary)
+ {
+     /*
+      * Returns 1 if line looks like the start of a new MIME section
+      * ("--boundary"), otherwise 0. A NULL line or a NULL boundary
+      * never matches.
+      */
+     const char *ptr;
+     char *out;
+     int rc;
+     char buf[RFC2821LENGTH + 1];
+
+     if(line == NULL)
+         return 0;    /* empty line */
+     if(boundary == NULL)
+         return 0;
+
+     /*cli_dbgmsg("boundaryStart: line = '%s' boundary = '%s'\n", line, boundary);*/
+
+     if((*line != '-') && (*line != '('))
+         return 0;
+
+     if(strchr(line, '-') == NULL)
+         return 0;
+
+     /*
+      * rfc822comments() can emit every input byte plus the terminating
+      * NUL, so the stack buffer may only be used when the line is
+      * strictly shorter than the buffer. Longer lines are handled in
+      * a heap buffer that must be freed before returning.
+      */
+     if(strlen(line) < sizeof(buf)) {
+         out = NULL;
+         ptr = rfc822comments(line, buf);
+     } else
+         ptr = out = rfc822comments(line, NULL);
+
+     /* No comments found (or an error): work on the raw line */
+     if(ptr == NULL)
+         ptr = line;
+
+     if((*ptr++ != '-') || (*ptr == '\0')) {
+         if(out)
+             free(out);
+         return 0;
+     }
+
+     /*
+      * Gibe.B3 is broken, it has:
+      *    boundary="---- =_NextPart_000_01C31177.9DC7C000"
+      * but its boundaries look like
+      *    ------ =_NextPart_000_01C31177.9DC7C000
+      * notice the one too few '-'.
+      * Presumably this is a deliberate exploitation of a bug in some mail
+      * clients.
+      *
+      * The trouble is that this creates a lot of false positives for
+      * boundary conditions, if we're too lax about matches. We do our level
+      * best to avoid these false positives. For example if we have
+      * boundary="1" we want to ensure that we don't break out of every line
+      * that has -1 in it instead of starting --1. This needs some more work.
+      *
+      * Look with and without RFC822 comments stripped, I've seen some
+      * samples where () are taken as comments in boundaries and some where
+      * they're not. Irrespective of whatever RFC2822 says, we need to find
+      * viruses in both types of mails.
+      */
+     if((strstr(&ptr[1], boundary) != NULL) || (strstr(line, boundary) != NULL)) {
+         const char *k = ptr;
+
+         /*
+          * We need to ensure that we don't match --11=-=-=11 when
+          * looking for --1=-=-=1 in well behaved headers, that's a
+          * false positive problem mentioned above
+          */
+         rc = 0;
+         /* Skip the run of leading '-' in the comment-stripped text */
+         do
+             if(strcmp(++k, boundary) == 0) {
+                 rc = 1;
+                 break;
+             }
+         while(*k == '-');
+         if(rc == 0) {
+             /* Same again, this time on the unstripped line */
+             k = &line[1];
+             do
+                 if(strcmp(++k, boundary) == 0) {
+                     rc = 1;
+                     break;
+                 }
+             while(*k == '-');
+         }
+     } else if(*ptr++ != '-')
+         rc = 0;
+     else
+         rc = (strcasecmp(ptr, boundary) == 0);
+
+     if(out)
+         free(out);
+
+     if(rc == 1)
+         cli_dbgmsg("boundaryStart: found %s in %s\n", boundary, line);
+
+     return rc;
+ }
+
+/*
+ * Is the current line the end?
+ *
+ * The message ends with --boundary--
+ */
+ static int
+ boundaryEnd(const char *line, const char *boundary)
+ {
+     /*
+      * Returns 1 if line is the closing delimiter "--boundary--",
+      * otherwise 0. The boundary is compared case-insensitively.
+      * A NULL line or NULL boundary never matches.
+      */
+     const char *tail;
+     size_t len;
+
+     if((line == NULL) || (boundary == NULL))
+         return 0;
+
+     /*cli_dbgmsg("boundaryEnd: line = '%s' boundary = '%s'\n", line, boundary);*/
+
+     /* Must open with "--"; the second test is skipped if the first fails */
+     if((line[0] != '-') || (line[1] != '-'))
+         return 0;
+     tail = &line[2];
+
+     len = strlen(boundary);
+     if(strncasecmp(tail, boundary, len) != 0)
+         return 0;
+     /*
+      * Use < rather than == because some broken mails have white
+      * space after the boundary
+      */
+     if(strlen(tail) < (len + 2))
+         return 0;
+
+     /* The boundary must be followed by the closing "--" */
+     tail = &tail[len];
+     if((tail[0] == '-') && (tail[1] == '-')) {
+         /* Report the whole line, not just what is left after parsing */
+         cli_dbgmsg("boundaryEnd: found %s in %s\n", boundary, line);
+         return 1;
+     }
+     return 0;
+ }
+
+/*
+ * Initialise the various lookup tables
+ */
+ static int
+ initialiseTables(table_t **rfc821Table, table_t **subtypeTable)
+ {
+     /*
+      * Builds the header-name table from rfc821headers and the MIME
+      * subtype table from mimeSubtypes.
+      *
+      * Returns 0 on success. On failure returns -1; every table that was
+      * created here has then been destroyed and its pointer set to NULL.
+      * Creation failures are reported the same way as insertion failures
+      * rather than relying on assert(), which is compiled out by NDEBUG.
+      */
+     const struct tableinit *tableinit;
+
+     *rfc821Table = tableCreate();
+     if(*rfc821Table == NULL)
+         return -1;
+
+     for(tableinit = rfc821headers; tableinit->key; tableinit++)
+         if(tableInsert(*rfc821Table, tableinit->key, tableinit->value) < 0) {
+             tableDestroy(*rfc821Table);
+             *rfc821Table = NULL;
+             return -1;
+         }
+
+     *subtypeTable = tableCreate();
+     if(*subtypeTable == NULL) {
+         /* Don't leak the first table */
+         tableDestroy(*rfc821Table);
+         *rfc821Table = NULL;
+         return -1;
+     }
+
+     for(tableinit = mimeSubtypes; tableinit->key; tableinit++)
+         if(tableInsert(*subtypeTable, tableinit->key, tableinit->value) < 0) {
+             tableDestroy(*rfc821Table);
+             tableDestroy(*subtypeTable);
+             *rfc821Table = NULL;
+             *subtypeTable = NULL;
+             return -1;
+         }
+
+     return 0;
+ }
+
+/*
+ * If there's a HTML text version use that, otherwise
+ * use the first text part, otherwise just use the
+ * first one around. HTML text is most likely to include
+ * a scripting worm
+ *
+ * If we can't find one, return -1
+ */
+ static int
+ getTextPart(message *const messages[], size_t size)
+ {
+     /*
+      * Scans the first `size` entries of messages[], skipping NULL slots.
+      * Returns the index of the first TEXT part whose subtype is "html"
+      * (case-insensitive). If there is none, returns the index of the
+      * last TEXT part seen, or -1 when there is no TEXT part at all.
+      *
+      * NOTE(review): the comment above this function says the *first*
+      * text part is used as the fallback; the code keeps the last one.
+      * Confirm which is intended.
+      */
+     int fallback = -1;
+     size_t idx = 0;
+
+     while(idx < size) {
+         message *const part = messages[idx];
+
+         if((part != NULL) && (messageGetMimeType(part) == TEXT)) {
+             if(strcasecmp(messageGetMimeSubtype(part), "html") == 0)
+                 return (int)idx;
+             fallback = (int)idx;
+         }
+         idx++;
+     }
+
+     return fallback;
+ }
+
+/*
+ * strip -
+ * Remove the trailing spaces from a buffer. Don't call this directly,
+ * always call strstrip() which is a wrapper to this routine to be used with
+ * NUL terminated strings. This code looks a bit strange because of its
+ * heritage from code that worked on strings that weren't necessarily NUL
+ * terminated.
+ * TODO: rewrite for clamAV
+ *
+ * Returns its new length (a la strlen)
+ *
+ * len must be int not size_t because of the >= 0 test, it is sizeof(buf)
+ * not strlen(buf)
+ */
+ static size_t
+ strip(char *buf, int len)
+ {
+     /*
+      * buf: NUL terminated string to trim in place
+      * len: size of the buffer, i.e. strlen(buf) + 1 when called from
+      *      strstrip()
+      *
+      * Returns the length of the string after trimming. Returns 0 for a
+      * NULL buffer or a non-positive len. If len is larger than
+      * strlen(buf) + 1 the buffer is left untouched and strlen(buf) is
+      * returned.
+      */
+     register char *ptr;
+     register size_t i;
+
+     if((buf == NULL) || (len <= 0))
+         return 0;
+
+     i = strlen(buf);
+     if(len > (int)(i + 1))
+         return i;
+     ptr = &buf[--len];
+
+     /*
+      * Walk backwards from the end, overwriting with NUL until a
+      * character that must be kept is found. --len >= 0 is tested first
+      * so ptr is never moved in front of buf.
+      */
+#if defined(UNIX) || defined(C_LINUX) || defined(C_DARWIN) /* watch - it may be in shared text area */
+     do
+         if(*ptr)
+             *ptr = '\0';
+     /*
+      * The <ctype.h> functions are only defined for EOF and values
+      * representable as unsigned char, so plain char must be converted
+      * before the call. Newlines and carriage returns are kept.
+      */
+     while((--len >= 0) && (!isgraph((unsigned char)*--ptr)) && (*ptr != '\n') && (*ptr != '\r'));
+#else /* more characters can be displayed on DOS */
+     do
+#ifndef REAL_MODE_DOS
+         if(*ptr) /* C8.0 puts into a text area */
+#endif
+             *ptr = '\0';
+     while((--len >= 0) && ((*--ptr == '\0') || isspace((int)(*ptr & 0xFF))));
+#endif
+     return((size_t)(len + 1));
+ }
+
+/*
+ * strstrip:
+ * Strip a given string
+ */
+ size_t
+ strstrip(char *s)
+ {
+     /*
+      * Trims the NUL terminated string s in place by handing it to
+      * strip() together with its buffer size (length plus terminator).
+      * Returns whatever strip() returns, or 0 when s is NULL.
+      */
+     size_t newlen = 0;
+
+     if(s != (char *)NULL)
+         newlen = strip(s, (int)strlen(s) + 1);
+
+     return(newlen);
+ }
+
+/*
+ * Returns 0 for OK, -1 for error
+ */
+ static int
+ parseMimeHeader(message *m, const char *cmd, const table_t *rfc821Table, const char *arg)
+ {
+     /*
+      * Applies one MIME header to message m.
+      *
+      * cmd is the header name and is looked up in rfc821Table; arg is
+      * the header value and may be NULL. RFC822 comments are removed
+      * from both before they are used. Content-Type,
+      * Content-Transfer-Encoding and Content-Disposition are acted on,
+      * any other header is ignored.
+      *
+      * Returns 0 for OK, -1 if memory could not be allocated.
+      */
+     char *copy, *p, *buf;
+     const char *ptr;
+     int commandNumber;
+
+     cli_dbgmsg("parseMimeHeader: cmd='%s', arg='%s'\n", cmd, arg);
+
+     copy = rfc822comments(cmd, NULL);
+     if(copy) {
+         commandNumber = tableFind(rfc821Table, copy);
+         free(copy);
+     } else
+         commandNumber = tableFind(rfc821Table, cmd);
+
+     /* From here on, copy (if set) owns the storage that ptr points at */
+     copy = rfc822comments(arg, NULL);
+
+     if(copy)
+         ptr = copy;
+     else
+         ptr = arg;
+
+     buf = NULL;
+
+     switch(commandNumber) {
+         case CONTENT_TYPE:
+             /*
+              * Fix for non RFC1521 compliant mailers
+              * that send content-type: Text instead
+              * of content-type: Text/Plain, or
+              * just simply "Content-Type:"
+              */
+             if(arg == NULL)
+                 /*
+                  * According to section 4 of RFC1521:
+                  * "Note also that a subtype specification is
+                  * MANDATORY. There are no default subtypes"
+                  *
+                  * We have to break this and make an assumption
+                  * for the subtype because virus writers and
+                  * email client writers don't get it right
+                  */
+                 cli_warnmsg("Empty content-type received, no subtype specified, assuming text/plain; charset=us-ascii\n");
+             else if(strchr(ptr, '/') == NULL)
+                 /*
+                  * Empty field, such as
+                  *    Content-Type:
+                  * which I believe is illegal according to
+                  * RFC1521
+                  */
+                 cli_dbgmsg("Invalid content-type '%s' received, no subtype specified, assuming text/plain; charset=us-ascii\n", ptr);
+             else {
+                 int i;
+
+                 /* Scratch buffer for cli_strtokbuf(), large enough for any token */
+                 buf = cli_malloc(strlen(ptr) + 1);
+                 if(buf == NULL) {
+                     if(copy)
+                         free(copy);
+                     return -1;
+                 }
+                 /*
+                  * Some clients are broken and
+                  * put white space after the ;
+                  */
+                 if(*arg == '/') {
+                     cli_warnmsg("Content-type '/' received, assuming application/octet-stream\n");
+                     messageSetMimeType(m, "application");
+                     messageSetMimeSubtype(m, "octet-stream");
+                 } else {
+                     /*
+                      * The content type could be in quotes:
+                      *    Content-Type: "multipart/mixed"
+                      * FIXME: this is a hack in that ignores
+                      *    the quotes, it doesn't handle
+                      *    them properly
+                      */
+                     /* <ctype.h> needs unsigned char values */
+                     while(isspace((unsigned char)*ptr))
+                         ptr++;
+                     if(ptr[0] == '\"')
+                         ptr++;
+
+                     if(ptr[0] != '/') {
+                         char *s;
+#ifdef CL_THREAD_SAFE
+                         char *strptr = NULL;
+#endif
+
+                         s = cli_strtokbuf(ptr, 0, ";", buf);
+                         /*
+                          * Handle
+                          * Content-Type: foo/bar multipart/mixed
+                          * and
+                          * Content-Type: multipart/mixed foo/bar
+                          */
+                         if(s && *s) {
+                             char *buf2 = cli_strdup(buf);
+
+                             if(buf2 == NULL) {
+                                 if(copy)
+                                     free(copy);
+                                 free(buf);
+                                 return -1;
+                             }
+                             for(;;) {
+#ifdef CL_THREAD_SAFE
+                                 int set = messageSetMimeType(m, strtok_r(s, "/", &strptr));
+#else
+                                 int set = messageSetMimeType(m, strtok(s, "/"));
+#endif
+
+                                 /*
+                                  * Stephen White <stephen at earth.li>
+                                  * Some clients put space after
+                                  * the mime type but before
+                                  * the ;
+                                  */
+#ifdef CL_THREAD_SAFE
+                                 s = strtok_r(NULL, ";", &strptr);
+#else
+                                 s = strtok(NULL, ";");
+#endif
+                                 if(s == NULL)
+                                     break;
+                                 if(set) {
+                                     /*
+                                      * len is the trimmed length of
+                                      * the subtype. It is tested
+                                      * before s[len - 1] is read so
+                                      * that a subtype consisting
+                                      * only of white space cannot
+                                      * index in front of s. An
+                                      * empty subtype is not stored.
+                                      */
+                                     size_t len = strstrip(s);
+
+                                     if((len > 0) && (s[len - 1] == '\"')) {
+                                         s[len - 1] = '\0';
+                                         len = strstrip(s);
+                                     }
+                                     if(len) {
+                                         if(strchr(s, ' '))
+                                             messageSetMimeSubtype(m,
+                                                 cli_strtokbuf(s, 0, " ", buf2));
+                                         else
+                                             messageSetMimeSubtype(m, s);
+                                     }
+                                 }
+
+                                 /* Move on to a second type/subtype pair, if any */
+                                 while(*s && !isspace((unsigned char)*s))
+                                     s++;
+                                 if(*s++ == '\0')
+                                     break;
+                                 if(*s == '\0')
+                                     break;
+                             }
+                             free(buf2);
+                         }
+                     }
+                 }
+
+                 /*
+                  * Add in all rest of the arguments.
+                  * e.g. if the header is this:
+                  * Content-Type:', arg='multipart/mixed; boundary=foo
+                  * we find the boundary argument set it
+                  */
+                 i = 1;
+                 while(cli_strtokbuf(ptr, i++, ";", buf) != NULL) {
+                     cli_dbgmsg("mimeArgs = '%s'\n", buf);
+
+                     messageAddArguments(m, buf);
+                 }
+             }
+             break;
+         case CONTENT_TRANSFER_ENCODING:
+             messageSetEncoding(m, ptr);
+             break;
+         case CONTENT_DISPOSITION:
+             /*
+              * arg, and so ptr, may be NULL (see the Content-Type
+              * case); there is then nothing to tokenise
+              */
+             if(ptr != NULL) {
+                 buf = cli_malloc(strlen(ptr) + 1);
+                 if(buf == NULL) {
+                     if(copy)
+                         free(copy);
+                     return -1;
+                 }
+                 p = cli_strtokbuf(ptr, 0, ";", buf);
+                 if(p) {
+                     if(*p) {
+                         messageSetDispositionType(m, p);
+                         messageAddArgument(m, cli_strtokbuf(ptr, 1, ";", buf));
+                     }
+                 }
+             }
+             if(!messageHasFilename(m))
+                 /*
+                  * Handle this type of header, without
+                  * a filename (e.g. some Worm.Torvil.D)
+                  *    Content-ID: <nRfkHdrKsAxRU>
+                  *    Content-Transfer-Encoding: base64
+                  *    Content-Disposition: attachment
+                  */
+                 messageAddArgument(m, "filename=unknown");
+     }
+     if(copy)
+         free(copy);
+     if(buf)
+         free(buf);
+
+     return 0;
+ }
+
+/*
+ * Save the text portion of the message
+ */
+ static int
+ saveTextPart(mbox_ctx *mctx, message *m, int destroy_text)
+ {
+     /*
+      * Names the message "textportion", converts it to a fileblob in
+      * mctx->dir and scans it. Returns the result of
+      * fileblobScanAndDestroy(), or CL_ETMPFILE if no fileblob could be
+      * created. mctx->files is incremented only when a fileblob exists.
+      */
+     fileblob *textblob;
+
+     messageAddArgument(m, "filename=textportion");
+
+     textblob = messageToFileblob(m, mctx->dir, destroy_text);
+     if(textblob == NULL)
+         return CL_ETMPFILE;
+
+     /* Save main part to scan that */
+     cli_dbgmsg("Saving main message\n");
+
+     mctx->files++;
+     return fileblobScanAndDestroy(textblob);
+ }
+
+/*
+ * Handle RFC822 comments in headers.
+ * If out == NULL, return a buffer without the comments, the caller must free
+ * the returned buffer
+ * Return NULL on error or if the input has no comments.
+ * See section 3.4.3 of RFC822
+ * TODO: handle comments that go on to more than one line
+ */
+ static char *
+ rfc822comments(const char *in, char *out)
+ {
+     /*
+      * Copies `in` to `out` leaving out parenthesised comments.
+      *
+      * If out is NULL a buffer of strlen(in) + 1 bytes is allocated and
+      * returned; otherwise the caller's buffer is written and returned.
+      * Returns NULL when in is NULL, when in contains no '(' at all, or
+      * when the allocation fails.
+      */
+     const char *src;
+     char *dst;
+     int escaped = 0;    /* previous character was an unescaped backslash */
+     int quoted = 0;     /* inside a "quoted string" */
+     int depth = 0;      /* comment nesting level */
+
+     if((in == NULL) || (strchr(in, '(') == NULL))
+         return NULL;
+
+     assert(out != in);
+
+     if(out == NULL) {
+         out = cli_malloc(strlen(in) + 1);
+         if(out == NULL)
+             return NULL;
+     }
+
+     dst = out;
+
+     cli_dbgmsg("rfc822comments: contains a comment\n");
+
+     for(src = in; *src != '\0'; src++) {
+         const char c = *src;
+
+         if(escaped) {
+             /* Escaped character: copied verbatim unless inside a comment */
+             if(depth == 0)
+                 *dst++ = c;
+             escaped = 0;
+         } else if(c == '\\')
+             escaped = 1;
+         else if(c == '\"') {
+             /* Quotes are always copied and toggle the quoted state */
+             *dst++ = '\"';
+             quoted = !quoted;
+         } else if(c == '(') {
+             if(quoted)
+                 *dst++ = '(';
+             else
+                 depth++;
+         } else if(c == ')') {
+             if(quoted)
+                 *dst++ = ')';
+             else if(depth > 0)
+                 depth--;
+         } else if(depth == 0)
+             *dst++ = c;
+     }
+
+     if(escaped) /* last character was a single backslash */
+         *dst++ = '\\';
+     *dst = '\0';
+
+     /*strstrip(out);*/
+
+     cli_dbgmsg("rfc822comments '%s'=>'%s'\n", in, out);
+
+     return out;
+ }
+
+/*
+ * Handle RFC2047 encoding. Returns a malloc'd buffer that the caller must
+ * free, or NULL on error
+ */
+ static char *
+ rfc2047(const char *in)
+ {
+     /*
+      * Decodes the RFC2047 encoded words ("=?charset?q?text?=" or
+      * "=?charset?b?text?=") found in `in`; text outside encoded words
+      * is copied unchanged and the charset is ignored.
+      *
+      * Returns a malloc'd string the caller must free. Returns NULL if
+      * in is NULL, if memory runs out, or if an encoding other than
+      * 'q' or 'b' is met. Input without both "=?" and "?=" is simply
+      * duplicated.
+      */
+     char *out, *pout;
+     size_t len;
+
+     if(in == NULL)
+         return NULL;
+
+     if((strstr(in, "=?") == NULL) || (strstr(in, "?=") == NULL))
+         return cli_strdup(in);
+
+     cli_dbgmsg("rfc2047 '%s'\n", in);
+     out = cli_malloc(strlen(in) + 1);
+
+     if(out == NULL)
+         return NULL;
+
+     pout = out;
+
+     /* For each RFC2047 string */
+     while(*in) {
+         char encoding, *ptr, *enctext;
+         message *m;
+         blob *b;
+
+         /* Find next RFC2047 string */
+         while(*in) {
+             if((*in == '=') && (in[1] == '?')) {
+                 in += 2;
+                 break;
+             }
+             *pout++ = *in++;
+         }
+         /* Skip over charset, find encoding */
+         while((*in != '?') && *in)
+             in++;
+         if(*in == '\0')
+             break;
+         encoding = *++in;
+         /* <ctype.h> needs an unsigned char value */
+         encoding = (char)tolower((unsigned char)encoding);
+
+         if((encoding != 'q') && (encoding != 'b')) {
+             cli_warnmsg("Unsupported RFC2047 encoding type '%c' - if you believe this file contains a virus, submit it to www.clamav.net\n", encoding);
+             free(out);
+             out = NULL;
+             break;
+         }
+         /* Skip to encoded text */
+         if(*++in != '?')
+             break;
+         if(*++in == '\0')
+             break;
+
+         enctext = cli_strdup(in);
+         if(enctext == NULL) {
+             free(out);
+             out = NULL;
+             break;
+         }
+         in = strstr(in, "?=");
+         if(in == NULL) {
+             free(enctext);
+             break;
+         }
+         in += 2;
+         /* enctext is a copy of the same text, so "?=" is there too */
+         ptr = strstr(enctext, "?=");
+         assert(ptr != NULL);
+         *ptr = '\0';
+         /*cli_dbgmsg("Need to decode '%s' with method '%c'\n", enctext, encoding);*/
+
+         m = messageCreate();
+         if(m == NULL) {
+             /* enctext is still ours at this point */
+             free(enctext);
+             break;
+         }
+         messageAddStr(m, enctext);
+         free(enctext);
+         switch(encoding) {
+             case 'q':
+                 messageSetEncoding(m, "quoted-printable");
+                 break;
+             case 'b':
+                 messageSetEncoding(m, "base64");
+                 break;
+         }
+         b = messageToBlob(m, 1);
+         if(b == NULL) {
+             /* Nothing decoded: give back what has been built so far */
+             messageDestroy(m);
+             break;
+         }
+         len = blobGetDataSize(b);
+         cli_dbgmsg("Decoded as '%*.*s'\n", (int)len, (int)len,
+             (const char *)blobGetData(b));
+         if(len > 0)
+             memcpy(pout, blobGetData(b), len);
+         blobDestroy(b);
+         messageDestroy(m);
+         /*
+          * Drop one trailing newline from the decoded text; with
+          * nothing decoded there is no last byte to look at
+          */
+         if((len > 0) && (pout[len - 1] == '\n'))
+             pout += len - 1;
+         else
+             pout += len;
+
+     }
+     if(out == NULL)
+         return NULL;
+
+     *pout = '\0';
+
+     cli_dbgmsg("rfc2047 returns '%s'\n", out);
+     return out;
+ }
+
+#ifdef PARTIAL_DIR
+/*
+ * Handle partial messages
+ */
+ static int
+ rfc1341(message *m, const char *dir)
+ {
+     /*
+      * Handles one piece of a message/partial mail.
+      *
+      * The piece is saved as "<id><number>" in a "clamav-partial"
+      * directory below the temporary directory. When the piece whose
+      * number equals the "total" argument arrives, pieces 1..total are
+      * concatenated into "<dir>/<id>".
+      *
+      * Returns 0 on success and -1 on error (missing id or number
+      * argument, or a directory or file that can't be created or
+      * opened).
+      */
+     fileblob *fb;
+     char *arg, *id, *number, *total, *oldfilename;
+     const char *tmpdir;
+     char pdir[NAME_MAX + 1];
+
+     id = (char *)messageFindArgument(m, "id");
+     if(id == NULL)
+         return -1;
+
+#ifdef C_CYGWIN
+     if((tmpdir = getenv("TEMP")) == (char *)NULL)
+         if((tmpdir = getenv("TMP")) == (char *)NULL)
+             if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
+                 tmpdir = "C:\\";
+#else
+     if((tmpdir = getenv("TMPDIR")) == (char *)NULL)
+         if((tmpdir = getenv("TMP")) == (char *)NULL)
+             if((tmpdir = getenv("TEMP")) == (char *)NULL)
+#ifdef P_tmpdir
+                 tmpdir = P_tmpdir;
+#else
+                 tmpdir = "/tmp";
+#endif
+#endif
+
+     snprintf(pdir, sizeof(pdir) - 1, "%s/clamav-partial", tmpdir);
+
+     /*
+      * errno only means something when mkdir() has failed, so it is
+      * looked at inside the failure branch only
+      */
+     if(mkdir(pdir, S_IRWXU) < 0) {
+         if(errno != EEXIST) {
+             cli_errmsg("Can't create the directory '%s'\n", pdir);
+             free(id);
+             return -1;
+         } else {
+             /* Already there: warn if group or others can get at it */
+             struct stat statb;
+
+             if(stat(pdir, &statb) < 0) {
+                 cli_errmsg("Partial directory %s: %s\n", pdir,
+                     strerror(errno));
+                 free(id);
+                 return -1;
+             }
+             if(statb.st_mode&(S_IRWXG|S_IRWXO))
+                 cli_warnmsg("Insecure partial directory %s (mode 0%o)\n",
+                     pdir,
+#ifdef ACCESSPERMS
+                     (int)(statb.st_mode&ACCESSPERMS)
+#else
+                     (int)(statb.st_mode & 0777)
+#endif
+                 );
+         }
+     }
+
+     number = (char *)messageFindArgument(m, "number");
+     if(number == NULL) {
+         free(id);
+         return -1;
+     }
+
+     oldfilename = messageGetFilename(m);
+
+     /* 10 = strlen("filename=") + 1 for the terminating NUL */
+     arg = cli_malloc(10 + strlen(id) + strlen(number));
+     if(arg) {
+         sprintf(arg, "filename=%s%s", id, number);
+         messageAddArgument(m, arg);
+         free(arg);
+     }
+
+     if(oldfilename) {
+         cli_warnmsg("Must reset to %s\n", oldfilename);
+         free(oldfilename);
+     }
+
+     if((fb = messageToFileblob(m, pdir, 0)) == NULL) {
+         free(id);
+         free(number);
+         return -1;
+     }
+
+     fileblobDestroy(fb);
+
+     total = (char *)messageFindArgument(m, "total");
+     cli_dbgmsg("rfc1341: %s, %s of %s\n", id, number, (total) ? total : "?");
+     if(total) {
+         int n = atoi(number);
+         int t = atoi(total);
+         DIR *dd = NULL;
+
+         free(total);
+         /*
+          * If it's the last one - reassemble it
+          * FIXME: this assumes that we receive the parts in order
+          */
+         if((n == t) && ((dd = opendir(pdir)) != NULL)) {
+             FILE *fout;
+             char outname[NAME_MAX + 1];
+             time_t now;
+
+             sanitiseName(id);
+
+             snprintf(outname, sizeof(outname) - 1, "%s/%s", dir, id);
+
+             cli_dbgmsg("outname: %s\n", outname);
+
+             fout = fopen(outname, "wb");
+             if(fout == NULL) {
+                 cli_errmsg("Can't open '%s' for writing", outname);
+                 free(id);
+                 free(number);
+                 closedir(dd);
+                 return -1;
+             }
+
+             time(&now);
+             /* One pass over the directory for each part, in order */
+             for(n = 1; n <= t; n++) {
+                 char filename[NAME_MAX + 1];
+                 const struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+                 union {
+                     struct dirent d;
+                     char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+                 } result;
+#endif
+
+                 snprintf(filename, sizeof(filename), "%s%d", id, n);
+
+#ifdef HAVE_READDIR_R_3
+                 while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+                 while((dent = (struct dirent *)readdir_r(dd, &result.d))) {
+#else /*!HAVE_READDIR_R*/
+                 while((dent = readdir(dd))) {
+#endif
+                     FILE *fin;
+                     char buffer[BUFSIZ], fullname[NAME_MAX + 1];
+                     int nblanks;
+                     struct stat statb;
+
+#ifndef C_CYGWIN
+                     if(dent->d_ino == 0)
+                         continue;
+#endif
+
+                     snprintf(fullname, sizeof(fullname) - 1,
+                         "%s/%s", pdir, dent->d_name);
+
+                     if(strncmp(filename, dent->d_name, strlen(filename)) != 0) {
+                         /*
+                          * Not the part being looked for. When
+                          * temporary files are being kept, remove
+                          * entries more than a week old.
+                          */
+                         if(!cli_leavetemps_flag)
+                             continue;
+                         if(stat(fullname, &statb) < 0)
+                             continue;
+                         if(now - statb.st_mtime > (time_t)(7 * 24 * 3600))
+                             if(unlink(fullname) >= 0)
+                                 cli_warnmsg("removed old RFC1341 file %s\n", fullname);
+                         continue;
+                     }
+
+                     fin = fopen(fullname, "rb");
+                     if(fin == NULL) {
+                         cli_errmsg("Can't open '%s' for reading", fullname);
+                         fclose(fout);
+                         unlink(outname);
+                         free(id);
+                         free(number);
+                         closedir(dd);
+                         return -1;
+                     }
+                     nblanks = 0;
+                     while(fgets(buffer, sizeof(buffer) - 1, fin) != NULL)
+                         /*
+                          * Ensure that trailing newlines
+                          * aren't copied
+                          */
+                         if(buffer[0] == '\n')
+                             nblanks++;
+                         else {
+                             if(nblanks)
+                                 do
+                                     putc('\n', fout);
+                                 while(--nblanks > 0);
+                             fputs(buffer, fout);
+                         }
+                     fclose(fin);
+
+                     /* don't unlink if leave temps */
+                     if(!cli_leavetemps_flag)
+                         unlink(fullname);
+                     break;
+                 }
+                 rewinddir(dd);
+             }
+             closedir(dd);
+             fclose(fout);
+         }
+     }
+     free(number);
+     free(id);
+
+     return 0;
+ }
+#endif
+
+ static void
+ hrefs_done(blob *b, tag_arguments_t *hrefs)
+ {
+     /*
+      * Releases what getHrefs() handed out: the blob, which may be
+      * NULL, and whatever html_tag_arg_free() releases in hrefs.
+      */
+     if(b != NULL)
+         blobDestroy(b);
+
+     html_tag_arg_free(hrefs);
+ }
+
+/*
+ * This used to be part of checkURLs, split out, because phishingScan needs it
+ * too, and phishingScan might be used in situations where checkURLs is
+ * disabled (see ifdef)
+ */
+ static blob *
+ getHrefs(message *m, tag_arguments_t *hrefs)
+ {
+     /*
+      * Converts message m to a blob and runs html_normalise_mem() over
+      * it, passing hrefs for it to fill in.
+      *
+      * Returns the blob on success. Returns NULL, with the blob already
+      * destroyed, if the message can't be converted, is empty, is
+      * larger than 100KB, or if html_normalise_mem() fails.
+      */
+     size_t nbytes;
+     blob *body = messageToBlob(m, 0);
+
+     if(body == NULL)
+         return NULL;
+
+     nbytes = blobGetDataSize(body);
+
+     if(nbytes == 0)
+         goto discard;
+
+     /* TODO: make this size customisable */
+     if(nbytes > 100*1024) {
+         cli_warnmsg("Viruses pointed to by URLs not scanned in large message\n");
+         goto discard;
+     }
+
+     /* Start from an empty result set */
+     hrefs->count = 0;
+     hrefs->value = NULL;
+     hrefs->tag = NULL;
+     hrefs->contents = NULL;
+
+     cli_dbgmsg("getHrefs: calling html_normalise_mem\n");
+     if(!html_normalise_mem(blobGetData(body), (off_t)nbytes, NULL, hrefs,m->ctx->dconf))
+         goto discard;
+     cli_dbgmsg("getHrefs: html_normalise_mem returned\n");
+
+     /* TODO: Do we need to call remove_html_comments? */
+     return body;
+
+ discard:
+     blobDestroy(body);
+     return NULL;
+ }
+
+/*
+ * validate URLs for phishes
+ * followurls: see if URLs point to malware
+ */
+ static void
+ checkURLs(message *mainMessage, mbox_ctx *mctx, mbox_status *rc, int is_html)
+ {
+     /*
+      * Extracts the hrefs of mainMessage, runs the phishing scan over
+      * them when it is enabled, and for HTML mail scanned with
+      * CL_SCAN_MAILURL hands them to do_checkURLs().
+      *
+      * *rc is set to VIRUS when the phishing scan reports CL_VIRUS.
+      * Nothing is done if *rc is already VIRUS on entry.
+      */
+     tag_arguments_t hrefs;
+     blob *b;
+
+     if(*rc == VIRUS)
+         return;
+
+     hrefs.scanContents = (mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS) && (DCONF_PHISHING & PHISHING_CONF_ENGINE);
+
+#if (!defined(FOLLOWURLS)) || (FOLLOWURLS <= 0)
+     if(!hrefs.scanContents)
+         /*
+          * Don't waste time extracting hrefs (parsing html), nobody
+          * will need it
+          */
+         return;
+#endif
+
+     hrefs.count = 0;
+     hrefs.value = NULL;
+     hrefs.tag = NULL;
+     hrefs.contents = NULL;
+
+     b = getHrefs(mainMessage, &hrefs);
+     if(b != NULL) {
+         if(hrefs.scanContents &&
+            (phishingScan(mainMessage, mctx->dir, mctx->ctx, &hrefs) == CL_VIRUS)) {
+             /*
+              * FIXME: message objects' contents are
+              *    encapsulated so we should not access
+              *    the members directly
+              */
+             mainMessage->isInfected = TRUE;
+             *rc = VIRUS;
+             cli_dbgmsg("PH:Phishing found\n");
+         }
+         if(is_html && (mctx->ctx->options&CL_SCAN_MAILURL) && (*rc != VIRUS))
+             do_checkURLs(mctx->dir, &hrefs);
+     }
+     /* b may be NULL here; hrefs_done() copes with that */
+     hrefs_done(b, &hrefs);
+ }
+
+#if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
+ static void
+ do_checkURLs(const char *dir, tag_arguments_t *hrefs)
+ {
+     /*
+      * Hands up to FOLLOWURLS distinct "http://" URLs from hrefs to
+      * getURL(), with `dir` as the directory argument. With
+      * CL_THREAD_SAFE each URL gets its own thread and all of them are
+      * joined before returning; otherwise getURL() is called in turn.
+      *
+      * A URL whose strings can't be duplicated, or whose thread can't
+      * be started, is skipped and does not count towards the limit.
+      */
+     table_t *t;
+     int i, n;
+#ifdef CL_THREAD_SAFE
+     pthread_t tid[FOLLOWURLS];
+     struct arg args[FOLLOWURLS];
+#endif
+
+     /* Table of the URLs already handled, to avoid duplicates */
+     t = tableCreate();
+     if(t == NULL)
+         return;
+
+     n = 0;
+
+     /*
+      * Sort .exes higher up so that there's more chance they'll be
+      * downloaded and scanned
+      */
+     for(i = FOLLOWURLS; (i < hrefs->count) && (n < FOLLOWURLS); i++) {
+         char *url = (char *)hrefs->value[i];
+         char *ptr;
+
+         if(strncasecmp("http://", url, 7) != 0)
+             continue;
+
+         ptr = strrchr(url, '.');
+         if(ptr == NULL)
+             continue;
+         if(strcasecmp(ptr, ".exe") == 0) {
+             /* FIXME: Could be swapping with another .exe */
+             cli_dbgmsg("swap %s %s\n", hrefs->value[n], hrefs->value[i]);
+             ptr = (char *)hrefs->value[n];
+             hrefs->value[n++] = (unsigned char *)url;
+             hrefs->value[i] = (unsigned char *)ptr;
+         }
+     }
+
+     n = 0;
+
+     for(i = 0; i < hrefs->count; i++) {
+         const char *url = (const char *)hrefs->value[i];
+
+         /*
+          * TODO: If it's an image source, it'd be nice to note beacons
+          *    where width="0" height="0", which needs support from
+          *    the HTML normalise code
+          */
+         if(strncasecmp("http://", url, 7) == 0) {
+#ifndef CL_THREAD_SAFE
+             struct arg arg;
+#endif
+             char name[NAME_MAX + 1];
+
+             if(tableFind(t, url) == 1) {
+                 cli_dbgmsg("URL %s already downloaded\n", url);
+                 continue;
+             }
+             /*
+              * What about foreign character spoofing?
+              */
+             if(strchr(url, '%') && strchr(url, '@'))
+                 cli_warnmsg("Possible URL spoofing attempt noticed, but not yet handled (%s)\n", url);
+
+             if(n == FOLLOWURLS) {
+                 cli_warnmsg("URL %s will not be scanned (FOLLOWURLS limit %d was reached)\n",
+                     url, FOLLOWURLS);
+                 break;
+             }
+
+             (void)tableInsert(t, url, 1);
+             cli_dbgmsg("Downloading URL %s to be scanned\n", url);
+             strncpy(name, url, sizeof(name) - 1);
+             name[sizeof(name) - 1] = '\0';
+             sanitiseName(name);    /* bug #538 */
+
+#ifdef CL_THREAD_SAFE
+             args[n].dir = dir;
+             args[n].url = cli_strdup(url);
+             args[n].filename = cli_strdup(name);
+             args[n].depth = 0;
+             /*
+              * Only count the slot once the thread really exists,
+              * otherwise the join loop below would wait on a thread
+              * id that was never filled in
+              */
+             if((args[n].url == NULL) || (args[n].filename == NULL) ||
+                (pthread_create(&tid[n], NULL, getURL, &args[n]) != 0)) {
+                 cli_warnmsg("URL %s will not be scanned (download could not be started)\n", url);
+                 free(args[n].filename);
+                 free(args[n].url);
+                 continue;
+             }
+#else
+             arg.url = cli_strdup(url);
+             if(arg.url == NULL) {
+                 cli_warnmsg("URL %s will not be scanned (download could not be started)\n", url);
+                 continue;
+             }
+             arg.dir = dir;
+             arg.filename = name;
+             arg.depth = 0;
+             getURL(&arg);
+             free(arg.url);
+#endif
+             ++n;
+         }
+     }
+     tableDestroy(t);
+
+#ifdef CL_THREAD_SAFE
+     assert(n <= FOLLOWURLS);
+     cli_dbgmsg("checkURLs: waiting for %d thread(s) to finish\n", n);
+     while(--n >= 0) {
+         pthread_join(tid[n], NULL);
+         free(args[n].filename);
+         free(args[n].url);
+     }
+#endif
+ }
+
+#else /*!FOLLOWURLS*/
+
+/*
+ * This is the version used when FOLLOWURLS is not defined or is not
+ * positive: no URL is downloaded, so there is nothing to do.
+ * Both arguments are ignored.
+ */
+static void
+do_checkURLs(const char *dir, tag_arguments_t *hrefs)
+{
+}
+
+#endif
+
+#if defined(FOLLOWURLS) && (FOLLOWURLS > 0)
+/*
+ * Includes some Win32 patches by Gianluigi Tiesi <sherpya at netfarm.it>
+ *
+ * FIXME: Often WMF exploits work by sending people an email directing them
+ * to a page which displays a picture containing the exploit. This is not
+ * currently found, since only the HTML on the referred page is downloaded.
+ * It would be useful to scan the HTML for references to pictures and
+ * download them for scanning. But that will hit performance so there is
+ * an issue here.
+ */
+
+/*
+ * Simple implementation of a subset of RFC1945 (HTTP/1.0)
+ * TODO: HTTP/1.1 (RFC2068)
+ *
+ * Fetch arg->url and save the body of the reply in the file
+ * arg->dir/arg->filename. If the environment variable http_proxy is set the
+ * request is sent to that proxy. A 301 or 302 reply with a Location header
+ * is followed by calling getURL() again with arg->url replaced by the new
+ * location (the old string is freed), at most FOLLOWURLS times.
+ *
+ * The argument is a struct arg *, passed as void * when CL_THREAD_SAFE is
+ * defined so that the function can be started by pthread_create().
+ * Every path returns NULL, errors are only reported through the log.
+ */
+static void *
+#ifdef CL_THREAD_SAFE
+getURL(void *a)
+#else
+getURL(struct arg *arg)
+#endif
+{
+    FILE *fp;
+#ifdef CL_THREAD_SAFE
+    struct arg *arg = (struct arg *)a;
+#endif
+    const char *url = arg->url;
+    const char *dir = arg->dir;
+    const char *filename = arg->filename;
+    SOCKET sd;
+    struct sockaddr_in server;
+#ifdef HAVE_IN_ADDR_T
+    in_addr_t ip;
+#else
+    unsigned int ip;
+#endif
+    in_port_t port;
+    static in_port_t default_port;
+    static int tcp;
+    int doingsite, firstpacket;
+    char *ptr;
+    long flags;
+    int via_proxy;
+    const char *proxy;
+    char buf[BUFSIZ + 1], site[BUFSIZ], fout[NAME_MAX + 1];
+
+    /* the host name is copied out of the URL into site[] below */
+    if(strlen(url) > (sizeof(site) - 1)) {
+        cli_dbgmsg("Ignoring long URL \"%s\"\n", url);
+        return NULL;
+    }
+
+    snprintf(fout, sizeof(fout) - 1, "%s/%s", dir, filename);
+
+    fp = fopen(fout, "wb");
+
+    if(fp == NULL) {
+        cli_errmsg("Can't open '%s' for writing\n", fout);
+        return NULL;
+    }
+    cli_dbgmsg("Saving %s to %s\n", url, fout);
+
+#ifndef C_BEOS
+    /* the protocol number is looked up once and kept in a static */
+    if(tcp == 0) {
+        const struct protoent *proto = getprotobyname("tcp");
+
+        if(proto == NULL) {
+            cli_warnmsg("Unknown prototol tcp, check /etc/protocols\n");
+            fclose(fp);
+            return NULL;
+        }
+        tcp = proto->p_proto;
+#ifndef C_WINDOWS
+        endprotoent();
+#endif
+    }
+#endif
+    /* likewise the default http port, falling back to 80 */
+    if(default_port == 0) {
+        const struct servent *servent = getservbyname("http", "tcp");
+
+        if(servent)
+            default_port = (in_port_t)ntohs(servent->s_port);
+        else
+            default_port = 80;
+#if !defined(C_WINDOWS) && !defined(C_BEOS)
+        endservent();
+#endif
+    }
+    port = default_port;
+
+    doingsite = 1;
+    ptr = site;
+
+    proxy = getenv("http_proxy"); /* FIXME: handle no_proxy */
+
+    via_proxy = (proxy && *proxy);
+
+    /*
+     * Split "http://host[:port]/..." into site and port, taking the
+     * host from the proxy setting if there is one, else from the URL
+     */
+    if(via_proxy) {
+        if(strncasecmp(proxy, "http://", 7) != 0) {
+            cli_warnmsg("Unsupported proxy protocol (proxy = %s)\n",
+                proxy);
+            fclose(fp);
+            return NULL;
+        }
+
+        cli_dbgmsg("Getting %s via %s\n", url, proxy);
+
+        proxy += 7;
+        while(*proxy) {
+            if(doingsite && (*proxy == ':')) {
+                port = 0;
+                while(isdigit((unsigned char)*++proxy)) {
+                    port *= 10;
+                    port += *proxy - '0';
+                }
+                continue;
+            }
+            if(doingsite && (*proxy == '/')) {
+                proxy++;
+                break;
+            }
+            /*
+             * Unlike the URL, the length of http_proxy has not
+             * been checked, so make sure it fits into site[]
+             * leaving room for the terminating NUL
+             */
+            if(ptr >= &site[sizeof(site) - 1]) {
+                cli_warnmsg("Ignoring long proxy name\n");
+                fclose(fp);
+                return NULL;
+            }
+            *ptr++ = *proxy++;
+        }
+    } else {
+        cli_dbgmsg("Getting %s\n", url);
+
+        if(strncasecmp(url, "http://", 7) != 0) {
+            cli_warnmsg("Unsupported protocol\n");
+            fclose(fp);
+            return NULL;
+        }
+
+        /* after this loop url points at the path to ask for */
+        url += 7;
+        while(*url) {
+            if(doingsite && (*url == ':')) {
+                port = 0;
+                while(isdigit((unsigned char)*++url)) {
+                    port *= 10;
+                    port += *url - '0';
+                }
+                continue;
+            }
+            if(doingsite && (*url == '/')) {
+                url++;
+                break;
+            }
+            *ptr++ = *url++;
+        }
+    }
+    *ptr = '\0';
+
+    memset((char *)&server, '\0', sizeof(struct sockaddr_in));
+    server.sin_family = AF_INET;
+    server.sin_port = (in_port_t)htons(port);
+
+    /* site is either a dotted quad or a name that must be resolved */
+    ip = inet_addr(site);
+#ifdef INADDR_NONE
+    if(ip == INADDR_NONE) {
+#else
+    if(ip == (in_addr_t)-1) {
+#endif
+        struct hostent h;
+
+        if((my_r_gethostbyname(site, &h, buf, sizeof(buf)) != 0) ||
+           (h.h_addr_list == NULL) ||
+           (h.h_addr == NULL)) {
+            cli_dbgmsg("Unknown host %s\n", site);
+            fclose(fp);
+            return NULL;
+        }
+
+        memcpy((char *)&ip, h.h_addr, sizeof(ip));
+    }
+    if((sd = socket(AF_INET, SOCK_STREAM, tcp)) < 0) {
+        fclose(fp);
+        return NULL;
+    }
+    /*
+     * Make the socket non-blocking for the connect, so that
+     * nonblock_connect() can time out, remembering the old flags
+     */
+#ifdef F_GETFL
+    flags = fcntl(sd, F_GETFL, 0);
+
+    if(flags == -1L)
+        cli_warnmsg("getfl: %s\n", strerror(errno));
+    else if(fcntl(sd, F_SETFL, (long)(flags | O_NONBLOCK)) < 0)
+        cli_warnmsg("setfl: %s\n", strerror(errno));
+#else
+    flags = -1L;
+#endif
+    server.sin_addr.s_addr = ip;
+    if(nonblock_connect(url, sd, (struct sockaddr *)&server) < 0) {
+        closesocket(sd);
+        fclose(fp);
+        return NULL;
+    }
+    /* connected, put the original flags back */
+#ifdef F_SETFL
+    if(flags != -1L)
+        if(fcntl(sd, F_SETFL, flags))
+            cli_warnmsg("f_setfl: %s\n", strerror(errno));
+#endif
+
+    /*
+     * TODO: consider HTTP/1.1
+     *
+     * Through a proxy url is still the complete URL, otherwise it
+     * is the path with its leading '/' removed
+     */
+    if(via_proxy)
+        snprintf(buf, sizeof(buf) - 1,
+            "GET %s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n",
+            url, VERSION);
+    else
+        snprintf(buf, sizeof(buf) - 1,
+            "GET /%s HTTP/1.0\r\nUser-Agent: ClamAV %s\r\n\r\n",
+            url, VERSION);
+
+    /*cli_dbgmsg("%s", buf);*/
+
+    if(send(sd, buf, (int)strlen(buf), 0) < 0) {
+        closesocket(sd);
+        fclose(fp);
+        return NULL;
+    }
+
+    /* nothing more will be sent */
+#ifdef SHUT_WR
+    shutdown(sd, SHUT_WR);
+#else
+    shutdown(sd, 1);
+#endif
+
+    firstpacket = 1;
+
+    /* copy the reply to the file until the server closes the connection */
+    for(;;) {
+        fd_set set;
+        struct timeval tv;
+        int n;
+
+        FD_ZERO(&set);
+        FD_SET(sd, &set);
+
+        tv.tv_sec = 30; /* FIXME: make this customisable */
+        tv.tv_usec = 0;
+
+        if(select((int)sd + 1, &set, NULL, NULL, &tv) < 0) {
+            if(errno == EINTR)
+                continue;
+            closesocket(sd);
+            fclose(fp);
+            return NULL;
+        }
+        if(!FD_ISSET(sd, &set)) {
+            /* select returned without data being ready: give up */
+            fclose(fp);
+            closesocket(sd);
+            return NULL;
+        }
+        n = recv(sd, buf, sizeof(buf) - 1, 0);
+
+        if(n < 0) {
+            fclose(fp);
+            closesocket(sd);
+            return NULL;
+        }
+        if(n == 0)
+            break;
+
+        /*
+         * FIXME: Handle header in more than one packet
+         */
+        if(firstpacket) {
+            char *statusptr;
+
+            buf[n] = '\0';
+
+            /* the status code is the 2nd word of the reply */
+            statusptr = cli_strtok(buf, 1, " ");
+
+            if(statusptr) {
+                int status = atoi(statusptr);
+
+                cli_dbgmsg("HTTP status %d\n", status);
+
+                free(statusptr);
+
+                if((status == 301) || (status == 302)) {
+                    char *location;
+
+                    location = strstr(buf, "\nLocation: ");
+
+                    if(location) {
+                        char *end;
+
+                        unlink(fout);
+                        if(arg->depth >= FOLLOWURLS) {
+                            cli_warnmsg("URL %s will not be followed to %s (FOLLOWURLS limit %d was reached)\n",
+                                arg->url, location, FOLLOWURLS);
+                            break;
+                        }
+
+                        fclose(fp);
+                        closesocket(sd);
+
+                        location += 11;
+                        free(arg->url);
+                        /*
+                         * Header lines end with \r\n,
+                         * neither character is part
+                         * of the new URL
+                         */
+                        end = location;
+                        while(*end && (*end != '\n') && (*end != '\r'))
+                            end++;
+                        *end = '\0';
+                        arg->url = cli_strdup(location);
+                        if(arg->url == NULL)
+                            /* out of memory, nothing to follow */
+                            return NULL;
+                        arg->depth++;
+                        cli_dbgmsg("Redirecting to %s\n", arg->url);
+                        return getURL(arg);
+                    }
+                }
+            }
+            /*
+             * Don't write the HTTP header
+             */
+            if((ptr = strstr(buf, "\r\n\r\n")) != NULL) {
+                ptr += 4;
+                n -= (int)(ptr - buf);
+            } else if((ptr = strstr(buf, "\n\n")) != NULL) {
+                ptr += 2;
+                n -= (int)(ptr - buf);
+            } else
+                ptr = buf;
+
+            firstpacket = 0;
+        } else
+            ptr = buf;
+
+        if(n && (fwrite(ptr, n, 1, fp) != 1)) {
+            cli_warnmsg("Error writing %d bytes to %s\n",
+                n, fout);
+            break;
+        }
+    }
+
+    fclose(fp);
+    closesocket(sd);
+    return NULL;
+}
+
+/*
+ * Have a copy here because r_gethostbyname is in shared not libclamav :-(
+ *
+ * Look up hostname using whichever flavour of gethostbyname_r() the platform
+ * has, or gethostbyname() if it has none, and store the result in *hp.
+ * buf (of len bytes) is the scratch space the reentrant variants need.
+ *
+ * Returns 0 on success. On failure a non-zero value is returned: -1 if
+ * hostname or hp is NULL, otherwise the error reported by the resolver.
+ */
+static int
+my_r_gethostbyname(const char *hostname, struct hostent *hp, char *buf, size_t len)
+{
+#if defined(HAVE_GETHOSTBYNAME_R_6)
+    /* e.g. Linux */
+    struct hostent *hp2 = NULL;
+    int ret = -1;
+
+    if((hostname == NULL) || (hp == NULL))
+        return -1;
+    /*
+     * This variant returns 0 on success and a (positive) error number
+     * on failure, and leaves hp2 NULL when nothing was found, so
+     * testing for a negative return value would miss every error
+     */
+    if((gethostbyname_r(hostname, hp, buf, len, &hp2, &ret) != 0) ||
+       (hp2 == NULL))
+        return (ret != 0) ? ret : -1;
+#elif defined(HAVE_GETHOSTBYNAME_R_5)
+    /* e.g. BSD, Solaris, Cygwin */
+    /*
+     * Configure doesn't work on BeOS. We need -lnet to link, but configure
+     * doesn't add it, so you need to do something like
+     * LIBS=-lnet ./configure --enable-cache --disable-clamav
+     */
+    int ret = -1;
+
+    if((hostname == NULL) || (hp == NULL))
+        return -1;
+    if(gethostbyname_r(hostname, hp, buf, len, &ret) == NULL)
+        return ret;
+#elif defined(HAVE_GETHOSTBYNAME_R_3)
+    /* e.g. HP/UX, AIX */
+    if((hostname == NULL) || (hp == NULL))
+        return -1;
+    /* the 2nd argument is the struct to fill in, not its address */
+    if(gethostbyname_r(hostname, hp, (struct hostent_data *)buf) < 0)
+        return h_errno;
+#else
+    /* Single thread the code e.g. VS2005 */
+    struct hostent *hp2;
+#ifdef CL_THREAD_SAFE
+    static pthread_mutex_t hostent_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+    if((hostname == NULL) || (hp == NULL))
+        return -1;
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&hostent_mutex);
+#endif
+    if((hp2 = gethostbyname(hostname)) == NULL) {
+#ifdef CL_THREAD_SAFE
+        pthread_mutex_unlock(&hostent_mutex);
+#endif
+        return h_errno;
+    }
+    /*
+     * NOTE: this is a shallow copy, the pointers inside *hp still
+     * refer to the storage owned by gethostbyname()
+     */
+    memcpy(hp, hp2, sizeof(struct hostent));
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_unlock(&hostent_mutex);
+#endif
+
+#endif
+    return 0;
+}
+
+/*
+ * Non-blocking connect, based on an idea by Everton da Silva Marques
+ * <everton.marques at gmail.com>
+ *
+ * Start connecting sock to addr and wait with select() until the socket is
+ * writable, for no longer than URL_TIMEOUT seconds. url is only used in the
+ * log messages.
+ * Returns 0 once connected, -1 on failure or time out.
+ */
+static int
+nonblock_connect(const char *url, SOCKET sock, const struct sockaddr *addr)
+{
+    struct timeval deadline;    /* When we should time out */
+    int failures_left;          /* Unexpected select() failures still allowed */
+    int tries;
+    int maxfd;                  /* Highest fdset fd plus 1 */
+
+    gettimeofday(&deadline, 0); /* store when we started to connect */
+
+    if(connect(sock, addr, sizeof(struct sockaddr_in)) == 0)
+        /* connected straight away */
+        return connect_error(url, sock);
+
+    switch(errno) {
+        case EALREADY:
+        case EINPROGRESS:
+            /* wait for connection */
+            cli_dbgmsg("%s: connect: %s\n", url, strerror(errno));
+            break;
+        case EISCONN:
+            /* connected */
+            return 0;
+        default:
+            /* failed */
+            cli_warnmsg("%s: connect: %s\n", url, strerror(errno));
+            return -1;
+    }
+
+    deadline.tv_sec += URL_TIMEOUT;
+    failures_left = NONBLOCK_SELECT_MAX_FAILURES;
+    maxfd = (int)sock + 1;
+    tries = 1;
+
+    for(;;) {
+        struct timeval now, remaining;
+        fd_set wfds;
+        int nready, expired;
+
+        /* Force timeout if we ran out of time */
+        gettimeofday(&now, 0);
+        if(now.tv_sec == deadline.tv_sec)
+            expired = (now.tv_usec > deadline.tv_usec);
+        else
+            expired = (now.tv_sec > deadline.tv_sec);
+
+        if(expired) {
+            cli_warnmsg("%s: connect timeout (%d secs)\n",
+                url, URL_TIMEOUT);
+            return -1;
+        }
+
+        /* Calculate how long to wait, borrowing a second if needed */
+        remaining.tv_sec = deadline.tv_sec - now.tv_sec;
+        remaining.tv_usec = deadline.tv_usec - now.tv_usec;
+        if(remaining.tv_usec < 0) {
+            remaining.tv_usec += 1000000;
+            remaining.tv_sec--;
+        }
+
+        /* 'sock' is the only fd of interest */
+        FD_ZERO(&wfds);
+        FD_SET(sock, &wfds);
+
+        nready = select(maxfd, 0, &wfds, 0, &remaining);
+        if(nready < 0) {
+            cli_warnmsg("%s: select attempt %d %s\n",
+                url, failures_left, strerror(errno));
+            if(--failures_left < 0)
+                return -1;  /* failed */
+            continue;       /* not timed-out, try again */
+        }
+
+        cli_dbgmsg("%s: select = %d\n", url, nready);
+
+        if(nready != 0)
+            return connect_error(url, sock);
+
+        /* select() timed out */
+        if(tries++ == NONBLOCK_MAX_ATTEMPTS) {
+            cli_warnmsg("timeout connecting to %s\n", url);
+            return -1;
+        }
+    }
+}
+
+/*
+ * Find out if the connection attempt on sock worked by reading the pending
+ * error (SO_ERROR) of the socket. url is only used in the log messages.
+ * Returns 0 if there is no error, or if SO_ERROR isn't available on this
+ * platform, -1 otherwise.
+ */
+static int
+connect_error(const char *url, SOCKET sock)
+{
+#ifdef SO_ERROR
+    int optval = 0;
+    socklen_t optlen = sizeof(optval);
+
+    /*
+     * If the call itself fails optval tells us nothing, so don't
+     * claim that the connection succeeded
+     */
+    if(getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen) < 0) {
+        cli_warnmsg("%s: getsockopt: %s\n", url, strerror(errno));
+        return -1;
+    }
+
+    if(optval) {
+        cli_warnmsg("%s: %s\n", url, strerror(optval));
+        return -1;
+    }
+#endif
+
+    return 0;
+}
+
+#endif
+
+#ifdef HAVE_BACKTRACE
+/*
+ * Handler for SIGSEGV: log a backtrace, then exit with SIGSEGV as the
+ * exit status. sig is not used.
+ */
+static void
+sigsegv(int sig)
+{
+ /* go back to the default action first, in case print_trace() faults as well */
+ signal(SIGSEGV, SIG_DFL);
+ print_trace(1);
+ exit(SIGSEGV);
+}
+
+/*
+ * Log the call stack (at most 10 frames) of the current process with
+ * cli_errmsg() and, if use_syslog is non-zero, with syslog() as well
+ */
+static void
+print_trace(int use_syslog)
+{
+    void *array[10];
+    size_t size;
+    char **strings;
+    size_t i;
+    pid_t pid = getpid();
+
+    cli_errmsg("Segmentation fault, attempting to print backtrace\n");
+
+    size = backtrace(array, 10);
+    strings = backtrace_symbols(array, size);
+
+    cli_errmsg("Backtrace of pid %d:\n", (int)pid);
+    if(use_syslog)
+        syslog(LOG_ERR, "Backtrace of pid %d:", (int)pid);
+
+    /* backtrace_symbols() returns NULL if it can't get the memory it needs */
+    if(strings != NULL)
+        for(i = 0; i < size; i++) {
+            cli_errmsg("%s\n", strings[i]);
+            if(use_syslog)
+                /* %u needs an unsigned int, i is a size_t */
+                syslog(LOG_ERR, "bt[%u]: %s", (unsigned int)i, strings[i]);
+        }
+
+#ifdef SAVE_TMP
+    cli_errmsg("The errant mail file has been saved\n");
+#endif
+    /* #else TODO: dump the current email */
+
+    /* only the array is allocated, not the strings it points to */
+    free(strings);
+}
+#endif
+
+/*
+ * Is this a header that is worth keeping? That is the case for the three
+ * Content-* headers identified by commandNumber and, failing that, when cmd
+ * is "From", "Received" or "De" (in any case).
+ * See also clamav-milter
+ */
+static bool
+usefulHeader(int commandNumber, const char *cmd)
+{
+    if((commandNumber == CONTENT_TRANSFER_ENCODING) ||
+       (commandNumber == CONTENT_DISPOSITION) ||
+       (commandNumber == CONTENT_TYPE))
+        return TRUE;
+
+    if((strcasecmp(cmd, "From") == 0) ||
+       (strcasecmp(cmd, "Received") == 0) ||
+       (strcasecmp(cmd, "De") == 0))
+        return TRUE;
+
+    return FALSE;
+}
+
+/*
+ * Like fgets but cope with end of line by "\n", "\r\n", "\n\r", "\r"
+ *
+ * Read one line from fin into buffer, which holds len bytes. Whichever end
+ * of line sequence was found is stored as a single '\n' and the result is
+ * NUL terminated.
+ * Returns buffer, or NULL if fin was already at end of file, if there was a
+ * read error or if the arguments are invalid. NULL is also returned when
+ * len is 1 as there would be no room for the terminating NUL.
+ */
+static char *
+getline_from_mbox(char *buffer, size_t len, FILE *fin)
+{
+ char *ret;
+
+ if(feof(fin))
+ return NULL;
+
+ if((len == 0) || (buffer == NULL)) {
+ cli_errmsg("Invalid call to getline_from_mbox(). Refer to http://www.clamav.net/bugs\n");
+ return NULL;
+ }
+
+ ret = buffer;
+
+ /*
+ * Only the default case loops (via continue), all the other
+ * cases leave the switch and hit the break that ends the loop
+ */
+ do {
+ int c = getc(fin);
+
+ if(ferror(fin))
+ return NULL;
+
+ switch(c) {
+ case '\n':
+ *buffer++ = '\n';
+ /* swallow the \r of a "\n\r" pair */
+ c = getc(fin);
+ if((c != '\r') && !feof(fin))
+ ungetc(c, fin);
+ break;
+ default:
+ *buffer++ = (char)c;
+ continue;
+ case EOF:
+ break;
+ case '\r':
+ *buffer++ = '\n';
+ /* swallow the \n of a "\r\n" pair */
+ c = getc(fin);
+ if((c != '\n') && !feof(fin))
+ ungetc(c, fin);
+ break;
+ }
+ break;
+ } while(--len > 1);
+
+ /* len is only 0 here if it was 1 on entry */
+ if(len == 0) {
+ /* the email probably breaks RFC821 */
+ cli_warnmsg("getline_from_mbox: buffer overflow stopped, line lost\n");
+ return NULL;
+ }
+ *buffer = '\0';
+
+ /* the buffer filled up before the end of the line was seen */
+ if(len == 1)
+ /* overflows will have appeared on separate lines */
+ cli_dbgmsg("getline_from_mbox: buffer overflow stopped, line recovered\n");
+
+ return ret;
+}
+
+/*
+ * Is this line a candidate for the start of a bounce message?
+ *
+ * A line of 6 to 71 characters qualifies if it starts with "From " or
+ * ">From " and what follows has at least 6 spaces and 11 digits, or if it
+ * doesn't start that way but cli_filetype() says that it is mail
+ */
+static bool
+isBounceStart(const char *line)
+{
+    const char *p;
+    size_t len;
+    int spaces, digits;
+
+    if((line == NULL) || (*line == '\0'))
+        return FALSE;
+
+    len = strlen(line);
+    if((len < 6) || (len >= 72))
+        return FALSE;
+
+    if((memcmp(line, "From ", 5) != 0) &&
+       (memcmp(line, ">From ", 6) != 0))
+        return cli_filetype((const unsigned char *)line, len) == CL_TYPE_MAIL;
+
+    /*
+     * Count from the 5th character onwards, there is always one
+     * since the line has at least 6 characters
+     */
+    spaces = digits = 0;
+    for(p = &line[4]; *p != '\0'; p++) {
+        if(*p == ' ')
+            spaces++;
+        else if(isdigit((*p) & 0xFF))
+            digits++;
+    }
+
+    return ((spaces >= 6) && (digits >= 11)) ? TRUE : FALSE;
+}
+
+/*
+ * Decode the binhex encoded message m to a file in mctx->dir and scan it.
+ * A message without an encoding is marked as "x-binhex" first.
+ * Returns TRUE if the scan found a virus, FALSE if it did not or if the
+ * message could not be decoded
+ */
+static bool
+exportBinhexMessage(mbox_ctx *mctx, message *m)
+{
+    fileblob *fb;
+    bool infected;
+
+    if(messageGetEncoding(m) == NOENCODING)
+        messageSetEncoding(m, "x-binhex");
+
+    fb = messageToFileblob(m, mctx->dir, 0);
+    if(fb == NULL) {
+        cli_errmsg("Couldn't decode binhex file to %s\n", mctx->dir);
+        return FALSE;
+    }
+
+    cli_dbgmsg("Binhex file decoded to %s\n",
+        fileblobGetFilename(fb));
+
+    infected = (fileblobScanAndDestroy(fb) == CL_VIRUS) ? TRUE : FALSE;
+    mctx->files++;
+
+    return infected;
+}
+
+/*
+ * Locate any bounce message and extract it. Return cl_status
+ *
+ * The lines from start onwards are searched for a header which suggests
+ * that an encoded message follows. If there is one the text, from the last
+ * From or Received header seen before it, is saved to a file called "bounce"
+ * in mctx->dir and scanned.
+ * Returns the result of that scan, or CL_CLEAN if nothing was scanned.
+ */
+static int
+exportBounceMessage(mbox_ctx *mctx, text *start)
+{
+ int rc = CL_CLEAN;
+ text *t;
+ fileblob *fb;
+
+ /*
+ * Attempt to save the original (unbounced)
+ * message - clamscan will find that in the
+ * directory and call us again (with any luck)
+ * having found an e-mail message to handle.
+ *
+ * This finds a lot of false positives, the
+ * search that a content type is in the
+ * bounce (i.e. it's after the bounce header)
+ * helps a bit.
+ *
+ * messageAddLine
+ * optimisation could help here, but needs
+ * careful thought, do it with line numbers
+ * would be best, since the current method in
+ * messageAddLine of checking encoding first
+ * must remain otherwise non bounce messages
+ * won't be scanned
+ */
+ /*
+ * In the switch below "continue" moves on to the next line, while
+ * "break" leaves the switch and then, by the break that follows
+ * it, the loop. The loop ends with t non-NULL if a bounce was found
+ */
+ for(t = start; t; t = t->t_next) {
+ const char *txt = lineGetData(t->t_line);
+ char cmd[RFC2821LENGTH + 1];
+
+ if(txt == NULL)
+ continue;
+ /* cmd is the name of the header, i.e. the text before the ':' */
+ if(cli_strtokbuf(txt, 0, ":", cmd) == NULL)
+ continue;
+
+ switch(tableFind(mctx->rfc821Table, cmd)) {
+ case CONTENT_TRANSFER_ENCODING:
+ /* 7bit and 8bit are not worth saving, keep looking */
+ if((strstr(txt, "7bit") == NULL) &&
+ (strstr(txt, "8bit") == NULL))
+ break;
+ continue;
+ case CONTENT_DISPOSITION:
+ break;
+ case CONTENT_TYPE:
+ /* plain text: stop looking and report nothing found */
+ if(strstr(txt, "text/plain") != NULL)
+ t = NULL;
+ break;
+ default:
+ /* remember where the headers of the bounce start */
+ if(strcasecmp(cmd, "From") == 0)
+ start = t;
+ else if(strcasecmp(cmd, "Received") == 0)
+ start = t;
+ continue;
+ }
+ break;
+ }
+ if(t && ((fb = fileblobCreate()) != NULL)) {
+ cli_dbgmsg("Found a bounce message\n");
+ fileblobSetFilename(fb, mctx->dir, "bounce");
+ fileblobSetCTX(fb, mctx->ctx);
+ if(textToFileblob(start, fb, 1) == NULL) {
+ cli_dbgmsg("Nothing new to save in the bounce message\n");
+ fileblobDestroy(fb);
+ } else
+ rc = fileblobScanAndDestroy(fb);
+ /* NOTE: counted even if nothing was saved */
+ mctx->files++;
+ } else
+ cli_dbgmsg("Not found a bounce message\n");
+
+ return rc;
+}
+
+/*
+ * Handle the ith element of a number of multiparts, e.g. multipart/alternative
+ *
+ * mainMessage: the message found so far to hold the text, may be NULL
+ * messages: the parts of the multipart, messages[i] is the one handled
+ * rc: status so far, nothing is done unless it is OK on entry. Set to
+ * VIRUS if this part is found to be infected
+ * mctx: the scan context (directory, tables, file count)
+ * messageIn: the message the caller was given, it is never destroyed here
+ * tptr: text collected from the parts, the body of a part is moved
+ * on to the end of it when the part is to be treated as text
+ * recursion_level: passed on, plus one, to parseEmailBody()
+ *
+ * Returns the mainMessage to use from now on, which is NULL if this
+ * part made the old one obsolete. Unless the function returns early
+ * messages[i] is destroyed and set to NULL.
+ */
+static message *
+do_multipart(message *mainMessage, message **messages, int i, mbox_status *rc, mbox_ctx *mctx, message *messageIn, text **tptr, unsigned int recursion_level)
+{
+ bool addToText = FALSE;
+ const char *dtype;
+#ifndef SAVE_TO_DISC
+ message *body;
+#endif
+ message *aMessage = messages[i];
+ const int doPhishingScan = mctx->ctx->engine->dboptions&CL_DB_PHISHING_URLS && (DCONF_PHISHING&PHISHING_CONF_ENGINE);
+
+ if(aMessage == NULL)
+ return mainMessage;
+
+ if(*rc != OK)
+ return mainMessage;
+
+ cli_dbgmsg("Mixed message part %d is of type %d\n",
+ i, messageGetMimeType(aMessage));
+
+ /*
+ * Cases which "break" fall out to the code after the switch that
+ * saves and scans the part, or adds it to the text. Cases which
+ * "return" have dealt with the part themselves
+ */
+ switch(messageGetMimeType(aMessage)) {
+ case APPLICATION:
+ case AUDIO:
+ case IMAGE:
+ case VIDEO:
+ break;
+ case NOMIME:
+ cli_dbgmsg("No mime headers found in multipart part %d\n", i);
+ if(mainMessage) {
+ if(binhexBegin(aMessage)) {
+ cli_dbgmsg("Found binhex message in multipart/mixed mainMessage\n");
+
+ if(exportBinhexMessage(mctx, mainMessage))
+ *rc = VIRUS;
+ }
+ if(mainMessage != messageIn)
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ } else if(aMessage) {
+ if(binhexBegin(aMessage)) {
+ cli_dbgmsg("Found binhex message in multipart/mixed non mime part\n");
+ if(exportBinhexMessage(mctx, aMessage))
+ *rc = VIRUS;
+ assert(aMessage == messages[i]);
+ messageReset(messages[i]);
+ }
+ }
+ addToText = TRUE;
+ if(messageGetBody(aMessage) == NULL)
+ /*
+ * No plain text version
+ */
+ cli_dbgmsg("No plain text alternative\n");
+ break;
+ case TEXT:
+ dtype = messageGetDispositionType(aMessage);
+ cli_dbgmsg("Mixed message text part disposition \"%s\"\n",
+ dtype);
+ if(strcasecmp(dtype, "attachment") == 0)
+ break;
+ if((*dtype == '\0') || (strcasecmp(dtype, "inline") == 0)) {
+ const char *cptr;
+
+ if(mainMessage && (mainMessage != messageIn))
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ cptr = messageGetMimeSubtype(aMessage);
+ cli_dbgmsg("Mime subtype \"%s\"\n", cptr);
+ if((tableFind(mctx->subtypeTable, cptr) == PLAIN) &&
+ (messageGetEncoding(aMessage) == NOENCODING)) {
+ /*
+ * Strictly speaking, a text/plain part
+ * is not an attachment. We pretend it
+ * is so that we can decode and scan it
+ */
+ if(!messageHasFilename(aMessage)) {
+ cli_dbgmsg("Adding part to main message\n");
+ addToText = TRUE;
+ } else
+ cli_dbgmsg("Treating inline as attachment\n");
+ } else {
+ const int is_html = (tableFind(mctx->subtypeTable, cptr) == HTML);
+ /* check the URLs in the part before it is saved as a file */
+ if((mctx->ctx->options&CL_SCAN_MAILURL) && is_html)
+ checkURLs(aMessage, mctx, rc, 1);
+ else if(doPhishingScan)
+ checkURLs(aMessage, mctx, rc, is_html);
+ messageAddArgument(aMessage,
+ "filename=mixedtextportion");
+ }
+ break;
+ }
+ cli_dbgmsg("Text type %s is not supported\n", dtype);
+ return mainMessage;
+ case MESSAGE:
+ /* Content-Type: message/rfc822 */
+ cli_dbgmsg("Found message inside multipart (encoding type %d)\n",
+ messageGetEncoding(aMessage));
+#ifndef SCAN_UNENCODED_BOUNCES
+ switch(messageGetEncoding(aMessage)) {
+ case NOENCODING:
+ case EIGHTBIT:
+ case BINARY:
+ if(encodingLine(aMessage) == NULL) {
+ /*
+ * This means that the message
+ * has no attachments
+ *
+ * The test for
+ * messageGetEncoding is needed
+ * since encodingLine won't have
+ * been set if the message
+ * itself has been encoded
+ */
+ cli_dbgmsg("Unencoded multipart/message will not be scanned\n");
+ assert(aMessage == messages[i]);
+ messageDestroy(messages[i]);
+ messages[i] = NULL;
+ return mainMessage;
+ }
+ /* FALLTHROUGH */
+ default:
+ cli_dbgmsg("Encoded multipart/message will be scanned\n");
+ }
+#endif
+#if 0
+ messageAddStrAtTop(aMessage,
+ "Received: by clamd (message/rfc822)");
+#endif
+#ifdef SAVE_TO_DISC
+ /*
+ * Save this embedded message
+ * to a temporary file
+ */
+ if(saveTextPart(mctx, aMessage, 1) == CL_VIRUS)
+ *rc = VIRUS;
+ assert(aMessage == messages[i]);
+ messageDestroy(messages[i]);
+ messages[i] = NULL;
+#else
+ /*
+ * Scan in memory, faster but is open to DoS attacks
+ * when many nested levels are involved.
+ */
+ body = parseEmailHeaders(aMessage, mctx->rfc821Table);
+
+ /*
+ * We've finished with the
+ * original copy of the message,
+ * so throw that away and
+ * deal with the encapsulated
+ * message as a message.
+ * This can save a lot of memory
+ */
+ assert(aMessage == messages[i]);
+ messageDestroy(messages[i]);
+ messages[i] = NULL;
+ if(body) {
+ messageSetCTX(body, mctx->ctx);
+ *rc = parseEmailBody(body, NULL, mctx, recursion_level + 1);
+ if((*rc == OK) && messageContainsVirus(body))
+ *rc = VIRUS;
+ messageDestroy(body);
+ }
+#endif
+ return mainMessage;
+ case MULTIPART:
+ /*
+ * It's a multi part within a multi part
+ * Run the message parser on this bit, it won't
+ * be an attachment
+ */
+ cli_dbgmsg("Found multipart inside multipart\n");
+ /*
+ * NOTE(review): aMessage was checked for NULL on entry, so
+ * the else branch below looks unreachable
+ */
+ if(aMessage) {
+ /*
+ * The headers were parsed when reading in the
+ * whole multipart section
+ */
+ *rc = parseEmailBody(aMessage, *tptr, mctx, recursion_level + 1);
+ cli_dbgmsg("Finished recursion, rc = %d\n", (int)*rc);
+ assert(aMessage == messages[i]);
+ messageDestroy(messages[i]);
+ messages[i] = NULL;
+ } else {
+ *rc = parseEmailBody(NULL, NULL, mctx, recursion_level + 1);
+ if(mainMessage && (mainMessage != messageIn))
+ messageDestroy(mainMessage);
+ mainMessage = NULL;
+ }
+ return mainMessage;
+ default:
+ cli_warnmsg("Only text and application attachments are fully supported, type = %d\n",
+ messageGetMimeType(aMessage));
+ /* fall through - we may be able to salvage something */
+ }
+
+ if(*rc != VIRUS) {
+ if(addToText) {
+ cli_dbgmsg("Adding to non mime-part\n");
+ if(messageGetBody(aMessage))
+ *tptr = textMove(*tptr, messageGetBody(aMessage));
+ } else {
+ /* treat the part as an attachment: save it to a file and scan that */
+ fileblob *fb = messageToFileblob(aMessage, mctx->dir, 1);
+
+ if(fb) {
+ if(fileblobScanAndDestroy(fb) == CL_VIRUS)
+ *rc = VIRUS;
+ mctx->files++;
+ }
+ }
+ if(messageContainsVirus(aMessage))
+ *rc = VIRUS;
+ }
+ /* this part has been dealt with, the caller must not use it again */
+ messageDestroy(aMessage);
+ messages[i] = NULL;
+
+ return mainMessage;
+}
+
+/*
+ * Count the double quote characters (") in the NUL terminated string buf
+ */
+static int
+count_quotes(const char *buf)
+{
+    int total = 0;
+
+    for(; *buf != '\0'; buf++)
+        if(*buf == '\"')
+            total++;
+
+    return total;
+}
+
+/*
+ * Will the next line be a folded header? See RFC2822 section 2.2.3
+ *
+ * t is the current header line. Returns TRUE if the line after it starts
+ * with a space or a tab, or if that line contains an '=' and the current
+ * line ends (ignoring trailing white space) with a ';'
+ */
+static bool
+next_is_folded_header(const text *t)
+{
+    const text *next = t->t_next;
+    const char *data, *ptr;
+
+    if(next == NULL)
+        return FALSE;
+
+    if(next->t_line == NULL)
+        return FALSE;
+
+    data = lineGetData(next->t_line);
+
+    /*
+     * Section B.2 of RFC822 says TAB or SPACE means a continuation of the
+     * previous entry.
+     * The cast keeps 8 bit characters out of trouble if isblank() is
+     * the one from <ctype.h>
+     */
+    if(isblank((unsigned char)data[0]))
+        return TRUE;
+
+    if(strchr(data, '=') == NULL)
+        /*
+         * Avoid false positives with
+         *	Content-Type: text/html;
+         *	Content-Transfer-Encoding: quoted-printable
+         */
+        return FALSE;
+
+    /*
+     * Some are broken and don't fold headers lines
+     * correctly as per section 2.2.3 of RFC2822.
+     * Generally they miss the white space at
+     * the start of the fold line:
+     *	Content-Type: multipart/related;
+     *	type="multipart/alternative";
+     *	boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
+     * should read:
+     *	Content-Type: multipart/related;
+     *		type="multipart/alternative";
+     *		boundary="----=_NextPart_000_006A_01C6AC47.348CB550"
+     * Since we're a virus checker not an RFC
+     * verifier we need to handle these
+     */
+    data = lineGetData(t->t_line);
+
+    ptr = strchr(data, '\0');
+
+    /*
+     * Walk back from the end of the line, the first character of the
+     * line is never looked at. Comparing the distance rather than
+     * the pointers means that ptr never moves in front of data, which
+     * it would otherwise do on an empty line
+     */
+    while((ptr - data) > 1)
+        switch(*--ptr) {
+            case ';':
+                return TRUE;
+            case '\n':
+            case ' ':
+            case '\r':
+            case '\t':
+                continue;	/* white space at end of line */
+            default:
+                return FALSE;
+        }
+    return FALSE;
+}
+
+/*
+ * This routine is called on the first line of the body of
+ * an email to handle broken messages that have newlines
+ * in the middle of its headers.
+ * Returns TRUE if line starts with "Message-Id: " or "Date: ", i.e. if it
+ * looks like a header that has strayed into the body
+ */
+static bool
+newline_in_header(const char *line)
+{
+    cli_dbgmsg("newline_in_header, check \"%s\"\n", line);
+
+    return ((strncmp(line, "Message-Id: ", 12) == 0) ||
+            (strncmp(line, "Date: ", 6) == 0)) ? TRUE : FALSE;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_md5.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,271 @@
+/*
+ * This is an OpenSSL-compatible implementation of the RSA Data Security,
+ * Inc. MD5 Message-Digest Algorithm (RFC 1321).
+ *
+ * Written by Solar Designer <solar at openwall.com> in 2001, and placed
+ * in the public domain. There's absolutely no warranty.
+ *
+ * This differs from Colin Plumb's older public domain implementation in
+ * that no 32-bit integer data type is required, there's no compile-time
+ * endianness configuration, and the function prototypes match OpenSSL's.
+ * The primary goals are portability and ease of use.
+ *
+ * This implementation is meant to be fast, but not as fast as possible.
+ * Some known optimizations are not included to reduce source code size
+ * and avoid compile-time configuration.
+ */
+
+#include <string.h>
+
+#include "md5.h"
+
+/*
+ * The basic MD5 functions.
+ *
+ * F and G are optimized compared to their RFC 1321 definitions for
+ * architectures that lack an AND-NOT instruction, just like in Colin Plumb's
+ * implementation.
+ */
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z) ((x) ^ (y) ^ (z))
+#define I(x, y, z) ((y) ^ ((x) | ~(z)))
+
+/*
+ * The MD5 transformation for all four rounds.
+ *
+ * f is one of F, G, H or I, x the input word, t the additive constant
+ * and s the number of bits to rotate left by. a is masked to 32 bits before
+ * the right shift as it may be held in a wider type.
+ * NOTE: this expands to three statements with nothing around them, so it
+ * can't be used as the body of an if or a loop without braces.
+ */
+#define STEP(f, a, b, c, d, x, t, s) \
+ (a) += f((b), (c), (d)) + (x) + (t); \
+ (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+ (a) += (b);
+
+/*
+ * SET reads 4 input bytes in little-endian byte order and stores them
+ * in a properly aligned word in host byte order.
+ *
+ * The check for little-endian architectures that tolerate unaligned
+ * memory accesses is just an optimization. Nothing will break if it
+ * doesn't work.
+ *
+ * Both macros rely on variables called ptr (the input bytes) and, in the
+ * portable version, ctx being in scope where they are used. In the portable
+ * version GET(n) returns what SET(n) stored in ctx->block, so SET(n) has
+ * to have been used first.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
+#define SET(n) \
+ (*(MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+ SET(n)
+#else
+#define SET(n) \
+ (ctx->block[(n)] = \
+ (MD5_u32plus)ptr[(n) * 4] | \
+ ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
+ ((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
+#define GET(n) \
+ (ctx->block[(n)])
+#endif
+
+/*
+ * This processes one or more 64-byte data blocks, but does NOT update
+ * the bit counters. There are no alignment requirements.
+ *
+ * ctx holds the state (a, b, c, d) which is updated, data points to the
+ * blocks and size is their total length in bytes. size must be a multiple
+ * of 64 and not 0, since the loop handles one block before it tests size.
+ * Returns a pointer to the byte after the last block processed.
+ */
+static void *body(cli_md5_ctx *ctx, void *data, unsigned long size)
+{
+ unsigned char *ptr;
+ MD5_u32plus a, b, c, d;
+ MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+ ptr = data;
+
+ /* work on local copies of the state */
+ a = ctx->a;
+ b = ctx->b;
+ c = ctx->c;
+ d = ctx->d;
+
+ do {
+ /* the state before this block is added back in after the 4 rounds */
+ saved_a = a;
+ saved_b = b;
+ saved_c = c;
+ saved_d = d;
+
+/* Round 1 */
+ /* SET loads each input word from ptr, the later rounds reuse them with GET */
+ STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+ STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+ STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+ STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+ STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+ STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+ STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+ STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+ STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+ STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+ STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+ STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+ STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+ STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+ STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+ STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+/* Round 2 */
+ STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+ STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+ STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+ STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+ STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+ STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+ STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+ STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+ STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+ STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+ STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+ STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+ STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+ STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+ STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+ STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+/* Round 3 */
+ STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+ STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
+ STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+ STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
+ STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+ STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+ STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+ STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
+ STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+ STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
+ STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+ STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
+ STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+ STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
+ STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+ STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+/* Round 4 */
+ STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+ STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+ STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+ STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+ STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+ STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+ STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+ STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+ STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+ STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+ STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+ STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+ STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+ STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+ STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+ STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+ a += saved_a;
+ b += saved_b;
+ c += saved_c;
+ d += saved_d;
+
+ /* on to the next block */
+ ptr += 64;
+ } while (size -= 64);
+
+ /* store the new state */
+ ctx->a = a;
+ ctx->b = b;
+ ctx->c = c;
+ ctx->d = d;
+
+ return ptr;
+}
+
+/*
+ * Get ctx ready for a new digest: load the four state words with their
+ * initial values and set the byte counters to 0
+ */
+void cli_md5_init(cli_md5_ctx *ctx)
+{
+ ctx->a = 0x67452301;
+ ctx->b = 0xefcdab89;
+ ctx->c = 0x98badcfe;
+ ctx->d = 0x10325476;
+
+ /* lo and hi together count the bytes fed to cli_md5_update() */
+ ctx->lo = 0;
+ ctx->hi = 0;
+}
+
+/*
+ * Add size bytes at data to the digest being computed in ctx.
+ * Whole 64-byte blocks are processed at once, what is left over is kept in
+ * ctx->buffer until the next call or cli_md5_final()
+ */
+void cli_md5_update(cli_md5_ctx *ctx, void *data, unsigned long size)
+{
+    const MD5_u32plus old_lo = ctx->lo;
+    unsigned long filled, room;
+
+    /* lo holds the low 29 bits of the byte count, hi the rest */
+    ctx->lo = (old_lo + size) & 0x1fffffff;
+    if (ctx->lo < old_lo)
+        ctx->hi++;
+    ctx->hi += size >> 29;
+
+    /* number of bytes already waiting in the buffer */
+    filled = old_lo & 0x3f;
+
+    if (filled != 0) {
+        room = 64 - filled;
+
+        if (size < room) {
+            /* still not a whole block, just store the data */
+            memcpy(&ctx->buffer[filled], data, size);
+            return;
+        }
+
+        /* complete the buffered block and process it */
+        memcpy(&ctx->buffer[filled], data, room);
+        data = (unsigned char *)data + room;
+        size -= room;
+        body(ctx, ctx->buffer, 64);
+    }
+
+    if (size >= 64) {
+        /* process the whole blocks straight from the caller's data */
+        data = body(ctx, data, size & ~(unsigned long)0x3f);
+        size &= 0x3f;
+    }
+
+    /* keep the remainder for later */
+    memcpy(ctx->buffer, data, size);
+}
+
+/*
+ * Finish the digest: pad the data, append its length in bits, process the
+ * last block(s) and store the 16 bytes of the digest in result.
+ * ctx is zeroed afterwards, it has to be initialised again before reuse
+ */
+void cli_md5_final(unsigned char *result, cli_md5_ctx *ctx)
+{
+    unsigned long filled, room;
+    unsigned int i;
+
+    filled = ctx->lo & 0x3f;
+
+    /* the padding starts with a single 1 bit */
+    ctx->buffer[filled++] = 0x80;
+
+    room = 64 - filled;
+
+    if (room < 8) {
+        /* no room for the length, it goes into a block of its own */
+        memset(&ctx->buffer[filled], 0, room);
+        body(ctx, ctx->buffer, 64);
+        filled = 0;
+        room = 64;
+    }
+
+    memset(&ctx->buffer[filled], 0, room - 8);
+
+    /* bytes to bits, then store lo and hi least significant byte first */
+    ctx->lo <<= 3;
+    for (i = 0; i < 4; i++) {
+        ctx->buffer[56 + i] = ctx->lo >> (8 * i);
+        ctx->buffer[60 + i] = ctx->hi >> (8 * i);
+    }
+
+    body(ctx, ctx->buffer, 64);
+
+    /* the digest is a, b, c and d, each least significant byte first */
+    for (i = 0; i < 4; i++) {
+        result[i] = ctx->a >> (8 * i);
+        result[4 + i] = ctx->b >> (8 * i);
+        result[8 + i] = ctx->c >> (8 * i);
+        result[12 + i] = ctx->d >> (8 * i);
+    }
+
+    memset(ctx, 0, sizeof(*ctx));
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_message.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,2830 @@
+/*
+ * Copyright (C) 2002-2006 Nigel Horne <njh at bandsman.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ * TODO: Optimise messageExport, decodeLine, messageIsEncoding
+ */
+static char const rcsid[] = "$Id: message.c,v 1.195 2007/02/12 20:46:09 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef CL_DEBUG
+#define NDEBUG /* map CLAMAV debug onto standard */
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT /* for Solaris 2.8 */
+#endif
+#endif
+
+#ifdef C_DARWIN
+#include <sys/types.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+
+#ifdef CL_THREAD_SAFE
+#include <pthread.h>
+#endif
+
+#include "others.h"
+#include "str.h"
+#include "filetypes.h"
+
+#include "mbox.h"
+
+#ifndef isblank
+#define isblank(c) (((c) == ' ') || ((c) == '\t'))
+#endif
+
+#define RFC2045LENGTH 76 /* maximum number of characters on a line */
+
+#ifdef HAVE_STDBOOL_H
+#include <stdbool.h>
+#else
+#ifdef FALSE
+typedef unsigned char bool;
+#else
+typedef enum { FALSE = 0, TRUE = 1 } bool;
+#endif
+#endif
+
+static int messageHasArgument(const message *m, const char *variable);
+static void messageIsEncoding(message *m);
+static unsigned char *decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast);
+static void sanitiseBase64(char *s);
+#ifdef __GNUC__
+static unsigned char hex(char c) __attribute__((const));
+static unsigned char base64(char c) __attribute__((const));
+static unsigned char uudecode(char c) __attribute__((const));
+#else
+static unsigned char hex(char c);
+static unsigned char base64(char c);
+static unsigned char uudecode(char c);
+#endif
+static const char *messageGetArgument(const message *m, int arg);
+static void *messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void (*setCTX)(void *, cli_ctx *), int destroy_text);
+static int usefulArg(const char *arg);
+static void messageDedup(message *m);
+static char *rfc2231(const char *in);
+static int simil(const char *str1, const char *str2);
+
+/*
+ * These maps are ordered in decreasing likelihood of their appearance
+ * in an e-mail. Probably these should be in a table...
+ */
+static const struct encoding_map {
+ const char *string; /* encoding name as it appears in the header */
+ encoding_type type; /* encoding_type recorded when that name is seen */
+} encoding_map[] = { /* rfc2045 */
+ { "7bit", NOENCODING },
+ { "text/plain", NOENCODING },
+ { "quoted-printable", QUOTEDPRINTABLE }, /* rfc2045 */
+ { "base64", BASE64 }, /* rfc2045 */
+ { "8bit", EIGHTBIT },
+ { "binary", BINARY },
+ { "x-uuencode", UUENCODE }, /* uuencode(5) */
+ { "x-yencode", YENCODE },
+ { "x-binhex", BINHEX },
+ { "us-ascii", NOENCODING }, /* incorrect */
+ { "x-uue", UUENCODE }, /* incorrect */
+ { "uuencode", UUENCODE }, /* incorrect */
+ { NULL, NOENCODING } /* end marker: loops stop at the NULL string */
+};
+
+static const struct mime_map {
+ const char *string; /* primary MIME type name */
+ mime_type type; /* mime_type stored in the message for that name */
+} mime_map[] = {
+ { "text", TEXT },
+ { "multipart", MULTIPART },
+ { "application", APPLICATION },
+ { "audio", AUDIO },
+ { "image", IMAGE },
+ { "message", MESSAGE },
+ { "video", VIDEO },
+ { NULL, TEXT } /* end marker: loops stop at the NULL string */
+};
+
+/*
+ * See RFC2045, section 6.8, table 1
+ */
+static const unsigned char base64Table[256] = {
+ /*
+  * One entry per character code: 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51,
+  * '0'-'9' -> 52-61, '+' -> 62, '/' -> 63 and '=' -> 0.
+  * Every other code holds 255 (presumably "not a base64 character";
+  * the consumer of this table is not in this part of the file).
+  */
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,62,255,255,255,63,
+ 52,53,54,55,56,57,58,59,60,61,255,255,255,0,255,255,
+ 255,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+ 15,16,17,18,19,20,21,22,23,24,25,255,255,255,255,255,
+ 255,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
+ 41,42,43,44,45,46,47,48,49,50,51,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
+};
+
+/*
+ * Allocate a zero-filled message whose MIME type is NOMIME.
+ * Returns whatever cli_calloc() returned, i.e. NULL on failure.
+ */
+message *
+messageCreate(void)
+{
+    message *fresh = (message *)cli_calloc(1, sizeof(message));
+
+    if(fresh == NULL)
+        return NULL;
+
+    fresh->mimeType = NOMIME;
+    return fresh;
+}
+
+/*
+ * Release everything held by m (through messageReset) and then m itself.
+ */
+void
+messageDestroy(message *m)
+{
+    assert(m != NULL);
+    messageReset(m);
+    free(m);
+}
+
+/*
+ * Free every buffer owned by m and return the structure to its freshly
+ * created state (all zero, MIME type NOMIME). m itself is not freed.
+ */
+void
+messageReset(message *m)
+{
+    assert(m != NULL);
+
+    /* free(NULL) is a no-op, so these need no guard */
+    free(m->mimeSubtype);
+    free(m->mimeDispositionType);
+
+    if(m->mimeArguments != NULL) {
+        int idx = 0;
+
+        while(idx < m->numberOfArguments)
+            free(m->mimeArguments[idx++]);
+        free(m->mimeArguments);
+    }
+
+    if(m->body_first != NULL)
+        textDestroy(m->body_first);
+
+    assert(m->base64chars == 0);
+
+    if(m->encodingTypes != NULL) {
+        assert(m->numberOfEncTypes > 0);
+        free(m->encodingTypes);
+    }
+
+    memset(m, '\0', sizeof(message));
+    m->mimeType = NOMIME;
+}
+
+/*
+ * Handle the Content-Type header. The syntax is in RFC1341.
+ * Return success (1) or failure (0). Failure happens when it's an unknown
+ * type and we've already received a known type, when we've received an
+ * empty type (NULL, or a string with no letter in it), or when the table
+ * of known types can't be built. If we receive an unknown type by itself
+ * we guess: "x-..." becomes MEXTENSION, "plain" becomes TEXT, a name that is
+ * at least 50% similar to a known type becomes that type, and anything
+ * else defaults to application.
+ */
+int
+messageSetMimeType(message *mess, const char *type)
+{
+#ifdef CL_THREAD_SAFE
+    static pthread_mutex_t mime_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+    const struct mime_map *m;
+    int typeval;
+    static table_t *mime_table;
+
+    assert(mess != NULL);
+    if(type == NULL) {
+        cli_warnmsg("Empty content-type field\n");
+        return 0;
+    }
+
+    cli_dbgmsg("messageSetMimeType: '%s'\n", type);
+
+    /*
+     * Skip everything before the first letter. The cast matters: handing
+     * isalpha() a negative value (any byte >= 0x80 where char is signed)
+     * is undefined behaviour, and this string comes from the e-mail.
+     */
+    while(!isalpha((unsigned char)*type))
+        if(*type++ == '\0')
+            return 0;
+
+    /* Build the lookup table the first time through */
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&mime_mutex);
+#endif
+    if(mime_table == NULL) {
+        mime_table = tableCreate();
+        if(mime_table == NULL) {
+#ifdef CL_THREAD_SAFE
+            pthread_mutex_unlock(&mime_mutex);
+#endif
+            return 0;
+        }
+
+        for(m = mime_map; m->string; m++)
+            if(!tableInsert(mime_table, m->string, m->type)) {
+                tableDestroy(mime_table);
+                mime_table = NULL;
+#ifdef CL_THREAD_SAFE
+                pthread_mutex_unlock(&mime_mutex);
+#endif
+                return 0;
+            }
+    }
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_unlock(&mime_mutex);
+#endif
+
+    typeval = tableFind(mime_table, type);
+
+    if(typeval != -1) {
+        mess->mimeType = (mime_type)typeval;
+        return 1;
+    }
+    if(mess->mimeType == NOMIME) {
+        if(strncasecmp(type, "x-", 2) == 0)
+            mess->mimeType = MEXTENSION;
+        else {
+            /*
+             * Based on a suggestion by James Stevens
+             *	<James at kyzo.com>
+             * Force scanning of strange messages
+             */
+            if(strcasecmp(type, "plain") == 0) {
+                cli_dbgmsg("Incorrect MIME type: `plain', set to Text\n");
+                mess->mimeType = TEXT;
+            } else {
+                /*
+                 * Don't handle broken e-mail probably sending
+                 *	Content-Type: plain/text
+                 * instead of
+                 *	Content-Type: text/plain
+                 * as an attachment
+                 */
+                int highestSimil = 0, t = -1;
+                const char *closest = NULL;
+
+                for(m = mime_map; m->string; m++) {
+                    const int s = simil(m->string, type);
+
+                    if(s > highestSimil) {
+                        highestSimil = s;
+                        closest = m->string;
+                        t = m->type;
+                    }
+                }
+                if(highestSimil >= 50) {
+                    /* %u needs an unsigned argument */
+                    cli_dbgmsg("Unknown MIME type \"%s\" - guessing as %s (%u%% certainty)\n",
+                        type, closest,
+                        (unsigned int)highestSimil);
+                    mess->mimeType = (mime_type)t;
+                } else {
+                    cli_dbgmsg("Unknown MIME type: `%s', set to Application - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
+                    mess->mimeType = APPLICATION;
+                }
+            }
+        }
+        return 1;
+    }
+    return 0;
+}
+
+/* Return the primary MIME type currently recorded in m. */
+mime_type
+messageGetMimeType(const message *m)
+{
+    mime_type current;
+
+    assert(m != NULL);
+
+    current = m->mimeType;
+    return current;
+}
+
+/*
+ * Store a private copy of subtype in m, discarding any previous value.
+ * A NULL subtype (e.g. from a broken line such as "Content-Type: text/")
+ * is stored as the empty string.
+ */
+void
+messageSetMimeSubtype(message *m, const char *subtype)
+{
+    const char *value = subtype;
+
+    assert(m != NULL);
+
+    if(value == NULL) {
+        cli_dbgmsg("Empty content subtype\n");
+        value = "";
+    }
+
+    /* free(NULL) is a no-op */
+    free(m->mimeSubtype);
+    m->mimeSubtype = cli_strdup(value);
+}
+
+/* Return the stored MIME subtype, or "" when none has been set. */
+const char *
+messageGetMimeSubtype(const message *m)
+{
+    if(m->mimeSubtype == NULL)
+        return "";
+
+    return m->mimeSubtype;
+}
+
+/*
+ * Record the Content-Disposition type of m, replacing any earlier value.
+ * Leading white space is skipped and the stored copy is passed through
+ * strstrip(). A NULL or all-blank disptype clears the field.
+ */
+void
+messageSetDispositionType(message *m, const char *disptype)
+{
+    assert(m != NULL);
+
+    if(m->mimeDispositionType)
+        free(m->mimeDispositionType);
+    if(disptype == NULL) {
+        m->mimeDispositionType = NULL;
+        return;
+    }
+
+    /*
+     * It's broken for there to be an entry such as "Content-Disposition:"
+     * However some spam and viruses are rather broken, it's a sign
+     * that something is wrong if we get that - maybe we should force a
+     * scan of this part
+     *
+     * The cast must be to unsigned char: casting a negative char to int
+     * keeps it negative, which isspace() is not defined for.
+     */
+    while(*disptype && isspace((unsigned char)*disptype))
+        disptype++;
+    if(*disptype) {
+        m->mimeDispositionType = cli_strdup(disptype);
+        if(m->mimeDispositionType)
+            strstrip(m->mimeDispositionType);
+    } else
+        m->mimeDispositionType = NULL;
+}
+
+/* Return the stored Content-Disposition type, or "" when none is set. */
+const char *
+messageGetDispositionType(const message *m)
+{
+    if(m->mimeDispositionType == NULL)
+        return "";
+
+    return m->mimeDispositionType;
+}
+
+/*
+ * Add one "name=value" MIME argument to m. NULL, empty, uninteresting
+ * (see usefulArg) and already present arguments are silently ignored.
+ * What is stored is the result of rfc2231(arg), not arg itself.
+ *
+ * TODO:
+ *	Arguments are held on a per message basis, they should be held on
+ * a per section basis. Otherwise what happens if two sections have two
+ * different values for charset? Probably doesn't matter for the use this
+ * code will be given, but will need fixing if this code is used elsewhere
+ */
+void
+messageAddArgument(message *m, const char *arg)
+{
+    int offset;
+
+    assert(m != NULL);
+
+    if(arg == NULL)
+        return;	/* Note: this is not an error condition */
+
+    /* unsigned char cast: isspace() is undefined for negative values */
+    while(isspace((unsigned char)*arg))
+        arg++;
+
+    if(*arg == '\0')
+        /* Empty argument? Probably a broken mail client... */
+        return;
+
+    cli_dbgmsg("messageAddArgument, arg='%s'\n", arg);
+
+    if(!usefulArg(arg))
+        return;
+
+    /* Look for a free (NULL) slot, giving up if arg is already stored */
+    for(offset = 0; offset < m->numberOfArguments; offset++)
+        if(m->mimeArguments[offset] == NULL)
+            break;
+        else if(strcasecmp(arg, m->mimeArguments[offset]) == 0)
+            return;	/* already in there */
+
+    if(offset == m->numberOfArguments) {
+        /* No free slot: grow the array by one entry */
+        char **ptr;
+
+        m->numberOfArguments++;
+        ptr = (char **)cli_realloc(m->mimeArguments, m->numberOfArguments * sizeof(char *));
+        if(ptr == NULL) {
+            m->numberOfArguments--;
+            return;
+        }
+        m->mimeArguments = ptr;
+    }
+
+    arg = m->mimeArguments[offset] = rfc2231(arg);
+
+    /*
+     * This is terribly broken from an RFC point of view but is useful
+     * for catching viruses which have a filename but no type of
+     * mime. By pretending defaulting to an application rather than
+     * to nomime we can ensure they're saved and scanned
+     */
+    if(arg && ((strncasecmp(arg, "filename=", 9) == 0) || (strncasecmp(arg, "name=", 5) == 0)))
+        if(messageGetMimeType(m) == NOMIME) {
+            cli_dbgmsg("Force mime encoding to application\n");
+            messageSetMimeType(m, "application");
+        }
+}
+
+/*
+ * Add in all the arguments.
+ * Cope with:
+ *	name="foo bar.doc"
+ *	charset=foo name=bar
+ *
+ * Each "key=value" item found in s is passed to messageAddArgument();
+ * a quoted value has its quotes removed first. Parsing stops at the
+ * first item that can't be understood.
+ */
+void
+messageAddArguments(message *m, const char *s)
+{
+    const char *string = s;
+
+    /* check before the debug print, which would dereference a NULL s */
+    assert(string != NULL);
+
+    cli_dbgmsg("Add arguments '%s'\n", string);
+
+    while(*string) {
+        const char *key, *cptr;
+        char *data, *field;
+
+        /*
+         * All ctype calls below cast to unsigned char: header bytes
+         * >= 0x80 would otherwise be negative where char is signed,
+         * which isspace() is not defined for.
+         */
+        if(isspace((unsigned char)*string) || (*string == ';')) {
+            string++;
+            continue;
+        }
+
+        key = string;
+
+        data = strchr(string, '=');
+
+        /*
+         * Some spam breaks RFC2045 by using ':' instead of '='
+         * e.g.:
+         *	Content-Type: text/html; charset:ISO-8859-1
+         * should be:
+         *	Content-type: text/html; charset=ISO-8859-1
+         *
+         * We give up with lines that are completely broken because
+         * we don't have ESP and don't know what was meant to be there.
+         * It's unlikely to really be a problem.
+         */
+        if(data == NULL)
+            data = strchr(string, ':');
+
+        if(data == NULL) {
+            /*
+             * Completely broken, give up
+             */
+            cli_dbgmsg("Can't parse header \"%s\"\n", s);
+            return;
+        }
+
+        string = &data[1];
+
+        /*
+         * Handle white space to the right of the equals sign
+         * This breaks RFC2045 which has:
+         *	parameter := attribute "=" value
+         *	attribute := token   ; case-insensitive
+         *	token := 1*<any (ASCII) CHAR except SPACE, CTLs,
+         *		or tspecials>
+         * But too many MUAs ignore this
+         */
+        while(isspace((unsigned char)*string) && (*string != '\0'))
+            string++;
+
+        cptr = string++;
+
+        if(strlen(key) == 0)
+            continue;
+
+        if(*cptr == '"') {
+            char *ptr, *kcopy;
+
+            /*
+             * The field is in quotes, so look for the
+             * closing quotes
+             */
+            kcopy = cli_strdup(key);
+
+            if(kcopy == NULL)
+                return;
+
+            /* cut the copy down to just the key name */
+            ptr = strchr(kcopy, '=');
+            if(ptr == NULL)
+                ptr = strchr(kcopy, ':');
+            *ptr = '\0';
+
+            string = strchr(++cptr, '"');
+
+            if(string == NULL) {
+                cli_dbgmsg("Unbalanced quote character in \"%s\"\n", s);
+                string = "";
+            } else
+                string++;
+
+            if(!usefulArg(kcopy)) {
+                free(kcopy);
+                continue;
+            }
+
+            data = cli_strdup(cptr);
+
+            ptr = (data) ? strchr(data, '"') : NULL;
+            if(ptr == NULL) {
+                /*
+                 * Weird e-mail header such as:
+                 * Content-Type: application/octet-stream; name="
+                 * "
+                 * Content-Transfer-Encoding: base64
+                 * Content-Disposition: attachment; filename="
+                 * "
+                 *
+                 * TODO: the file should still be saved and
+                 * virus checked
+                 */
+                cli_dbgmsg("Can't parse header \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", s);
+                if(data)
+                    free(data);
+                free(kcopy);
+                return;
+            }
+
+            *ptr = '\0';
+
+            /* room for key, '=', value and the terminating NUL */
+            field = cli_realloc(kcopy, strlen(kcopy) + strlen(data) + 2);
+            if(field) {
+                strcat(field, "=");
+                strcat(field, data);
+            } else
+                free(kcopy);
+            free(data);
+        } else {
+            size_t len;
+
+            if(*cptr == '\0') {
+                cli_dbgmsg("Ignoring empty field in \"%s\"\n", s);
+                return;
+            }
+
+            /*
+             * The field is not in quotes, so look for the closing
+             * white space
+             */
+            while((*string != '\0') && !isspace((unsigned char)*string))
+                string++;
+
+            len = (size_t)string - (size_t)key + 1;
+            field = cli_malloc(len);
+
+            if(field) {
+                memcpy(field, key, len - 1);
+                field[len - 1] = '\0';
+            }
+        }
+        if(field) {
+            messageAddArgument(m, field);
+            free(field);
+        }
+    }
+}
+
+/*
+ * Return argument number arg of m (not a copy), or "" if that slot is
+ * empty. arg must be within 0 .. numberOfArguments - 1.
+ */
+static const char *
+messageGetArgument(const message *m, int arg)
+{
+    const char *value;
+
+    assert(m != NULL);
+    assert(arg >= 0);
+    assert(arg < m->numberOfArguments);
+
+    value = m->mimeArguments[arg];
+    if(value == NULL)
+        return "";
+
+    return value;
+}
+
+/*
+ * Find a MIME variable from the header and return a COPY to the value of that
+ * variable. The caller must free the copy
+ *
+ * The match is a case-insensitive prefix match on the stored "name=value"
+ * strings. Returns NULL if no argument matches, if the first match has no
+ * '=' after the name, or if the copy can't be allocated.
+ */
+char *
+messageFindArgument(const message *m, const char *variable)
+{
+    int i;
+    size_t len;
+
+    assert(m != NULL);
+    assert(variable != NULL);
+
+    len = strlen(variable);
+
+    for(i = 0; i < m->numberOfArguments; i++) {
+        const char *ptr;
+
+        ptr = messageGetArgument(m, i);
+        if((ptr == NULL) || (*ptr == '\0'))
+            continue;
+#ifdef CL_DEBUG
+        cli_dbgmsg("messageFindArgument: compare %lu bytes of %s with %s\n",
+            (unsigned long)len, variable, ptr);
+#endif
+        if(strncasecmp(ptr, variable, len) == 0) {
+            ptr = &ptr[len];
+            /* unsigned char cast: isspace() is undefined for negative values */
+            while(isspace((unsigned char)*ptr))
+                ptr++;
+            if(*ptr != '=') {
+                cli_warnmsg("messageFindArgument: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
+                return NULL;
+            }
+            if((*++ptr == '"') && (strchr(&ptr[1], '"') != NULL)) {
+                /* Remove any quote characters */
+                char *ret = cli_strdup(++ptr);
+                char *p;
+
+                if(ret == NULL)
+                    return NULL;
+
+                /*
+                 * Thomas Lamy <Thomas.Lamy at in-online.net>:
+                 * fix un-quoting of boundary strings from
+                 * header, occurs if boundary was given as
+                 *	'boundary="_Test_";'
+                 *
+                 * At least two quotes in string, assume
+                 * quoted argument
+                 * end string at next quote
+                 */
+                if((p = strchr(ret, '"')) != NULL) {
+                    ret[strlen(ret) - 1] = '\0';
+                    *p = '\0';
+                }
+                return ret;
+            }
+            return cli_strdup(ptr);
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Return a copy of the "filename" argument of m, falling back to the
+ * "name" argument; NULL if neither is found. The result comes from
+ * messageFindArgument(), so the caller must free it.
+ */
+char *
+messageGetFilename(const message *m)
+{
+    char *found = (char *)messageFindArgument(m, "filename");
+
+    if(found == NULL)
+        found = (char *)messageFindArgument(m, "name");
+
+    return found;
+}
+
+/*
+ * Returns true (1) if m holds an argument whose name starts with variable
+ * (case-insensitive) and is followed by '=', otherwise false (0). The
+ * search stops at the first argument that matches the name.
+ */
+static int
+messageHasArgument(const message *m, const char *variable)
+{
+    int i;
+    size_t len;
+
+    assert(m != NULL);
+    assert(variable != NULL);
+
+    len = strlen(variable);
+
+    for(i = 0; i < m->numberOfArguments; i++) {
+        const char *ptr;
+
+        ptr = messageGetArgument(m, i);
+        if((ptr == NULL) || (*ptr == '\0'))
+            continue;
+#ifdef CL_DEBUG
+        cli_dbgmsg("messageArgumentExists: compare %lu bytes of %s with %s\n",
+            (unsigned long)len, variable, ptr);
+#endif
+        if(strncasecmp(ptr, variable, len) == 0) {
+            ptr = &ptr[len];
+            /* unsigned char cast: isspace() is undefined for negative values */
+            while(isspace((unsigned char)*ptr))
+                ptr++;
+            if(*ptr != '=') {
+                cli_warnmsg("messageArgumentExists: no '=' sign found in MIME header '%s' (%s)\n", variable, messageGetArgument(m, i));
+                return 0;
+            }
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * Returns non-zero if m has a "filename" or "file" argument.
+ * NOTE(review): messageGetFilename() looks for "filename" then "name",
+ * whereas this checks "filename" then "file" - confirm that is intended.
+ */
+int
+messageHasFilename(const message *m)
+{
+    if(messageHasArgument(m, "filename"))
+        return 1;
+
+    return messageHasArgument(m, "file") != 0;
+}
+
+/*
+ * Record the transfer encoding(s) named in enctype, a list of names
+ * separated by spaces or tabs, e.g. "base64 binary". Each name that
+ * matches an entry of encoding_map exactly is appended to
+ * m->encodingTypes unless it is already there. A name that matches
+ * nothing is replaced by the most similar known name when that is at
+ * least 50% similar, otherwise both base64 and quoted-printable are
+ * enabled.
+ */
+void
+messageSetEncoding(message *m, const char *enctype)
+{
+    const struct encoding_map *e;
+    int i;
+    char *type;
+
+    assert(m != NULL);
+    assert(enctype != NULL);
+
+    /*
+     * ctype arguments are cast to unsigned char throughout: a byte
+     * >= 0x80 is negative where char is signed, and the ctype functions
+     * are not defined for negative values.
+     */
+    while(isblank((unsigned char)*enctype))
+        enctype++;
+
+    cli_dbgmsg("messageSetEncoding: '%s'\n", enctype);
+
+    if(strcasecmp(enctype, "8 bit") == 0) {
+        cli_dbgmsg("Broken content-transfer-encoding: '8 bit' changed to '8bit'\n");
+        enctype = "8bit";
+    }
+
+    /*
+     * Iterate through
+     *	Content-Transfer-Encoding: base64 binary
+     * cli_strtok's fieldno counts from 0
+     */
+    i = 0;
+    while((type = cli_strtok(enctype, i++, " \t")) != NULL) {
+        int highestSimil = 0;
+        const char *closest = NULL;
+        /* the same for every table entry, so work it out once */
+        const int lowertype = tolower((unsigned char)type[0]);
+
+        for(e = encoding_map; e->string; e++) {
+            int sim;
+
+            if((lowertype != tolower((unsigned char)e->string[0])) && (lowertype != 'x'))
+                /*
+                 * simil is expensive, and only one example
+                 * has been seen of a mis-sent encoding where
+                 * the first character was wrong, so assume no
+                 * match to save the call.
+                 *
+                 * That example was quoted-printable sent as
+                 * X-quoted-printable.
+                 */
+                continue;
+
+            if(strcmp(e->string, "uuencode") == 0)
+                /*
+                 * No need to test here - fast track visa will have
+                 * handled uuencoded files
+                 */
+                continue;
+
+            sim = simil(type, e->string);
+
+            if(sim == 100) {
+                int j;
+                encoding_type *et;
+
+                for(j = 0; j < m->numberOfEncTypes; j++)
+                    if(m->encodingTypes[j] == e->type)
+                        break;
+
+                if(j < m->numberOfEncTypes) {
+                    cli_dbgmsg("Ignoring duplicate encoding mechanism '%s'\n",
+                        type);
+                    break;
+                }
+
+                et = (encoding_type *)cli_realloc(m->encodingTypes, (m->numberOfEncTypes + 1) * sizeof(encoding_type));
+                if(et == NULL)
+                    break;
+
+                m->encodingTypes = et;
+                m->encodingTypes[m->numberOfEncTypes++] = e->type;
+
+                cli_dbgmsg("Encoding type %d is \"%s\"\n", m->numberOfEncTypes, type);
+                break;
+            } else if(sim > highestSimil) {
+                closest = e->string;
+                highestSimil = sim;
+            }
+        }
+
+        /* the loop ran off the end of the table: nothing matched exactly */
+        if(e->string == NULL) {
+            /*
+             * The stated encoding type is illegal, so we
+             * use a best guess of what it should be.
+             *
+             * 50% is arbitrary. For example 7bi will match as
+             * 66% certain to be 7bit
+             */
+            if(highestSimil >= 50) {
+                /* %u needs an unsigned argument */
+                cli_dbgmsg("Unknown encoding type \"%s\" - guessing as %s (%u%% certainty)\n",
+                    type, closest, (unsigned int)highestSimil);
+                messageSetEncoding(m, closest);
+            } else {
+                cli_dbgmsg("Unknown encoding type \"%s\" - if you believe this file contains a virus, submit it to www.clamav.net\n", type);
+                /*
+                 * Err on the side of safety, enable all
+                 * decoding modules
+                 */
+                messageSetEncoding(m, "base64");
+                messageSetEncoding(m, "quoted-printable");
+            }
+        }
+
+        free(type);
+    }
+}
+
+/*
+ * Return the first encoding type recorded for m, or NOENCODING when
+ * none has been recorded.
+ */
+encoding_type
+messageGetEncoding(const message *m)
+{
+    assert(m != NULL);
+
+    return (m->numberOfEncTypes != 0) ? m->encodingTypes[0] : NOENCODING;
+}
+
+/*
+ * Append a new entry to the end of the body of m. If line is non-NULL
+ * and has data, the entry refers to it through lineLink() and the line
+ * is checked by messageIsEncoding(); otherwise the entry has no line.
+ * Returns 1 on success, -1 if the entry can't be allocated, in which
+ * case m is left exactly as it was.
+ */
+int
+messageAddLine(message *m, line_t *line)
+{
+    text *node;
+
+    assert(m != NULL);
+
+    /*
+     * Allocate before touching the list. Storing a failed allocation
+     * straight into m->body_last would lose the tail of a non-empty body
+     * and make the next append dereference a NULL pointer.
+     */
+    node = (text *)cli_malloc(sizeof(text));
+    if(node == NULL)
+        return -1;
+
+    node->t_next = NULL;
+    node->t_line = NULL;
+
+    if(m->body_first == NULL)
+        m->body_first = node;
+    else
+        m->body_last->t_next = node;
+    m->body_last = node;
+
+    if(line && lineGetData(line)) {
+        node->t_line = lineLink(line);
+
+        /* messageIsEncoding looks at m->body_last, i.e. at node */
+        messageIsEncoding(m);
+    }
+
+    return 1;
+}
+
+/*
+ * Append data as a new line at the end of the body of m.
+ * An empty or NULL string is stored as an entry with no line, and a
+ * string made only of white space is stored as a single space. When
+ * data equals the previous line, that line is shared through lineLink()
+ * instead of being copied.
+ * The line must not be terminated by a \n.
+ * Returns 1 on success (including when a second consecutive blank line
+ * is dropped from a non-text message), -1 when out of memory.
+ */
+int
+messageAddStr(message *m, const char *data)
+{
+    line_t *previous = NULL;	/* last line, when data repeats it */
+    const char *str = data;
+
+    assert(m != NULL);
+
+    if(str != NULL) {
+        if(*str == '\0')
+            str = NULL;
+        else {
+            /*
+             * If it's only white space, just store one space to
+             * save memory. Something must be stored since it may
+             * be a header line. Bytes with the top bit set are
+             * never treated as white space.
+             */
+            const char *scan = str;
+
+            while(*scan && !((*scan) & 0x80) && isspace(*scan))
+                scan++;
+
+            if(*scan == '\0')
+                str = " ";
+        }
+    }
+
+    if(m->body_first == NULL) {
+        m->body_first = (text *)cli_malloc(sizeof(text));
+        m->body_last = m->body_first;
+    } else {
+        assert(m->body_last != NULL);
+
+        /*
+         * Dropping every repeated blank line would save time and
+         * RAM, but some phish signatures need the blank lines, so
+         * only do it when the message isn't text
+         */
+        if((str == NULL) && (m->body_last->t_line == NULL) &&
+           (messageGetMimeType(m) != TEXT))
+            return 1;
+
+        m->body_last->t_next = (text *)cli_malloc(sizeof(text));
+        if(m->body_last->t_next == NULL) {
+            /* try to free some memory, then have one more go */
+            messageDedup(m);
+            m->body_last->t_next = (text *)cli_malloc(sizeof(text));
+            if(m->body_last->t_next == NULL) {
+                cli_errmsg("messageAddStr: out of memory\n");
+                return -1;
+            }
+        }
+
+        if((str != NULL) && (m->body_last->t_line != NULL) &&
+           (strcmp(str, lineGetData(m->body_last->t_line)) == 0))
+            previous = m->body_last->t_line;
+
+        m->body_last = m->body_last->t_next;
+    }
+
+    if(m->body_last == NULL) {
+        cli_errmsg("messageAddStr: out of memory\n");
+        return -1;
+    }
+
+    m->body_last->t_next = NULL;
+
+    if((str == NULL) || (*str == '\0')) {
+        m->body_last->t_line = NULL;
+        return 1;
+    }
+
+    if(previous != NULL) {
+        m->body_last->t_line = lineLink(previous);
+        return 1;
+    }
+
+    m->body_last->t_line = lineCreate(str);
+    if(m->body_last->t_line == NULL) {
+        /* try to free some memory, then have one more go */
+        messageDedup(m);
+        m->body_last->t_line = lineCreate(str);
+
+        if(m->body_last->t_line == NULL) {
+            cli_errmsg("messageAddStr: out of memory\n");
+            return -1;
+        }
+    }
+    messageIsEncoding(m);
+
+    return 1;
+}
+
+/*
+ * Insert data as a new first line of the body of m.
+ * The line is built with lineCreate(), so the caller keeps ownership of
+ * data. The line must not be terminated by a \n.
+ * Returns 1 on success, -1 when out of memory; with an empty body the
+ * result is whatever messageAddLine() returns.
+ * NOTE(review): a NULL data is replaced by "" only when the body is not
+ * empty; with an empty body it goes to lineCreate() unchanged - confirm
+ * lineCreate() accepts NULL.
+ */
+int
+messageAddStrAtTop(message *m, const char *data)
+{
+    text *head;
+
+    assert(m != NULL);
+
+    if(m->body_first == NULL)
+        return messageAddLine(m, lineCreate(data));
+
+    head = (text *)cli_malloc(sizeof(text));
+    if(head == NULL)
+        return -1;	/* the body is left untouched */
+
+    head->t_next = m->body_first;
+    m->body_first = head;
+
+    head->t_line = lineCreate((data) ? data : "");
+    if(head->t_line == NULL) {
+        cli_errmsg("messageAddStrAtTop: out of memory\n");
+        return -1;
+    }
+
+    return 1;
+}
+
+/*
+ * Put the contents of the given text at the end of the current object.
+ * Can be used either to move a text object into a message, or to move a
+ * message's text into another message only moving from a given offset.
+ * The given text is emptied; it can be used again if needed, though be
+ * warned that it will have an empty line at the start.
+ * Returns 0 for success, -1 for failure
+ */
+int
+messageMoveText(message *m, text *t, message *old_message)
+{
+    int rc;
+
+    if(m->body_first == NULL) {
+        if(old_message) {
+            text *u;
+            /*
+             * t is within old_message which is about to be
+             * destroyed
+             */
+            assert(old_message->body_first != NULL);
+
+            m->body_first = t;
+            /* discard the entries of old_message that come before t */
+            for(u = old_message->body_first; u != t;) {
+                text *next;
+
+                if(u->t_line)
+                    lineUnlink(u->t_line);
+                next = u->t_next;
+
+                free(u);
+                u = next;
+
+                if(u == NULL) {
+                    cli_errmsg("messageMoveText sanity check: t not within old_message\n");
+                    return -1;
+                }
+            }
+            assert(old_message->body_last->t_next == NULL);
+
+            m->body_last = old_message->body_last;
+            old_message->body_first = old_message->body_last = NULL;
+
+            /* Do any pointers need to be reset? */
+            if((old_message->bounce == NULL) &&
+               (old_message->encoding == NULL) &&
+               (old_message->binhex == NULL) &&
+               (old_message->yenc == NULL))
+                return 0;
+
+            /* rescan from the top so the markers are found again */
+            m->body_last = m->body_first;
+            rc = 0;
+        } else {
+            m->body_last = m->body_first = textMove(NULL, t);
+            if(m->body_first == NULL)
+                /*
+                 * Nothing was moved. Return now: the scan below
+                 * would dereference the NULL m->body_last
+                 */
+                return -1;
+            rc = 0;
+        }
+    } else {
+        m->body_last = textMove(m->body_last, t);
+        if(m->body_last == NULL) {
+            rc = -1;
+            m->body_last = m->body_first;
+        } else
+            rc = 0;
+    }
+
+    /* walk to the real end, checking each line that has data on the way */
+    while(m->body_last->t_next) {
+        m->body_last = m->body_last->t_next;
+        if(m->body_last->t_line)
+            messageIsEncoding(m);
+    }
+
+    return rc;
+}
+
+/*
+ * See if the last line marks the start of a non MIME inclusion that
+ * will need to be scanned, and if so remember the body entry holding it.
+ * Only the first entry of each kind is remembered, and one line is
+ * remembered as at most one kind. The last entry must have a line.
+ */
+static void
+messageIsEncoding(message *m)
+{
+    static const char cteHeader[] = "Content-Transfer-Encoding";
+    static const char binhexBanner[] = "(This file must be converted with BinHex 4.0)";
+    text *tail = m->body_last;
+    const char *line = lineGetData(tail->t_line);
+
+    if((m->encoding == NULL) &&
+       (strncasecmp(line, cteHeader, sizeof(cteHeader) - 1) == 0) &&
+       (strstr(line, "7bit") == NULL)) {
+        m->encoding = tail;
+        return;
+    }
+
+    if((m->bounce == NULL) &&
+       (strncasecmp(line, "Received: ", 10) == 0) &&
+       (cli_filetype((const unsigned char *)line, strlen(line)) == CL_TYPE_MAIL)) {
+        m->bounce = tail;
+        return;
+    }
+
+    /* uuencode isn't looked for: not needed with fast track visa technology */
+
+    /*
+     * Look for close matches for BinHex, but simil() is expensive so
+     * only do it if it's likely to be found
+     */
+    if((m->binhex == NULL) &&
+       strstr(line, "BinHex") &&
+       (simil(line, binhexBanner) > 90)) {
+        m->binhex = tail;
+        return;
+    }
+
+    if((m->yenc == NULL) && (strncmp(line, "=ybegin line=", 13) == 0))
+        m->yenc = tail;
+}
+
+/*
+ * Return the first entry of the body of m. This is the message's own
+ * list, NOT a copy of it.
+ */
+text *
+messageGetBody(message *m)
+{
+    text *first;
+
+    assert(m != NULL);
+
+    first = m->body_first;
+    return first;
+}
+
+/*
+ * Export a message using the given export routines
+ *
+ * TODO: It really should export into an array, one
+ * for each encoding algorithm. However, what it does is it returns the
+ * last item that was exported. That's sufficient for now.
+ */
+static void *
+messageExport(message *m, const char *dir, void *(*create)(void), void (*destroy)(void *), void (*setFilename)(void *, const char *, const char *), void (*addData)(void *, const unsigned char *, size_t), void *(*exportText)(text *, void *, int), void(*setCTX)(void *, cli_ctx *), int destroy_text)
+{
+ void *ret;
+ text *t_line;
+ char *filename;
+ int i;
+
+ assert(m != NULL);
+
+ if(messageGetBody(m) == NULL)
+ return NULL;
+
+ ret = (*create)();
+
+ if(ret == NULL)
+ return NULL;
+
+ cli_dbgmsg("messageExport: numberOfEncTypes == %d\n", m->numberOfEncTypes);
+
+ if((t_line = binhexBegin(m)) != NULL) {
+ unsigned char byte;
+ unsigned long newlen = 0L, len, dataforklen, resourceforklen, l;
+ unsigned char *data;
+ char *ptr;
+ int bytenumber;
+ blob *tmp;
+
+ /*
+ * Table look up by Thomas Lamy <Thomas.Lamy at in-online.net>
+ * HQX conversion table - illegal chars are 0xff
+ */
+ const unsigned char hqxtbl[] = {
+ /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */
+ /* 00-0f */ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ /* 10-1f */ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
+ /* 20-2f */ 0xff,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0a,0x0b,0x0c,0xff,0xff,
+ /* 30-3f */ 0x0d,0x0e,0x0f,0x10,0x11,0x12,0x13,0xff,0x14,0x15,0xff,0xff,0xff,0xff,0xff,0xff,
+ /* 40-4f */ 0x16,0x17,0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,0x21,0x22,0x23,0x24,0xff,
+ /* 50-5f */ 0x25,0x26,0x27,0x28,0x29,0x2a,0x2b,0xff,0x2c,0x2d,0x2e,0x2f,0xff,0xff,0xff,0xff,
+ /* 60-6f */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0xff,0x37,0x38,0x39,0x3a,0x3b,0x3c,0xff,0xff,
+ /* 70-7f */ 0x3d,0x3e,0x3f,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+ };
+
+ cli_dbgmsg("messageExport: decode binhex\n");
+ /*
+ * Decode BinHex4. First create a temporary blob which contains
+ * the encoded message. Then decode that blob to the target
+ * blob, free the temporary blob and return the target one
+ *
+ * FIXME: EICAR isn't detected: should create 3 files in fork
+ * format: .info, .data and .rsrc. This is needed for
+ * position dependant detection such as EICAR
+ *
+ * See RFC1741
+ */
+ while(((t_line = t_line->t_next) != NULL) &&
+ (t_line->t_line == NULL))
+ ;
+
+ tmp = textToBlob(t_line, NULL,
+ ((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX)) ? destroy_text : 0);
+
+ if(tmp == NULL) {
+ /*
+ * FIXME: We've probably run out of memory during the
+ * text to blob.
+ */
+ cli_warnmsg("Couldn't start binhex parser\n");
+ (*destroy)(ret);
+ return NULL;
+ }
+
+ data = blobGetData(tmp);
+
+ if(data == NULL) {
+ cli_warnmsg("Couldn't locate the binhex message that was claimed to be there\n");
+ blobDestroy(tmp);
+ (*destroy)(ret);
+ return NULL;
+ }
+ len = blobGetDataSize(tmp);
+
+ if(data[0] == ':') {
+ unsigned char *uptr;
+ /* 7 bit (ala RFC1741) */
+
+ /*
+ * FIXME: this is dirty code, modification of the
+ * contents of a member of the blob object should be
+ * done through blob.c
+ *
+ * Convert 7 bit data into 8 bit
+ */
+ cli_dbgmsg("decode HQX7 message (%lu bytes)\n", len);
+
+ uptr = cli_malloc(len);
+ if(uptr == NULL) {
+ blobDestroy(tmp);
+ (*destroy)(ret);
+ return NULL;
+ }
+ memcpy(uptr, data, len);
+ bytenumber = 0;
+
+ /*
+ * uptr now contains the encoded (7bit) data - len bytes long
+ * data will contain the unencoded (8bit) data
+ */
+ for(l = 1; l < len; l++) {
+ unsigned char c = uptr[l];
+
+ if(c == ':')
+ break;
+
+ if((c == '\n') || (c == '\r'))
+ continue;
+
+ if((c < 0x20) || (c > 0x7f) || (hqxtbl[c] == 0xff)) {
+ cli_warnmsg("Invalid HQX7 character '%c' (0x%02x)\n", c, c);
+ break;
+ }
+ c = hqxtbl[c];
+ assert(c <= 63);
+
+ /*
+ * These masks probably aren't needed, but
+ * they're here to verify the code is correct
+ */
+ switch(bytenumber) {
+ case 0:
+ data[newlen] = (c << 2) & 0xFC;
+ bytenumber = 1;
+ break;
+ case 1:
+ data[newlen++] |= (c >> 4) & 0x3;
+ data[newlen] = (c << 4) & 0xF0;
+ bytenumber = 2;
+ break;
+ case 2:
+ data[newlen++] |= (c >> 2) & 0xF;
+ data[newlen] = (c << 6) & 0xC0;
+ bytenumber = 3;
+ break;
+ case 3:
+ data[newlen++] |= c & 0x3F;
+ bytenumber = 0;
+ break;
+ }
+ }
+
+ cli_dbgmsg("decoded HQX7 message (now %lu bytes)\n", newlen);
+
+ /*
+ * Throw away the old encoded (7bit) data
+ * data now points to the encoded (8bit) data - newlen bytes
+ *
+ * The data array may contain repetitive characters
+ */
+ free(uptr);
+ } else {
+ cli_warnmsg("HQX8 messages not yet supported, extraction may fail - if you believe this file contains a virus, submit it to www.clamav.net\n");
+ newlen = len;
+ }
+
+ /*
+ * Uncompress repetitive characters
+ */
+ if(memchr(data, 0x90, newlen)) {
+ blob *u = blobCreate(); /* uncompressed data */
+
+ if(u == NULL) {
+ (*destroy)(ret);
+ blobDestroy(tmp);
+ return NULL;
+ }
+ /*
+ * Includes compression
+ */
+ for(l = 0L; l < newlen; l++) {
+ unsigned char c = data[l];
+
+ /*
+ * TODO: handle the case where the first byte
+ * is 0x90
+ */
+ blobAddData(u, &c, 1);
+
+ if((l < (newlen - 1L)) && (data[l + 1] == 0x90)) {
+ int count;
+
+ l += 2;
+ count = data[l];
+
+ if(count == 0) {
+ c = 0x90;
+ blobAddData(u, &c, 1);
+ } else {
+#ifdef CL_DEBUG
+ cli_dbgmsg("uncompress HQX7 at 0x%06lu: %d repetitive bytes\n", l, count);
+#endif
+ blobGrow(u, count);
+ while(--count > 0)
+ blobAddData(u, &c, 1);
+ }
+ }
+ }
+ blobDestroy(tmp);
+ tmp = u;
+ data = blobGetData(tmp);
+ len = blobGetDataSize(tmp);
+ cli_dbgmsg("Uncompressed %lu bytes to %lu\n", newlen, len);
+ } else {
+ len = newlen;
+ cli_dbgmsg("HQX7 message (%lu bytes) is not compressed\n",
+ len);
+ }
+ if(len == 0) {
+ cli_warnmsg("Discarding empty binHex attachment\n");
+ (*destroy)(ret);
+ blobDestroy(tmp);
+ return NULL;
+ }
+
+ /*
+ * The blob tmp now contains the uncompressed data
+ * of len bytes, i.e. the repetitive bytes have been removed
+ */
+
+ /*
+ * Parse the header
+ *
+ * TODO: set filename argument in message as well
+ */
+ byte = data[0];
+ if(byte >= len) {
+ (*destroy)(ret);
+ blobDestroy(tmp);
+ return NULL;
+ }
+ filename = cli_malloc(byte + 1);
+ if(filename == NULL) {
+ (*destroy)(ret);
+ blobDestroy(tmp);
+ return NULL;
+ }
+ memcpy(filename, &data[1], byte);
+ filename[byte] = '\0';
+ (*setFilename)(ret, dir, filename);
+ /*ptr = cli_malloc(strlen(filename) + 6);*/
+ ptr = cli_malloc(byte + 6);
+ if(ptr) {
+ sprintf(ptr, "name=%s", filename);
+ messageAddArgument(m, ptr);
+ free(ptr);
+ }
+
+ /*
+ * skip over length, filename, version, type, creator and flags
+ */
+ byte = 1 + byte + 1 + 4 + 4 + 2;
+
+ /*
+ * Set len to be the data fork length
+ */
+ dataforklen = ((data[byte] << 24) & 0xFF000000) |
+ ((data[byte + 1] << 16) & 0xFF0000) |
+ ((data[byte + 2] << 8) & 0xFF00) |
+ (data[byte + 3] & 0xFF);
+
+ resourceforklen = ((data[byte + 4] << 24) & 0xFF000000) |
+ ((data[byte + 5] << 16) & 0xFF0000) |
+ ((data[byte + 6] << 8) & 0xFF00) |
+ (data[byte + 7] & 0xFF);
+
+ cli_dbgmsg("Filename = '%s', data fork length = %lu, resource fork length = %lu bytes\n",
+ filename, dataforklen, resourceforklen);
+
+ free((char *)filename);
+
+ /*
+ * Skip over data fork length, resource fork length and CRC
+ */
+ byte += 10;
+
+ l = blobGetDataSize(tmp) - byte;
+
+ if(l < dataforklen) {
+ cli_warnmsg("Corrupt BinHex file, claims it is %lu bytes long in a message of %lu bytes\n",
+ dataforklen, l);
+ dataforklen = l;
+ }
+ if(setCTX && m->ctx)
+ (*setCTX)(ret, m->ctx);
+
+ (*addData)(ret, &data[byte], dataforklen);
+
+ blobDestroy(tmp);
+
+ if(destroy_text)
+ m->binhex = NULL;
+
+ if((m->numberOfEncTypes == 0) ||
+ ((m->numberOfEncTypes == 1) && (m->encodingTypes[0] == BINHEX))) {
+ cli_dbgmsg("Finished exporting binhex file\n");
+ return ret;
+ }
+ }
+
+ if(m->numberOfEncTypes == 0) {
+ /*
+ * Fast copy
+ */
+ cli_dbgmsg("messageExport: Entering fast copy mode\n");
+
+#if 0
+ filename = messageGetFilename(m);
+
+ if(filename == NULL) {
+ cli_dbgmsg("Unencoded attachment sent with no filename\n");
+ messageAddArgument(m, "name=attachment");
+ } else if((strcmp(filename, "textportion") != 0) && (strcmp(filename, "mixedtextportion") != 0))
+ /*
+ * Some virus attachments don't say how they've
+ * been encoded. We assume base64
+ */
+ messageSetEncoding(m, "base64");
+#else
+ filename = (char *)messageFindArgument(m, "filename");
+ if(filename == NULL) {
+ filename = (char *)messageFindArgument(m, "name");
+
+ if(filename == NULL) {
+ cli_dbgmsg("Unencoded attachment sent with no filename\n");
+ messageAddArgument(m, "name=attachment");
+ } else
+ /*
+ * Some virus attachments don't say how they've
+ * been encoded. We assume base64
+ */
+ messageSetEncoding(m, "base64");
+ }
+#endif
+
+ (*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
+
+ if(filename)
+ free((char *)filename);
+
+ if(m->numberOfEncTypes == 0)
+ return exportText(messageGetBody(m), ret, destroy_text);
+ }
+
+ if(setCTX && m->ctx)
+ (*setCTX)(ret, m->ctx);
+
+ for(i = 0; i < m->numberOfEncTypes; i++) {
+ encoding_type enctype = m->encodingTypes[i];
+ size_t size;
+
+ if(i > 0) {
+ void *newret;
+
+ newret = (*create)();
+ if(newret == NULL) {
+ cli_errmsg("Not all decoding algorithms were run\n");
+ return ret;
+ }
+ (*destroy)(ret);
+ ret = newret;
+ }
+ cli_dbgmsg("messageExport: enctype %d is %d\n", i, (int)enctype);
+ /*
+ * Find the filename to decode
+ */
+ if(((enctype == YENCODE) || (i == 0)) && yEncBegin(m)) {
+ const char *f;
+
+ /*
+ * TODO: handle multipart yEnc encoded files
+ */
+ t_line = yEncBegin(m);
+ f = lineGetData(t_line->t_line);
+
+ if((filename = strstr(f, " name=")) != NULL) {
+ filename = cli_strdup(&filename[6]);
+ if(filename) {
+ cli_chomp(filename);
+ strstrip(filename);
+ cli_dbgmsg("Set yEnc filename to \"%s\"\n", filename);
+ }
+ }
+
+ (*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
+ if(filename) {
+ free((char *)filename);
+ filename = NULL;
+ }
+ t_line = t_line->t_next;
+ enctype = YENCODE;
+ m->yenc = NULL;
+ } else {
+ if(enctype == UUENCODE) {
+ /*
+ * The body will have been stripped out by the
+ * fast track visa system. Treat as plain/text,
+ * which means we'll still scan for funnies
+ * outside of the uuencoded portion.
+ */
+ cli_dbgmsg("messageExport: treat uuencode as text/plain\n");
+ enctype = m->encodingTypes[i] = NOENCODING;
+ }
+ filename = messageGetFilename(m);
+
+ if(filename == NULL) {
+ cli_dbgmsg("Attachment sent with no filename\n");
+ messageAddArgument(m, "name=attachment");
+ } else if(enctype == NOENCODING)
+ /*
+ * Some virus attachments don't say how
+ * they've been encoded. We assume
+ * base64.
+ *
+ * FIXME: don't do this if it's a fall
+ * through from uuencode
+ */
+ messageSetEncoding(m, "base64");
+
+ (*setFilename)(ret, dir, (filename && *filename) ? filename : "attachment");
+
+ t_line = messageGetBody(m);
+ }
+
+ if(filename)
+ free((char *)filename);
+
+ /*
+ * t_line should now point to the first (encoded) line of the
+ * message
+ */
+ if(t_line == NULL) {
+ cli_warnmsg("Empty attachment not saved\n");
+ (*destroy)(ret);
+ return NULL;
+ }
+
+ if(enctype == NOENCODING) {
+ /*
+ * Fast copy
+ */
+ if(i == m->numberOfEncTypes - 1) {
+ /* last one */
+ (void)exportText(t_line, ret, destroy_text);
+ break;
+ }
+ (void)exportText(t_line, ret, 0);
+ continue;
+ }
+
+ size = 0;
+ do {
+ unsigned char smallbuf[1024];
+ unsigned char *uptr, *data;
+ const char *line = lineGetData(t_line->t_line);
+ unsigned char *bigbuf;
+ size_t datasize;
+
+ if(enctype == YENCODE) {
+ if(line == NULL)
+ continue;
+ if(strncmp(line, "=yend ", 6) == 0)
+ break;
+ }
+
+ /*
+ * Add two bytes for '\n' and '\0'
+ */
+ datasize = (line) ? strlen(line) + 2 : 0;
+
+ if(datasize >= sizeof(smallbuf))
+ data = bigbuf = (unsigned char *)cli_malloc(datasize);
+ else {
+ bigbuf = NULL;
+ data = smallbuf;
+ datasize = sizeof(smallbuf);
+ }
+
+ uptr = decodeLine(m, enctype, line, data, datasize);
+ if(uptr == NULL) {
+ if(data == bigbuf)
+ free(data);
+ break;
+ }
+
+ if(uptr != data) {
+ assert((size_t)(uptr - data) < datasize);
+ (*addData)(ret, data, (size_t)(uptr - data));
+ size += (size_t)(uptr - data);
+ }
+
+ if(data == bigbuf)
+ free(data);
+
+ /*
+ * According to RFC2045, '=' is used to pad out
+ * the last byte and should be used as evidence
+ * of the end of the data. Some mail clients
+ * annoyingly then put plain text after the '='
+ * byte and viruses exploit this bug. Sigh
+ */
+ /*if(enctype == BASE64)
+ if(strchr(line, '='))
+ break;*/
+ if(line && destroy_text && (i == m->numberOfEncTypes - 1)) {
+ lineUnlink(t_line->t_line);
+ t_line->t_line = NULL;
+ }
+ } while((t_line = t_line->t_next) != NULL);
+
+ cli_dbgmsg("Exported %lu bytes using enctype %d\n",
+ (unsigned long)size, (int)enctype);
+
+ /* Verify we have nothing left to flush out */
+ if(m->base64chars) {
+ unsigned char data[4];
+ unsigned char *ptr;
+
+ ptr = base64Flush(m, data);
+ if(ptr)
+ (*addData)(ret, data, (size_t)(ptr - data));
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Flush any base64 characters still held in the message into buf.
+ * Returns the value handed back by decode(), or NULL when nothing was
+ * pending. The pending count is reset to zero after a flush
+ */
+unsigned char *
+base64Flush(message *m, unsigned char *buf)
+{
+	unsigned char *end;
+
+	cli_dbgmsg("%d trailing bytes to export\n", m->base64chars);
+
+	if(m->base64chars == 0)
+		return NULL;
+
+	end = decode(m, NULL, buf, base64, FALSE);
+	m->base64chars = 0;
+
+	return end;
+}
+
+/*
+ * Decode the message and hand its contents to a fileblob by running
+ * messageExport() with the fileblob callbacks; dir is passed through to it.
+ * The caller must free the returned fileblob.
+ * When destroy is set, the body text of the message is released afterwards
+ */
+fileblob *
+messageToFileblob(message *m, const char *dir, int destroy)
+{
+	fileblob *exported;
+
+	cli_dbgmsg("messageToFileblob\n");
+
+	exported = messageExport(m, dir,
+		(void *(*)(void))fileblobCreate,
+		(void(*)(void *))fileblobDestroy,
+		(void(*)(void *, const char *, const char *))fileblobSetFilename,
+		(void(*)(void *, const unsigned char *, size_t))fileblobAddData,
+		(void *(*)(text *, void *, int))textToFileblob,
+		(void(*)(void *, cli_ctx *))fileblobSetCTX,
+		destroy);
+
+	if(!destroy || (m->body_first == NULL))
+		return exported;
+
+	textDestroy(m->body_first);
+	m->body_last = NULL;
+	m->body_first = NULL;
+
+	return exported;
+}
+
+/*
+ * Decode the message and hand its contents to a blob by running
+ * messageExport() with the blob callbacks (no directory, no ctx setter).
+ * The caller must free the returned blob.
+ * When destroy is set, the body text of the message is released afterwards
+ */
+blob *
+messageToBlob(message *m, int destroy)
+{
+	blob *exported;
+
+	cli_dbgmsg("messageToBlob\n");
+
+	exported = messageExport(m, NULL,
+		(void *(*)(void))blobCreate,
+		(void(*)(void *))blobDestroy,
+		(void(*)(void *, const char *, const char *))blobSetFilename,
+		(void(*)(void *, const unsigned char *, size_t))blobAddData,
+		(void *(*)(text *, void *, int))textToBlob,
+		(void(*)(void *, cli_ctx *))NULL,
+		destroy);
+
+	if(!destroy || (m->body_first == NULL))
+		return exported;
+
+	textDestroy(m->body_first);
+	m->body_last = NULL;
+	m->body_first = NULL;
+
+	return exported;
+}
+
+/*
+ * Append a new node to the list that messageToText() is building.
+ *
+ * On success *last (and *first for the very first node) point at the new
+ * node, whose members are not yet initialised, and the node is returned.
+ * On allocation failure NULL is returned, *last is left alone and the t_next
+ * of the previous node (if any) is NULL, so the existing list stays properly
+ * terminated and can be destroyed or returned
+ */
+static text *
+messageToTextAppend(text **first, text **last)
+{
+	text *node = cli_malloc(sizeof(text));
+
+	if(*first == NULL)
+		*first = node;
+	else
+		(*last)->t_next = node;
+
+	if(node)
+		*last = node;
+
+	return node;
+}
+
+/*
+ * Decode and transfer the contents of the message into a text area
+ * The caller must free the returned text
+ *
+ * Returns NULL if nothing could be exported, if the fast copy ran out of
+ * memory, or if a uuencoded/yEnc section cannot be handled. If memory runs
+ * out while decoding, what has been decoded so far is returned
+ */
+text *
+messageToText(message *m)
+{
+	int i;
+	text *first = NULL, *last = NULL;
+	const text *t_line;
+
+	assert(m != NULL);
+
+	if(m->numberOfEncTypes == 0) {
+		/*
+		 * Fast copy
+		 */
+		for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
+			if(messageToTextAppend(&first, &last) == NULL) {
+				if(first)
+					textDestroy(first);
+				return NULL;
+			}
+			if(t_line->t_line)
+				last->t_line = lineLink(t_line->t_line);
+			else
+				last->t_line = NULL;	/* empty line */
+		}
+		if(last)
+			last->t_next = NULL;
+
+		return first;
+	}
+	/*
+	 * Scan over the data a number of times once for each claimed encoding
+	 * type
+	 */
+	for(i = 0; i < m->numberOfEncTypes; i++) {
+		const encoding_type enctype = m->encodingTypes[i];
+
+		cli_dbgmsg("messageToText: export transfer method %d = %d\n",
+			i, (int)enctype);
+
+		switch(enctype) {
+			case NOENCODING:
+			case BINARY:
+			case EIGHTBIT:
+				/*
+				 * Fast copy
+				 */
+				for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next) {
+					if(messageToTextAppend(&first, &last) == NULL) {
+						/*
+						 * The list is already terminated,
+						 * there is no node to touch here
+						 */
+						if(first)
+							textDestroy(first);
+						return NULL;
+					}
+					if(t_line->t_line)
+						last->t_line = lineLink(t_line->t_line);
+					else
+						last->t_line = NULL;	/* empty line */
+				}
+				continue;
+			case UUENCODE:
+				cli_errmsg("messageToText: Unexpected attempt to handle uuencoded file - report to http://bugs.clamav.net\n");
+				if(first) {
+					last->t_next = NULL;
+					textDestroy(first);
+				}
+				return NULL;
+			case YENCODE:
+				t_line = yEncBegin(m);
+
+				if(t_line == NULL) {
+					/*cli_warnmsg("YENCODED attachment is missing begin statement\n");*/
+					if(first) {
+						last->t_next = NULL;
+						textDestroy(first);
+					}
+					return NULL;
+				}
+				t_line = t_line->t_next;
+				/*
+				 * NOTE(review): there is no break here, so the
+				 * default arm replaces t_line with the start
+				 * of the body - confirm that this is intended
+				 */
+			default:
+				if((i == 0) && binhexBegin(m))
+					cli_warnmsg("Binhex messages not supported yet.\n");
+				t_line = messageGetBody(m);
+		}
+
+		for(; t_line; t_line = t_line->t_next) {
+			unsigned char smallbuf[1024];
+			unsigned char *data, *bigbuf, *uptr;
+			const char *line = lineGetData(t_line->t_line);
+			size_t datasize;
+
+			if(enctype == BASE64)
+				/*
+				 * ignore blanks - breaks RFC which is
+				 * probably the point!
+				 */
+				if(line == NULL)
+					continue;
+
+			/*
+			 * Add two bytes for '\n' and '\0'. Lines that do not
+			 * fit the stack buffer get a heap buffer, the same
+			 * way messageExport() handles them
+			 */
+			datasize = (line) ? strlen(line) + 2 : 0;
+
+			if(datasize >= sizeof(smallbuf)) {
+				data = bigbuf = (unsigned char *)cli_malloc(datasize);
+				if(data == NULL)
+					break;
+			} else {
+				bigbuf = NULL;
+				data = smallbuf;
+				datasize = sizeof(smallbuf);
+			}
+
+			uptr = decodeLine(m, enctype, line, data, datasize);
+
+			if(uptr == NULL) {
+				free(bigbuf);
+				break;
+			}
+
+			assert(uptr <= &data[datasize]);
+
+			if(messageToTextAppend(&first, &last) == NULL) {
+				free(bigbuf);
+				break;
+			}
+
+			/*
+			 * If the decoded line is the same as the encoded
+			 * there's no need to take a copy, just link it.
+			 * Note that the comparison is done without the
+			 * trailing newline that the decoding routine may have
+			 * added - that's why there's a strncmp rather than a
+			 * strcmp - that'd be bad for MIME decoders, but is OK
+			 * for AV software
+			 */
+			if((data[0] == '\n') || (data[0] == '\0'))
+				last->t_line = NULL;
+			else if(line && (strncmp((const char *)data, line, strlen(line)) == 0)) {
+#ifdef CL_DEBUG
+				cli_dbgmsg("messageToText: decoded line is the same(%s)\n", data);
+#endif
+				last->t_line = lineLink(t_line->t_line);
+			} else
+				last->t_line = lineCreate((char *)data);
+
+			free(bigbuf);
+
+			if(line && enctype == BASE64)
+				if(strchr(line, '='))
+					break;
+		}
+		if(m->base64chars) {
+			unsigned char data[4];
+
+			memset(data, '\0', sizeof(data));
+			if(decode(m, NULL, data, base64, FALSE) && data[0]) {
+				if(messageToTextAppend(&first, &last) != NULL)
+					last->t_line = lineCreate((char *)data);
+			}
+			m->base64chars = 0;
+		}
+	}
+
+	if(last)
+		last->t_next = NULL;
+
+	return first;
+}
+
+/*
+ * Return the yEnc marker stored in the message (m->yenc)
+ */
+text *
+yEncBegin(message *m)
+{
+	text *marker = m->yenc;
+
+	return marker;
+}
+
+/*
+ * Scan to find the BINHEX message (if any)
+ *
+ * The disabled (#if 0) variant walks the body looking for the BinHex 4.0
+ * banner line. The variant that is compiled just returns m->binhex
+ */
+#if 0
+const text *
+binhexBegin(message *m)
+{
+ const text *t_line;
+
+ for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
+ if(strcasecmp(t_line->t_text, "(This file must be converted with BinHex 4.0)") == 0)
+ return t_line;
+
+ return NULL;
+}
+#else
+text *
+binhexBegin(message *m)
+{
+ return m->binhex;
+}
+#endif
+
+/*
+ * Scan to find a bounce message. There is no standard for these, not
+ * even a convention, so don't expect this to be foolproof
+ *
+ * The disabled (#if 0) variant walks the body asking cli_filetype() whether
+ * a line looks like mail. The variant that is compiled just returns m->bounce
+ */
+#if 0
+text *
+bounceBegin(message *m)
+{
+ const text *t_line;
+
+ for(t_line = messageGetBody(m); t_line; t_line = t_line->t_next)
+ if(cli_filetype(t_line->t_text, strlen(t_line->t_text)) == CL_TYPE_MAIL)
+ return t_line;
+
+ return NULL;
+}
+#else
+text *
+bounceBegin(message *m)
+{
+ return m->bounce;
+}
+#endif
+
+/*
+ * If a message does not contain another message which could be harmful
+ * it is deemed to be safe.
+ *
+ * TODO: ensure nothing can get through this
+ *
+ * TODO: check to see if we need to
+ * find anything else, perhaps anything
+ * from the RFC821 table?
+ *
+ * The text above describes the disabled (#if 0) messageIsAllText(), which
+ * looks for a Content-Transfer-Encoding line in the body. The variant that
+ * is compiled, encodingLine(), just returns m->encoding
+ */
+#if 0
+int
+messageIsAllText(const message *m)
+{
+ const text *t;
+
+ for(t = messageGetBody(m); t; t = t->t_next)
+ if(strncasecmp(t->t_text,
+ "Content-Transfer-Encoding",
+ strlen("Content-Transfer-Encoding")) == 0)
+ return 0;
+
+ return 1;
+}
+#else
+text *
+encodingLine(message *m)
+{
+ return m->encoding;
+}
+#endif
+
+/*
+ * Forget the encoding, bounce and binhex markers of the message
+ */
+void
+messageClearMarkers(message *m)
+{
+	m->binhex = NULL;
+	m->bounce = NULL;
+	m->encoding = NULL;
+}
+
+/*
+ * Decode a line and add it to a buffer, return the end of the buffer
+ * to help appending callers. There is no new line at the end of "line"
+ *
+ * m is the message being decoded; the base64 and uuencode paths go through
+ * decode(), and m->base64chars is reset after a uuencoded line
+ * et selects the decoder, unrecognised values are treated like NOENCODING
+ * line is the encoded text, NULL stands for an empty line
+ * buf receives the decoded bytes and must not be NULL
+ * buflen is the size of buf.
+ * NOTE(review): only the quoted-printable and uuencode paths look at buflen,
+ * the other paths appear to rely on the caller having sized buf for the line
+ *
+ * Apart from the plain copy path, which returns whatever cli_strrcpy()
+ * returns, a '\0' is stored at the position that is returned
+ */
+unsigned char *
+decodeLine(message *m, encoding_type et, const char *line, unsigned char *buf, size_t buflen)
+{
+ size_t len, reallen;
+ bool softbreak;
+ char *p2, *copy;
+ char base64buf[RFC2045LENGTH + 1];
+
+ /*cli_dbgmsg("decodeLine(et = %d buflen = %u)\n", (int)et, buflen);*/
+
+ assert(m != NULL);
+ assert(buf != NULL);
+
+ switch(et) {
+ case BINARY:
+ /*
+ * TODO: find out what this is, encoded as binary??
+ */
+ /* fall through */
+ case NOENCODING:
+ case EIGHTBIT:
+ default: /* unknown encoding type - try our best */
+ if(line) /* empty line? */
+ buf = (unsigned char *)cli_strrcpy((char *)buf, line);
+ /* Put the new line back in */
+ return (unsigned char *)cli_strrcpy((char *)buf, "\n");
+
+ case QUOTEDPRINTABLE:
+ if(line == NULL) { /* empty line */
+ *buf++ = '\n';
+ break;
+ }
+
+ softbreak = FALSE;
+ while(buflen && *line) {
+ if(*line == '=') {
+ unsigned char byte;
+
+ if((*++line == '\0') || (*line == '\n')) {
+ softbreak = TRUE;
+ /* soft line break */
+ break;
+ }
+
+ byte = hex(*line);
+
+ if((*++line == '\0') || (*line == '\n')) {
+ /*
+ * broken e-mail, not
+ * adhering to RFC2045
+ */
+ *buf++ = byte;
+ break;
+ }
+
+ /*
+ * Fix by Torok Edvin
+ * <edwintorok at gmail.com>
+ * Handle messages that use a broken
+ * quoted-printable encoding of
+ * href=\"http://, instead of =3D
+ *
+ * hex() hands back '=' for a character that
+ * is not a hex digit; in that case step back
+ * so that the '=' is emitted as it stands and
+ * the characters after it are read again
+ */
+ if(byte != '=') {
+ byte <<= 4;
+ byte += hex(*line);
+ } else
+ line -= 2;
+
+ *buf++ = byte;
+ } else
+ *buf++ = *line;
+ ++line;
+ --buflen;
+ }
+ if(!softbreak)
+ /* Put the new line back in */
+ *buf++ = '\n';
+ break;
+
+ case BASE64:
+ if(line == NULL)
+ break;
+ /*
+ * RFC2045 sets the maximum length to 76 bytes
+ * but many e-mail clients ignore that
+ */
+ if(strlen(line) < sizeof(base64buf)) {
+ strcpy(base64buf, line);
+ copy = base64buf;
+ } else {
+ copy = cli_strdup(line);
+ if(copy == NULL)
+ break;
+ }
+
+ /* Anything from the first '=' onwards is dropped */
+ p2 = strchr(copy, '=');
+ if(p2)
+ *p2 = '\0';
+
+ sanitiseBase64(copy);
+
+ /*
+ * Klez doesn't always put "=" on the last line
+ *
+ * The fast path is only asked for when there was no '=' and
+ * the cleaned line is a whole number of 4 character groups
+ */
+ buf = decode(m, copy, buf, base64, (p2 == NULL) && ((strlen(copy) & 3) == 0));
+
+ if(copy != base64buf)
+ free(copy);
+ break;
+
+ case UUENCODE:
+ assert(m->base64chars == 0);
+
+ if((line == NULL) || (*line == '\0')) /* empty line */
+ break;
+ if(strcasecmp(line, "end") == 0)
+ break;
+ if(isuuencodebegin(line))
+ break;
+
+ if((line[0] & 0x3F) == ' ')
+ break;
+
+ /*
+ * reallen contains the number of bytes that were
+ * encoded
+ */
+ reallen = (size_t)uudecode(*line++);
+ if(reallen <= 0)
+ break;
+ if(reallen > 62)
+ break;
+ len = strlen(line);
+
+ if((len > buflen) || (reallen > len))
+ /*
+ * In practice this should never occur since
+ * the maximum length of a uuencoded line is
+ * 62 characters
+ */
+ cli_warnmsg("uudecode: buffer overflow stopped, attempting to ignore but decoding may fail\n");
+ else {
+ (void)decode(m, line, buf, uudecode, (len & 3) == 0);
+ buf = &buf[reallen];
+ }
+ m->base64chars = 0; /* this happens with broken uuencoded files */
+ break;
+ case YENCODE:
+ if((line == NULL) || (*line == '\0')) /* empty line */
+ break;
+ if(strncmp(line, "=yend ", 6) == 0)
+ break;
+
+ /* '=' escapes the next character: that one has 64 taken off, all others 42 */
+ while(*line)
+ if(*line == '=') {
+ if(*++line == '\0')
+ break;
+ *buf++ = ((*line++ - 64) & 255);
+ } else
+ *buf++ = ((*line++ - 42) & 255);
+ break;
+ }
+
+ *buf = '\0';
+ return buf;
+}
+
+/*
+ * Remove the non base64 characters such as spaces from a string. Spaces
+ * shouldn't appear mid string in base64 files, but some broken mail clients
+ * ignore such errors rather than discarding the mail, and virus writers
+ * exploit this bug
+ *
+ * s is compacted in place in one pass: every character that base64Table
+ * does not map to 255 is kept, in its original order, and the result is
+ * terminated again. This replaces shifting the whole tail for each bad
+ * character, which also stepped the pointer to before the start of the
+ * string when the very first character was a bad one
+ */
+static void
+sanitiseBase64(char *s)
+{
+	char *keep = s;
+
+	/*cli_dbgmsg("sanitiseBase64 '%s'\n", s);*/
+	for(; *s; s++)
+		if(base64Table[(unsigned int)(*s & 0xFF)] != 255)
+			*keep++ = *s;
+
+	*keep = '\0';
+}
+
+/*
+ * Returns one byte after the end of the decoded data in "out"
+ *
+ * Update m->base64chars with the last few bytes of data that we haven't
+ * decoded. After the last line is found, decode will be called with in = NULL
+ * to flush these out
+ *
+ * decoder turns one input character into its value, of which the low
+ * 6 bits are used
+ * isFast asks for the loop that takes four characters at a time and only
+ * looks for the end of the string between groups, so the caller has to pass
+ * a whole number of groups. It is ignored when characters have been carried
+ * over from the previous call
+ */
+static unsigned char *
+decode(message *m, const char *in, unsigned char *out, unsigned char (*decoder)(char), bool isFast)
+{
+ unsigned char b1, b2, b3, b4;
+ unsigned char cb1, cb2, cb3; /* carried over from last line */
+
+ /*cli_dbgmsg("decode %s (len %d isFast %d base64chars %d)\n", in,
+ in ? strlen(in) : 0,
+ isFast, m->base64chars);*/
+
+ cb1 = cb2 = cb3 = '\0';
+
+ /* Pick up whatever the previous call left behind */
+ switch(m->base64chars) {
+ case 3:
+ cb3 = m->base64_3;
+ /* FALLTHROUGH */
+ case 2:
+ cb2 = m->base64_2;
+ /* FALLTHROUGH */
+ case 1:
+ cb1 = m->base64_1;
+ isFast = FALSE;
+ break;
+ default:
+ assert(m->base64chars <= 3);
+ }
+
+ if(isFast)
+ /* Fast decoding if not last line */
+ while(*in) {
+ b1 = (*decoder)(*in++);
+ b2 = (*decoder)(*in++);
+ b3 = (*decoder)(*in++);
+ /*
+ * Put this line here to help on some compilers which
+ * can make use of some architecture's ability to
+ * multiprocess when different variables can be
+ * updated at the same time - here b3 is used in
+ * one line, b1/b2 in the next and b4 in the next after
+ * that, b3 and b4 rely on in but b1/b2 don't
+ */
+ *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+ b4 = (*decoder)(*in++);
+ *out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
+ *out++ = (b3 << 6) | (b4 & 0x3F);
+ }
+ else if(in == NULL) { /* flush */
+ int nbytes;
+
+ if(m->base64chars == 0)
+ return out;
+
+ cli_dbgmsg("base64chars = %d (%c %c %c)\n", m->base64chars,
+ isalnum(cb1) ? cb1 : '@',
+ isalnum(cb2) ? cb2 : '@',
+ isalnum(cb3) ? cb3 : '@');
+
+ m->base64chars--;
+ b1 = cb1;
+ nbytes = 1;
+
+ if(m->base64chars) {
+ m->base64chars--;
+ b2 = cb2;
+
+ if(m->base64chars) {
+ nbytes = 2;
+ m->base64chars--;
+ b3 = cb3;
+ nbytes = 3;
+ } else if(b2)
+ nbytes = 2;
+ }
+
+ /* nbytes is never 4 here, that label is only reached by falling through from 3 */
+ switch(nbytes) {
+ case 3:
+ b4 = '\0';
+ /* fall through */
+ case 4:
+ *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+ *out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
+ if((nbytes == 4) || b3)
+ *out++ = (b3 << 6) | (b4 & 0x3F);
+ break;
+ case 2:
+ *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+ if((b2 << 4) & 0xFF)
+ *out++ = b2 << 4;
+ break;
+ case 1:
+ *out++ = b1 << 2;
+ break;
+ default:
+ assert(0);
+ }
+ } else while(*in) {
+ int nbytes;
+
+ /* Use up the carried over characters before reading new ones */
+ if(m->base64chars) {
+ m->base64chars--;
+ b1 = cb1;
+ } else
+ b1 = (*decoder)(*in++);
+
+ if(*in == '\0') {
+ b2 = '\0';
+ nbytes = 1;
+ } else {
+ if(m->base64chars) {
+ m->base64chars--;
+ b2 = cb2;
+ } else
+ b2 = (*decoder)(*in++);
+
+ if(*in == '\0') {
+ b3 = '\0';
+ nbytes = 2;
+ } else {
+ if(m->base64chars) {
+ m->base64chars--;
+ b3 = cb3;
+ } else
+ b3 = (*decoder)(*in++);
+
+ if(*in == '\0') {
+ b4 = '\0';
+ nbytes = 3;
+ } else {
+ b4 = (*decoder)(*in++);
+ nbytes = 4;
+ }
+ }
+ }
+
+ switch(nbytes) {
+ case 4:
+ *out++ = (b1 << 2) | ((b2 >> 4) & 0x3);
+ *out++ = (b2 << 4) | ((b3 >> 2) & 0xF);
+ *out++ = (b3 << 6) | (b4 & 0x3F);
+ continue;
+ case 3: /* incomplete group: stash it for the next call, falling through on purpose */
+ m->base64_3 = b3;
+ case 2:
+ m->base64_2 = b2;
+ case 1:
+ m->base64_1 = b1;
+ m->base64chars = nbytes;
+ break;
+ default:
+ assert(0);
+ }
+ break; /* nbytes != 4 => EOL */
+ }
+ return out;
+}
+
+/*
+ * Convert one hexadecimal digit (either case) to its value, 0 to 15.
+ * Any other character is reported through cli_dbgmsg() and '=' is returned
+ */
+static unsigned char
+hex(char c)
+{
+	/*
+	 * The <ctype.h> functions need a value that fits an unsigned char,
+	 * plain char may be negative for bytes with the top bit set
+	 */
+	if(isdigit((unsigned char)c))
+		return c - '0';
+	if((c >= 'A') && (c <= 'F'))
+		return c - 'A' + 10;
+	if((c >= 'a') && (c <= 'f'))
+		return c - 'a' + 10;
+	cli_dbgmsg("Illegal hex character '%c'\n", c);
+
+	/*
+	 * Some mails (notably some spam) break RFC2045 by failing to encode
+	 * the '=' character
+	 */
+	return '=';
+}
+
+/*
+ * Look c up in base64Table. Characters that the table marks with 255
+ * are mapped to 63
+ */
+static unsigned char
+base64(char c)
+{
+	const unsigned char value = base64Table[(unsigned int)(c & 0xFF)];
+
+	/*cli_dbgmsg("Illegal character <%c> in base64 encoding\n", c);*/
+	return (value == 255) ? 63 : value;
+}
+
+/*
+ * Value of a uuencoded character: its distance from ' ', reduced to
+ * an unsigned char
+ */
+static unsigned char
+uudecode(char c)
+{
+	const int offset = c - ' ';
+
+	return offset;
+}
+
+/*
+ * These are the only arguments we're interested in.
+ * Do 'fgrep messageFindArgument *.c' if you don't believe me!
+ * It's probably not good doing this since each time a new
+ * messageFindArgument is added I need to remember to look here,
+ * but it can save a lot of memory...
+ *
+ * Returns 1 if arg starts (ignoring case) with one of the wanted names,
+ * otherwise the argument is logged and 0 is returned
+ */
+static int
+usefulArg(const char *arg)
+{
+	static const char *const wanted[] = {
+		"name", "filename", "boundary", "protocol",
+		"id", "number", "total", "type"
+	};
+	size_t k;
+
+	for(k = 0; k < sizeof(wanted) / sizeof(wanted[0]); k++)
+		if(strncasecmp(arg, wanted[k], strlen(wanted[k])) == 0)
+			return 1;
+
+	cli_dbgmsg("Discarding unwanted argument '%s'\n", arg);
+	return 0;
+}
+
+/*
+ * Remember ctx in the message (m->ctx)
+ */
+void
+messageSetCTX(message *m, cli_ctx *ctx)
+{
+ m->ctx = ctx;
+}
+
+/*
+ * Returns TRUE if the isInfected flag of the message is set, else FALSE
+ */
+int
+messageContainsVirus(const message *m)
+{
+	if(m->isInfected)
+		return TRUE;
+
+	return FALSE;
+}
+
+/*
+ * We've run out of memory. Try to recover some by
+ * deduping the message
+ *
+ * FIXME: this can take a long time. The real solution is for system admins
+ * to refrain from setting ulimits too low, then this routine won't be
+ * called
+ *
+ * Each body line of 8 or more characters is compared with the lines after
+ * it. An identical line that has its own copy of the data is unlinked and
+ * made to share the earlier line instead. The scan gives up once about
+ * 100,000 bytes have been counted as reclaimed
+ */
+static void
+messageDedup(message *m)
+{
+ const text *t1;
+ size_t saved = 0;
+
+ cli_dbgmsg("messageDedup\n");
+
+ /*
+ * NOTE(review): the value picked here is thrown away, because the for
+ * loop below starts again from m->body_first, so every call rescans
+ * the whole body. Confirm whether resuming from m->dedupedThisFar was
+ * meant, and whether that pointer can still be trusted once the body
+ * has been destroyed elsewhere
+ */
+ t1 = m->dedupedThisFar ? m->dedupedThisFar : m->body_first;
+
+ for(t1 = m->body_first; t1; t1 = t1->t_next) {
+ const char *d1;
+ text *t2;
+ line_t *l1;
+ unsigned int r1;
+
+ if(saved >= 100*1000)
+ break; /* that's enough */
+ l1 = t1->t_line;
+ if(l1 == NULL)
+ continue;
+ d1 = lineGetData(l1);
+ if(strlen(d1) < 8)
+ continue; /* wouldn't recover many bytes */
+
+ r1 = (unsigned int)lineGetRefCount(l1);
+ if(r1 == 255) /* presumably the most references a line can hold - TODO confirm */
+ continue;
+ /*
+ * We don't want to foul up any pointers
+ */
+ if(t1 == m->encoding)
+ continue;
+ if(t1 == m->bounce)
+ continue;
+ if(t1 == m->binhex)
+ continue;
+ if(t1 == m->yenc)
+ continue;
+
+ for(t2 = t1->t_next; t2; t2 = t2->t_next) {
+ const char *d2;
+ line_t *l2 = t2->t_line;
+
+ if(l2 == NULL)
+ continue;
+ d2 = lineGetData(l2);
+ if(d1 == d2)
+ /* already linked */
+ continue;
+ if(strcmp(d1, d2) == 0) {
+ /* presumably NULL means that was the last reference to l2 - TODO confirm */
+ if(lineUnlink(l2) == NULL)
+ saved += strlen(d1) + 1;
+ t2->t_line = lineLink(l1);
+ if(t2->t_line == NULL) {
+ cli_errmsg("messageDedup: out of memory\n");
+ return;
+ }
+ if(++r1 == 255)
+ break;
+ }
+ }
+ }
+
+ cli_dbgmsg("messageDedup reclaimed %lu bytes\n", (unsigned long)saved);
+ m->dedupedThisFar = t1;
+}
+
+/*
+ * Handle RFC2231 encoding. Returns a malloc'd buffer that the caller must
+ * free, or NULL on error.
+ *
+ * TODO: Currently only handles paragraph 4 of RFC2231 e.g.
+ * protocol*=ansi-x3.4-1968''application%2Fpgp-signature;
+ *
+ * An argument without "*=" or "*0=", or one that uses "*0*=" continuations,
+ * is returned as a plain copy. Otherwise the result is the name, an '=' and
+ * the value with its %XX escapes decoded; the character set and language in
+ * front of the value are skipped. If the two quotes that end those fields
+ * are missing, a copy of the empty string is returned
+ */
+static char *
+rfc2231(const char *in)
+{
+	const char *ptr;
+	char *ret, *out;
+	enum { LANGUAGE, CHARSET, CONTENTS } field;
+
+	if(strstr(in, "*0*=") != NULL) {
+		cli_warnmsg("RFC2231 parameter continuations are not yet handled\n");
+		return cli_strdup(in);
+	}
+
+	ptr = strstr(in, "*0=");
+	if(ptr != NULL)
+		/*
+		 * Parameter continuation, with no continuation
+		 * Thunderbird 1.5 (and possibly other versions) does this
+		 */
+		field = CONTENTS;
+	else {
+		ptr = strstr(in, "*=");
+		field = LANGUAGE;
+	}
+
+	if(ptr == NULL)	/* quick return */
+		return cli_strdup(in);
+
+	cli_dbgmsg("rfc2231 '%s'\n", in);
+
+	ret = cli_malloc(strlen(in) + 1);
+
+	if(ret == NULL)
+		return NULL;
+
+	/*
+	 * Copy the name in front of the '*'. "in" itself is left alone so
+	 * that the warning further down can still show the whole header
+	 */
+	memcpy(ret, in, (size_t)(ptr - in));
+	out = &ret[ptr - in];
+
+	*out++ = '=';
+
+	while(*ptr++ != '=')
+		;
+
+	/*
+	 * We don't do anything with the language and character set, just skip
+	 * over them!
+	 */
+	while(*ptr) {
+		switch(field) {
+			case LANGUAGE:
+				if(*ptr == '\'')
+					field = CHARSET;
+				break;
+			case CHARSET:
+				if(*ptr == '\'')
+					field = CONTENTS;
+				break;
+			case CONTENTS:
+				if(*ptr == '%') {
+					unsigned char byte;
+
+					if((*++ptr == '\0') || (*ptr == '\n'))
+						break;
+
+					byte = hex(*ptr);
+
+					if((*++ptr == '\0') || (*ptr == '\n')) {
+						*out++ = byte;
+						break;
+					}
+
+					byte <<= 4;
+					byte += hex(*ptr);
+					*out++ = byte;
+				} else
+					*out++ = *ptr;
+		}
+		if(*ptr++ == '\0')
+			/*
+			 * Incorrect message that has just one character after
+			 * a '%'.
+			 * FIXME: stash something in out that would, for example
+			 * treat %2 as %02, assuming field == CONTENTS
+			 */
+			break;
+	}
+
+	if(field != CONTENTS) {
+		free(ret);
+		cli_warnmsg("Invalid RFC2231 header: '%s'\n", in);
+		return cli_strdup("");
+	}
+
+	*out = '\0';
+
+	cli_dbgmsg("rfc2231 returns '%s'\n", ret);
+
+	return ret;
+}
+
+/*
+ * common/simil:
+ * From Computing Magazine 20/8/92
+ * Returns %ge number from 0 to 100 - how similar are 2 strings?
+ * 100 for exact match, < 0 for error
+ */
+struct pstr_list { /* internal stack */
+ char *d1; /* the string held by this element */
+ struct pstr_list *next; /* the element below this one */
+};
+
+/* Status codes used by simil(), push() and pop(); all are negative so they cannot be taken for a score */
+#define OUT_OF_MEMORY (-2)
+#define FAILURE (-3)
+#define SUCCESS (-4)
+#define ARRAY_OVERFLOW (-5)
+typedef struct pstr_list ELEMENT1;
+typedef ELEMENT1 *LINK1;
+
+static int push(LINK1 *top, const char *string);
+static int pop(LINK1 *top, char *buffer);
+static unsigned int compare(char *ls1, char **rs1, char *ls2, char **rs2);
+
+#define MAX_PATTERN_SIZ 50 /* maximum string lengths */
+
+/*
+ * Release whatever is still on the work stack of simil()
+ */
+static void
+similFreeStack(LINK1 *top)
+{
+	while(*top != NULL) {
+		LINK1 dead = *top;
+
+		*top = dead->next;
+		free(dead->d1);
+		free((char *)dead);
+	}
+}
+
+/*
+ * Score how alike str1 and str2 are. Returns 100 when they are equal
+ * ignoring case, otherwise twice the number of matching characters as a
+ * percentage of the combined stripped length. Returns OUT_OF_MEMORY when an
+ * allocation fails and ARRAY_OVERFLOW when a stripped string is longer than
+ * MAX_PATTERN_SIZ - 1
+ *
+ * Pairs of strings that still need comparing are kept on a stack. The stack
+ * and the two working copies are released on every way out, including the
+ * out of memory ones
+ */
+static int
+simil(const char *str1, const char *str2)
+{
+	LINK1 top = NULL;
+	unsigned int score = 0;
+	size_t common, total;
+	size_t len1, len2;
+	char *rs1 = NULL, *rs2 = NULL;
+	char *s1, *s2;
+	char ls1[MAX_PATTERN_SIZ], ls2[MAX_PATTERN_SIZ];
+	int ret;
+
+	if(strcasecmp(str1, str2) == 0)
+		return 100;
+
+	if((s1 = cli_strdup(str1)) == NULL)
+		return OUT_OF_MEMORY;
+	if((s2 = cli_strdup(str2)) == NULL) {
+		free(s1);
+		return OUT_OF_MEMORY;
+	}
+
+	if(((total = strstrip(s1)) > MAX_PATTERN_SIZ - 1) || ((len2 = strstrip(s2)) > MAX_PATTERN_SIZ - 1)) {
+		ret = ARRAY_OVERFLOW;
+		goto done;
+	}
+
+	total += len2;
+
+	if((push(&top, s1) == OUT_OF_MEMORY) ||
+	   (push(&top, s2) == OUT_OF_MEMORY)) {
+		ret = OUT_OF_MEMORY;
+		goto done;
+	}
+
+	/* Strings are always pushed in pairs, so the second pop finds one */
+	while(pop(&top, ls2) == SUCCESS) {
+		pop(&top, ls1);
+		common = compare(ls1, &rs1, ls2, &rs2);
+		if(common > 0) {
+			score += (unsigned int)common;
+
+			/* what is left in front of the common run */
+			len1 = strlen(ls1);
+			len2 = strlen(ls2);
+
+			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
+				if((push(&top, ls1) == OUT_OF_MEMORY) || (push(&top, ls2) == OUT_OF_MEMORY)) {
+					ret = OUT_OF_MEMORY;
+					goto done;
+				}
+
+			/* what is left behind the common run */
+			len1 = strlen(rs1);
+			len2 = strlen(rs2);
+
+			if((len1 > 1 && len2 >= 1) || (len2 > 1 && len1 >= 1))
+				if((push(&top, rs1) == OUT_OF_MEMORY) || (push(&top, rs2) == OUT_OF_MEMORY)) {
+					ret = OUT_OF_MEMORY;
+					goto done;
+				}
+		}
+	}
+	ret = (total > 0) ? ((score * 200) / total) : 0;
+
+done:
+	similFreeStack(&top);
+	free(s1);
+	free(s2);
+	return ret;
+}
+
+/*
+ * Look for a run of characters that ls1 and ls2 have in common, ignoring
+ * case, and return the length of the longest run that was found (0 if no
+ * character matches at all).
+ *
+ * When something matched, both strings are cut by writing a '\0' at the
+ * start of the recorded run, so ls1 and ls2 are left holding the text in
+ * front of it, and *rs1 and *rs2 are set to the text that follows the run.
+ * *rs1 and *rs2 are not touched when nothing matched
+ */
+static unsigned int
+compare(char *ls1, char **rs1, char *ls2, char **rs2)
+{
+ unsigned int common, maxchars = 0;
+ bool some_similarity = FALSE;
+ char *s1, *s2;
+ char *maxs1 = NULL, *maxs2 = NULL, *maxe1 = NULL, *maxe2 = NULL;
+ char *cs1, *cs2, *start1, *end1, *end2;
+
+ end1 = ls1 + strlen(ls1);
+ end2 = ls2 + strlen(ls2);
+ start1 = ls1;
+
+ /* One pass for each starting position in ls1 */
+ for(;;) {
+ s1 = start1;
+ s2 = ls2;
+
+ if(s1 < end1) {
+ while(s1 < end1 && s2 < end2) {
+ if(tolower(*s1) == tolower(*s2)) {
+ some_similarity = TRUE;
+ cs1 = s1;
+ cs2 = s2;
+ common = 0;
+ /* Count how far the two strings stay the same from here */
+ do
+ if(s1 == end1 || s2 == end2)
+ break;
+ else {
+ s1++;
+ s2++;
+ common++;
+ }
+ while(tolower(*s1) == tolower(*s2));
+
+ if(common > maxchars) {
+ /* Best run so far: record it and pull both end markers in by the gain */
+ unsigned int diff = common - maxchars;
+ maxchars = common;
+ maxs1 = cs1;
+ maxs2 = cs2;
+ maxe1 = s1;
+ maxe2 = s2;
+ end1 -= diff;
+ end2 -= diff;
+ } else
+ s1 -= common;
+ } else
+ s2++;
+ }
+ start1++;
+ } else
+ break;
+ }
+ if(some_similarity) {
+ *maxs1 = '\0';
+ *maxs2 = '\0';
+ *rs1 = maxe1;
+ *rs2 = maxe2;
+ }
+ return maxchars;
+}
+
+/*
+ * Put a copy of string on top of the stack.
+ * Returns SUCCESS, or OUT_OF_MEMORY with the stack left as it was
+ */
+static int
+push(LINK1 *top, const char *string)
+{
+	LINK1 element;
+
+	if((element = (LINK1)cli_malloc(sizeof(ELEMENT1))) == NULL)
+		return OUT_OF_MEMORY;
+	if((element->d1 = cli_strdup(string)) == NULL) {
+		/* don't lose the element when the string can't be copied */
+		free((char *)element);
+		return OUT_OF_MEMORY;
+	}
+	element->next = *top;
+	*top = element;
+
+	return SUCCESS;
+}
+
+/*
+ * Take the top string off the stack, copy it into buffer and release the
+ * element. Returns SUCCESS, or FAILURE if the stack is empty, in which case
+ * buffer is left alone. buffer must be big enough for the string
+ */
+static int
+pop(LINK1 *top, char *buffer)
+{
+	LINK1 head = *top;
+
+	if(head == NULL)
+		return FAILURE;
+
+	(void)strcpy(buffer, head->d1);
+	*top = head->next;
+	free(head->d1);
+	free((char *)head);
+
+	return SUCCESS;
+}
+
+/*
+ * Have we found a line that is a start of a uuencoded file (see uuencode(5))?
+ *
+ * Returns 1 for a line of the form "begin NNN " followed by anything, where
+ * NNN is three digits, otherwise 0. The first character has to be a lower
+ * case 'b', the rest of the word is compared without regard to case
+ */
+int
+isuuencodebegin(const char *line)
+{
+	if(line[0] != 'b')	/* quick check */
+		return 0;
+
+	if(strlen(line) < 10)
+		return 0;
+
+	/*
+	 * The <ctype.h> functions need a value that fits an unsigned char,
+	 * plain char may be negative for bytes with the top bit set
+	 */
+	return (strncasecmp(line, "begin ", 6) == 0) &&
+		isdigit((unsigned char)line[6]) &&
+		isdigit((unsigned char)line[7]) &&
+		isdigit((unsigned char)line[8]) &&
+		(line[9] == ' ');
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mew.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,890 @@
+/*
+ * Copyright (C) 2006 Michal 'GiM' Spadlinski http://gim.org.pl/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+/*
+ * lzma.c
+ *
+ * o2:28:18 CEST 2oo6-25-o6 - initial 0xA4/0x536
+ * oo:29:4o CEST 2oo6-26-o6 - 0x1cd/0x536 [+0x129]
+ * o2:13:19 CEST 2oo6-o1-o7, 2oo6-3o-o6 - 0x536/0x536
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+
+#include "cltypes.h"
+#include "pe.h"
+#include "others.h"
+#include "mew.h"
+#include "packlibs.h"
+#include "rebuildpe.h"
+
+#define EC32(x) le32_to_host(x) /* Convert little endian to host */
+#define CE32(x) be32_to_host(x) /* Convert big endian to host */
+#define PEALIGN(o,a) (((a))?(((o)/(a))*(a)):(o))
+#define PESALIGN(o,a) (((a))?(((o)/(a)+((o)%(a)!=0))*(a)):(o))
+
+/* modifies all parameters */
+/* northfox does this in a clumsy way;
+ * it could be done with just a bswap
+ */
+/*
+ * Initialise the decoder state p from the input at old_edx:
+ *   p->p2 receives the 32-bit word read at old_edx + 1, byte-swapped,
+ *   p->p1 is set to 0xffffffff,
+ *   p->p0 is set to old_edx + 5 (the first byte after that word).
+ * Returns the new p->p0.
+ *
+ * No bounds check is done here; the caller has to make sure the five
+ * bytes at old_edx are readable.
+ */
+static char *lzma_bswap_4861dc(struct lzmastate *p, char *old_edx)
+{
+ /* dumb_dump_start
+ *
+
+ old_edx was 'uint32_t *' before and in mew_lzma there was
+ &new_edx where new_edx = var1C
+
+ uint32_t loc_esi, loc_edi;
+ uint8_t *loc_eax;
+
+ p->p2 = loc_esi = 0;
+ p->p0 = loc_eax = (uint8_t *)*old_edx;
+ *old_edx = 5;
+ do {
+ loc_esi = p->p2 << 8;
+ loc_edi = *(uint8_t *)((loc_eax)++);
+ loc_esi |= loc_edi;
+ (*old_edx)--;
+ p->p2 = loc_esi;
+ } while (*old_edx);
+ p->p0 = loc_eax;
+ p->p1 = 0xffffffff;
+
+ * dumb_dump_end
+ */
+
+ /* XXX, mine replacement */
+ /* EC32(CE32(x)): presumably exactly one of the two conversions is a
+ * no-op on a given host, so together they amount to one byte swap -
+ * TODO confirm against le32_to_host/be32_to_host */
+ p->p2 = EC32(CE32(((uint32_t)cli_readint32(old_edx + 1))));
+ p->p1 = 0xffffffff;
+ p->p0 = old_edx + 5;
+
+ return p->p0;
+}
+
+/*
+ * Decode a single bit.
+ *
+ * p        decoder state: p->p0 is the input pointer, p->p1 and p->p2 are
+ *          the two 32-bit working values that get compared and rescaled
+ * old_ecx  in: address of the 32-bit word whose low 16 bits hold the
+ *          counter for this bit; the counter is updated in place (the
+ *          high 16 bits of the word are preserved).
+ *          out: when a new input byte is consumed, *old_ecx is
+ *          overwritten with the advanced input pointer.
+ * src,size buffer that both *old_ecx (4 bytes) and p->p0 (1 byte) must
+ *          lie in
+ *
+ * Returns the decoded bit (0 or 1), or 0xffffffff if either pointer is
+ * outside the buffer.
+ */
+static uint32_t lzma_486248 (struct lzmastate *p, char **old_ecx, char *src, uint32_t size)
+{
+ uint32_t loc_esi, loc_edi, loc_eax, loc_ecx, ret;
+ if (!CLI_ISCONTAINED(src, size, *old_ecx, 4) || !CLI_ISCONTAINED(src, size, p->p0, 1))
+ return 0xffffffff;
+ loc_esi = p->p1;
+ loc_eax = loc_esi >> 0xb;
+ loc_ecx = cli_readint32(*old_ecx);
+ ret = loc_ecx&0xffff;
+ /* threshold = (p1 >> 11) * 16-bit counter */
+ (loc_eax) *= ret;
+ loc_edi = p->p2;
+ if (loc_edi < loc_eax)
+ {
+ /* 48625f */
+ /* bit 0: p1 shrinks to the threshold, counter += (0x800 - counter) >> 5 */
+ p->p1 = loc_eax;
+ loc_esi = ret;
+ loc_edi = ((int32_t)(0x800 - ret) >> 5) + ((loc_eax&0xffff0000) | ret);
+ /* signed<-sar, &|<-mov ax, [ecx] */
+ loc_ecx = (loc_ecx&0xffff0000)|(loc_edi&0xffff);
+ cli_writeint32(*old_ecx, loc_ecx);
+
+ ret = 0;
+ } else {
+ /* 48629e */
+ /* bit 1: both p1 and p2 drop by the threshold, counter -= counter >> 5 */
+ loc_esi -= loc_eax;
+ loc_edi -= loc_eax;
+ p->p1 = loc_esi;
+ p->p2 = loc_edi;
+ loc_eax = (loc_eax & 0xffff0000) | ret;
+ loc_esi = (loc_esi & 0xffff0000) | (ret >> 5);
+ loc_eax -= loc_esi;
+
+ loc_ecx = (loc_ecx&0xffff0000)|(loc_eax&0xffff);
+ cli_writeint32(*old_ecx, loc_ecx);
+
+ ret = 1;
+ }
+ loc_eax = p->p1;
+ /* once p1 falls below 2^24, shift one more input byte into p2 and scale p1 by 256 */
+ if (loc_eax < 0x1000000)
+ {
+ *old_ecx = p->p0;
+ loc_edi = (*(uint8_t *)(p->p0));
+ loc_esi = ((p->p2) << 8) | loc_edi;
+ (*old_ecx)++;
+ loc_eax <<= 8;
+ p->p2 = loc_esi;
+ p->p1 = loc_eax;
+ p->p0 = *old_ecx;
+ }
+ return ret;
+
+}
+
+/*
+ * Decode one byte, using the bits of znaczek (taken most significant bit
+ * first) to select which group of counters is used for each step.
+ *
+ * znaczek  reference byte
+ * old_ecx  in: base address of the counter table; used as scratch by the
+ *          bit decoder and therefore not meaningful on return
+ * p        decoder state
+ * retval   receives the decoded byte in its low 8 bits
+ * src,size buffer used for the bounds checks done by lzma_486248
+ *
+ * The first loop runs for as long as the decoded bit equals the
+ * corresponding bit of znaczek; after the first mismatch the second loop
+ * decodes the remaining bits without consulting znaczek.
+ *
+ * Returns 0 on success, 0xffffffff if lzma_486248 reports an error.
+ */
+static uint32_t lzma_48635C(uint8_t znaczek, char **old_ecx, struct lzmastate *p, uint32_t *retval, char *src, uint32_t size)
+{
+ uint32_t loc_esi = (znaczek&0xff) >> 7, /* msb */
+ loc_ebx, ret;
+ char *loc_edi;
+ znaczek <<= 1;
+ ret = loc_esi << 9;
+ loc_edi = *old_ecx;
+ *old_ecx = loc_edi + ret + 0x202;
+ if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+ return 0xffffffff;
+ /* loc_ebx accumulates the result under a leading marker bit; done when it reaches 0x100 */
+ loc_ebx = ret | 2;
+
+ while (loc_esi == ret)
+ {
+ if (loc_ebx >= 0x100)
+ {
+ ret = (ret&0xffffff00) | (loc_ebx&0xff);
+ *retval = ret;
+ return 0;
+ }
+ loc_esi = (znaczek&0xff) >> 7;
+ znaczek <<= 1;
+ ret = ((loc_esi + 1) << 8) + loc_ebx;
+ *old_ecx = loc_edi + ret*2;
+ if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+ return 0xffffffff;
+ loc_ebx += loc_ebx;
+ loc_ebx |= ret;
+ }
+ /* decoded bit differed from znaczek's bit: finish without the reference byte */
+ loc_esi = 0x100;
+ while (loc_ebx < loc_esi)
+ {
+ loc_ebx += loc_ebx;
+ *old_ecx = loc_edi + loc_ebx;
+ if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+ return 0xffffffff;
+ loc_ebx |= ret;
+ }
+ ret = (ret&0xffffff00) | (loc_ebx&0xff);
+ *retval = ret;
+ return 0;
+}
+
+/*
+ * Decode *old_edx bits, most significant first, walking a table of
+ * 2-byte-spaced counters that starts at *old_ecx.
+ *
+ * p        decoder state
+ * old_ecx  in: base address of the counter table; scratch afterwards
+ * old_edx  in: number of bits to decode; out: 1 << (that number & 0xff)
+ * retval   receives the decoded value, i.e. the final table index minus
+ *          1 << number-of-bits
+ * src,size buffer used for the bounds checks done by lzma_486248
+ *
+ * The loop is skipped when the bit count is 0 or has its top bit set.
+ *
+ * Returns 0 on success, 0xffffffff if lzma_486248 reports an error.
+ */
+static uint32_t lzma_4862e0 (struct lzmastate *p, char **old_ecx, uint32_t *old_edx, uint32_t *retval, char *src, uint32_t size)
+{
+ uint32_t loc_ebx, loc_esi, stack_ecx, ret;
+ char *loc_edi;
+
+ loc_ebx = *old_edx;
+ ret = 1;
+ loc_edi = *old_ecx;
+ if (loc_ebx && !(loc_ebx&0x80000000))
+ {
+ /* loc_4862f1 */
+ stack_ecx = loc_ebx;
+ do {
+ /* index -> 2*index + bit; 2*index is also the byte offset of the counter */
+ loc_esi = ret+ret;
+ *old_ecx = loc_edi + loc_esi;
+ if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+ return 0xffffffff;
+ ret += loc_esi;
+ stack_ecx--;
+ } while (stack_ecx);
+ }
+ /* loc_48630b */
+ /* unneeded
+ *old_ecx = (uint8_t *)loc_ebx;
+ */
+
+ /* NOTE(review): the shift count is not limited to 31 here; the callers
+ * visible in this file pass 3, 6 or 8 */
+ *old_edx = 1 << (loc_ebx&0xff);
+ ret -= *old_edx;
+ *retval = ret;
+ return 0;
+}
+
+/*
+ * Decode a value with a one or two bit selector in front of it.
+ *
+ *   selector 0    3 bits, table at base + 0x4   + (var0 << 4), result 0..7
+ *   selector 1,0  3 bits, table at base + 0x104 + (var0 << 4), result + 8
+ *   selector 1,1  8 bits, table at base + 0x204,               result + 0x10
+ *
+ * var0     selects the sub-table for the two 3-bit cases
+ * p        decoder state
+ * old_ecx  in: base address of the tables (the two selector counters sit
+ *          at base and base + 2); scratch afterwards
+ * old_edx  write only: set to the bit count and then replaced by
+ *          lzma_4862e0 with 1 << count
+ * retval   receives the decoded value
+ * src,size buffer used for the bounds checks of the callees
+ *
+ * Returns 0 on success and (uint32_t)-1, i.e. 0xffffffff, on error.
+ */
+static uint32_t lzma_4863da (uint32_t var0, struct lzmastate *p, char **old_ecx, uint32_t *old_edx, uint32_t *retval, char *src, uint32_t size)
+{
+ uint32_t ret;
+ char *loc_esi = *old_ecx;
+
+ if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+ return -1;
+ if (ret)
+ {
+ /* loc_4863ff */
+ *old_ecx = loc_esi+2;
+ if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+ return -1;
+ if (ret)
+ {
+ /* loc_486429 */
+ *old_edx = 8;
+ *old_ecx = loc_esi + 0x204;
+ if (lzma_4862e0 (p, old_ecx, old_edx, &ret, src, size) == 0xffffffff)
+ return -1;
+ ret += 0x10;
+ } else {
+ /* loc_48640e */
+ ret = var0 << 4;
+ *old_edx = 3;
+ *old_ecx = loc_esi + 0x104 + ret;
+ if (lzma_4862e0 (p, old_ecx, old_edx, &ret, src, size) == 0xffffffff)
+ return -1;
+ ret += 0x8;
+ }
+ } else {
+ /* loc_4863e9 */
+ ret = var0 << 4;
+ *old_edx = 3;
+ *old_ecx = loc_esi + 0x4 + ret;
+ if (lzma_4862e0 (p, old_ecx, old_edx, &ret, src, size) == 0xffffffff)
+ return -1;
+ }
+ *retval = ret;
+ return 0;
+}
+
+/*
+ * Decode old_edx bits without using any counter table: for every bit
+ * p1 is halved and the bit is 1 when p2 >= p1 (p2 is then reduced by p1).
+ * Bits are collected most significant first.
+ *
+ * p        decoder state; p->p1 and p->p2 are written back only on
+ *          success, p->p0 is advanced as input bytes are consumed
+ * old_edx  number of bits; nothing is decoded when it is 0 or has its
+ *          top bit set
+ * retval   receives the decoded value
+ * src,size buffer the input pointer p->p0 must stay in
+ *
+ * Returns 0 on success, 0xffffffff if an input byte outside the buffer
+ * would have to be read.
+ */
+static uint32_t lzma_486204 (struct lzmastate *p, uint32_t old_edx, uint32_t *retval, char *src, uint32_t size)
+{
+ uint32_t loc_esi, loc_edi, loc_ebx, loc_eax;
+ char *loc_edx;
+ loc_esi = p->p1;
+ loc_edi = p->p2;
+ loc_eax = 0;
+ if (old_edx && !(old_edx&0x80000000))
+ {
+ /* loc_4866212 */
+ loc_ebx = old_edx;
+ do {
+ loc_esi >>= 1;
+ loc_eax <<= 1;
+ if (loc_edi >= loc_esi)
+ {
+ loc_edi -= loc_esi;
+ loc_eax |= 1;
+ }
+ /* loc_486222 */
+ /* same refill rule as in lzma_486248: below 2^24 pull in one more byte */
+ if (loc_esi < 0x1000000)
+ {
+ if (!CLI_ISCONTAINED(src, size, p->p0, 1))
+ return 0xffffffff;
+ loc_edx = p->p0;
+ loc_edi <<= 8;
+ loc_esi <<= 8;
+ loc_edi |= (*loc_edx)&0xff; /* movzx ebp, byte ptr [edx] */
+ p->p0 = ++loc_edx;
+ }
+ loc_ebx--;
+ } while (loc_ebx);
+
+ }
+ p->p2 = loc_edi;
+ p->p1 = loc_esi;
+ *retval = loc_eax;
+ return 0;
+}
+
+/*
+ * Decode *old_edx bits through a table of 2-byte-spaced counters, like
+ * lzma_4862e0, but assemble the result least significant bit first: the
+ * i-th decoded bit lands in bit position i of the result.
+ *
+ * p        decoder state
+ * old_ecx  in: base address of the counter table; scratch afterwards
+ * old_edx  in: number of bits to decode; out: the final table index
+ *          (1 when no bit was decoded)
+ * retval   receives the decoded value (0 when the bit count is 0)
+ * src,size buffer used for the bounds checks done by lzma_486248
+ *
+ * Returns 0 on success, 0xffffffff if lzma_486248 reports an error.
+ */
+static uint32_t lzma_48631a (struct lzmastate *p, char **old_ecx, uint32_t *old_edx, uint32_t *retval, char *src, uint32_t size)
+{
+ uint32_t copy1, copy2;
+ uint32_t loc_esi, loc_edi, ret;
+ char *loc_ebx;
+
+ copy1 = *old_edx;
+ loc_edi = 0;
+ loc_ebx = *old_ecx;
+ *old_edx = 1;
+ copy2 = (uint32_t)loc_edi;
+
+ /* loc_edi is 0 here, so this is the "no bits requested" case */
+ if (copy1 <= (uint32_t)loc_edi)
+ {
+ *retval = copy2;
+ return 0;
+ }
+
+ do {
+ loc_esi = *old_edx + *old_edx;
+ *old_ecx = loc_esi + loc_ebx;
+ if ((ret = lzma_486248 (p, old_ecx, src, size)) == 0xffffffff)
+ return 0xffffffff;
+ /* unneeded *old_ecx = loc_edi; */
+ *old_edx = loc_esi + ret;
+ /* ret <<= (uint32_t)(*old_ecx)&0xff; */
+ /* loc_edi doubles as loop counter and as bit position of this bit */
+ ret <<= (loc_edi&0xff);
+ copy2 |= ret;
+ loc_edi++;
+ } while (loc_edi < copy1);
+
+ *retval = copy2;
+ return 0;
+}
+
+
+/*
+ * Unpack MEW's lzma-compressed data in place.
+ *
+ * orgsource  start of the buffer that holds both packed and unpacked data;
+ *            every computed pointer is checked against
+ *            [orgsource, orgsource + size_sum)
+ * buf        control data, read as little-endian 32-bit words:
+ *            - when special is set, first the length of the region whose
+ *              call/jmp operands are fixed up after unpacking
+ *            - the virtual address of the work area that holds the
+ *              decoder's counters (0x1b9b words, reset to 0x4000400 for
+ *              every chunk)
+ *            - then per chunk: unpacked length, destination virtual
+ *              address and packed length, followed by the packed stream
+ * size_sum   size of the orgsource buffer
+ * vma        virtual address that corresponds to orgsource; subtracted
+ *            from the addresses found in buf
+ * special    non-zero: unpack a single chunk, then fix up E8/E9 operands
+ *            zero: keep unpacking chunks until a zero length word is met
+ *
+ * Returns 0 on success and -1 as soon as a bounds check fails.
+ * NOTE(review): the words read from buf/source themselves are read
+ * without a bounds check in this function.
+ */
+int mew_lzma(char *orgsource, char *buf, uint32_t size_sum, uint32_t vma, uint32_t special)
+{
+ uint32_t var08, var0C, var10, var14, var20, var24, var28, var34;
+ struct lzmastate var40;
+ uint32_t new_eax, new_edx, temp;
+ int i, mainloop;
+
+ char var1, var30;
+ char *source = buf, *dest, *new_ebx, *new_ecx, *var0C_ecxcopy, *var2C;
+ char *pushed_esi = NULL, *pushed_ebx = NULL;
+ uint32_t pushed_edx=0;
+
+ uint32_t loc_esi, loc_edi;
+ uint8_t *var18;
+
+ if (special)
+ {
+ pushed_edx = cli_readint32(source);
+ source += 4;
+ }
+ temp = cli_readint32(source) - vma;
+ source += 4;
+ if (!special) pushed_ebx = source;
+ new_ebx = orgsource + temp;
+
+ do {
+ mainloop = 1;
+ do {
+ /* loc_486450 */
+ if (!special)
+ {
+ source = pushed_ebx;
+ if (cli_readint32(source) == 0)
+ {
+ return 0;
+ }
+ }
+ /* chunk header: unpacked length, destination address, packed length */
+ var28 = cli_readint32 (source);
+ source += 4;
+ temp = cli_readint32 (source) - vma;
+ var18 = orgsource + temp;
+ if (special) pushed_esi = orgsource + temp;
+ source += 4;
+ temp = cli_readint32 (source);
+ source += 5; /* yes, five */
+ var2C = source;
+ source += temp;
+ if (special) pushed_ebx = source;
+ else pushed_ebx = source;
+ var1 = 0;
+ dest = new_ebx;
+
+ /* 0x1b9b words * 4 bytes = 0x6E6C bytes of counters */
+ if(!CLI_ISCONTAINED(orgsource, size_sum, dest, 0x6E6C))
+ return -1;
+ for (i=0; i<0x1b9b; i++)
+ {
+ cli_writeint32(dest, 0x4000400);
+ dest += 4;
+ }
+ loc_esi = 0;
+ var08 = var20 = 0;
+ loc_edi = 1;
+ var14 = var10 = var24 = 1;
+
+ lzma_bswap_4861dc(&var40, var2C);
+ new_edx = 0;
+ } while (var28 <= loc_esi); /* source = 0 */
+
+ cli_dbgmsg("MEWlzma: entering do while loop\n");
+ /*
+ * Main decode loop.  var08 is the number of bytes written so far,
+ * loc_esi the decoder state number, loc_edi the current match distance
+ * and var14/var10/var24 the three previous distances.
+ */
+ do {
+ /* loc_4864a5 */
+ new_eax = var08 & 3;
+ new_ecx = (((loc_esi << 4) + new_eax)*2) + new_ebx;
+ var0C = new_eax;
+ if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+ return -1;
+ if (new_eax)
+ {
+ /* loc_486549 */
+ new_ecx = new_ebx + loc_esi*2 + 0x180;
+ var20 = 1;
+ /* eax=1 */
+ if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+ return -1;
+ if (new_eax != 1)
+ {
+ /* loc_486627 */
+ var24 = var10;
+ var10 = var14;
+ /* xor eax,eax; cmp esi, 7; setnl al; dec eax; add eax, 0Ah */
+ /* new_eax = (((loc_esi >= 7)-1)&0xFFFFFFFD) + 0xA; */
+ new_eax = loc_esi>=7 ? 10:7;
+ new_ecx = new_ebx + 0x664;
+ var14 = loc_edi;
+ loc_esi = new_eax;
+ if (lzma_4863da (var0C, &var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+ return -1;
+ var0C = new_eax;
+ if (var0C >= 4)
+ new_eax = 3;
+
+ /* loc_486662 */
+ new_edx = 6;
+ new_eax <<= 7;
+ new_ecx = new_eax + new_ebx + 0x360;
+ if (lzma_4862e0 (&var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+ return -1;
+ if (new_eax < 4)
+ {
+ /* loc_4866ca */
+ loc_edi = new_eax;
+ } else {
+ /* loc_48667d */
+ uint32_t loc_ecx;
+ loc_ecx = ((int32_t)new_eax >> 1)-1; /* sar */
+ loc_edi = ((new_eax&1)|2) << (loc_ecx&0xff);
+ if (new_eax >= 0xe)
+ {
+ /* loc_4866ab */
+ new_edx = loc_ecx - 4;
+ if (lzma_486204 (&var40, new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+ return -1;
+ loc_edi += new_eax << 4;
+
+ new_edx = 4;
+ new_ecx = new_ebx + 0x644;
+ } else {
+ /* loc_486691 */
+ new_edx = loc_ecx;
+ loc_ecx = loc_edi - new_eax;
+ new_ecx = new_ebx + loc_ecx*2 + 0x55e;
+ }
+ /* loc_4866a2 */
+ if (lzma_48631a (&var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+ return -1;
+ loc_edi += new_eax;
+ }
+ loc_edi++;
+ } else {
+ /* loc_486568 */
+ new_ecx = new_ebx + loc_esi*2 + 0x198;
+ if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+ return -1;
+ if (new_eax)
+ {
+ /* loc_4865bd */
+ new_ecx = new_ebx + loc_esi*2 + 0x1B0;
+ if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+ return -1;
+ if (new_eax)
+ {
+ /* loc_4865d2 */
+ new_ecx = new_ebx + loc_esi*2 + 0x1C8;
+ if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+ return -1;
+ if (new_eax) {
+ /* loc_4865ea */
+ new_eax = var24;
+ var24 = var10;
+ } else {
+ /* loc_4865e5 */
+ new_eax = var10;
+ }
+ /* loc_4865f3 */
+ var10 = var14;
+ } else {
+ /* loc_4865cd */
+ new_eax = var14;
+ }
+ /* loc_4865f9 */
+ var14 = loc_edi;
+ loc_edi = new_eax;
+ } else {
+ /* loc_48657e */
+ new_eax = ((loc_esi + 0xf) << 4) + var0C;
+ new_ecx = new_ebx + new_eax*2;
+ if ((new_eax = lzma_486248 (&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+ return -1;
+ if (!new_eax) {
+ uint32_t loc_ecx;
+ /* loc_486593 */
+ /* single byte copied from distance loc_edi behind the write position */
+ loc_ecx = var08;
+ loc_ecx -= loc_edi;
+ /* loc_esi = ((((loc_esi >= 7)-1)&0xFFFFFFFE) + 0xB); */
+ loc_esi = loc_esi>=7 ? 11:9;
+ if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + loc_ecx, 1))
+ return -1;
+ var1 = *(var18 + loc_ecx);
+ loc_ecx = (loc_ecx&0xffffff00) | var1;
+ /* loc_4865af */
+ new_edx = var08++;
+ if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + new_edx, 1))
+ return -1;
+ *(var18 + new_edx) = loc_ecx & 0xff;
+
+ /* loc_4866fe */
+ new_eax = var08;
+ continue; /* !!! */
+ }
+
+ }
+ /* loc_4865fe */
+ new_ecx = new_ebx + 0xa68;
+ if (lzma_4863da (var0C, &var40, &new_ecx, &new_edx, &new_eax, orgsource, size_sum) == 0xffffffff)
+ return -1;
+ var0C = new_eax;
+ /* new_eax = (((loc_esi >= 7)-1)&0xFFFFFFFD) + 0xB; */
+ new_eax = loc_esi>=7 ? 11:8;
+ loc_esi = new_eax;
+ }
+ /* loc_4866cd */
+ if (!loc_edi)
+ {
+ break;
+ } else {
+ /* copy var0C + 2 bytes from distance loc_edi, clipped to the chunk length */
+ var0C += 2;
+ new_ecx = var18;
+ new_edx = new_eax = var08;
+ new_eax -= loc_edi;
+ if ( ((var0C < var28 - new_edx) &&
+ (!CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_eax), var0C) ||
+ !CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_edx), var0C))) ||
+ (!CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_eax), var28 - new_edx) ||
+ !CLI_ISCONTAINED(orgsource, size_sum, (char*)(new_ecx + new_edx), var28 - new_edx)) )
+ return -1;
+ do {
+ var1 = *(uint8_t *)(new_ecx + new_eax);
+ *(uint8_t *)(new_ecx + new_edx) = var1;
+
+ new_edx++;
+ new_eax++;
+ var0C--;
+ if (var0C <= 0)
+ break;
+ } while (new_edx < var28);
+ var08 = new_edx;
+ }
+ } else {
+ /* loc_4864C8 */
+ /* literal byte; the counter group is picked by the high nibble of the previous byte */
+ new_eax = (((var1 & 0xff) >> 4)*3) << 9;
+ new_ecx = new_eax + new_ebx + 0xe6c;
+ var0C_ecxcopy = new_ecx;
+ if (loc_esi >= 4)
+ {
+ /* loc_4864e8 */
+ if (loc_esi >= 10)
+ loc_esi -= 6;
+ else
+ loc_esi -= 3;
+
+ } else {
+ /* loc_4864e4 */
+ loc_esi = 0;
+ }
+
+ if (var20 == 0) {
+ /* loc_48651D */
+ new_eax = 1;
+ do {
+ /* loc_486525 */
+ /*new_ecx = var0C_ecxcopy;*/
+ new_eax += new_eax;
+ new_ecx += new_eax;
+ var34 = new_eax;
+ if ((new_eax = lzma_486248(&var40, &new_ecx, orgsource, size_sum)) == 0xffffffff)
+ return -1;
+ new_eax |= var34;
+ /* loc_486522 */
+ /* keeping it here instead of at the top
+ * seems to work faster
+ */
+ if (new_eax < 0x100)
+ {
+ new_ecx = var0C_ecxcopy;
+ }
+ } while (new_eax < 0x100);
+ /* loc_48653e */
+ var1 = (uint8_t)(new_eax & 0xff);
+ } else {
+ int t;
+ /* loc_4864FB */
+ new_eax = var08 - loc_edi;
+ if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + new_eax, 1))
+ return -1;
+ t = *(var18+new_eax);
+ new_eax = (new_eax&0xffffff00) | t;
+
+ var30 = t;
+ if (lzma_48635C (t, &new_ecx, &var40, &new_eax, orgsource, size_sum) == 0xffffffff)
+ return -1;
+ var20 = 0;
+ var1 = new_eax&0xff;
+ }
+
+ /* loc_486541 */
+
+ /* unneeded: new_ecx = (new_ecx&0xffffff00) | var1; */
+
+ /* loc_4865af */
+ new_edx = var08++;
+
+ if (!CLI_ISCONTAINED((uint8_t *)orgsource, size_sum, var18 + new_edx, 1))
+ return -1;
+ *(var18 + new_edx) = var1;
+ }
+ /* loc_4866fe */
+ new_eax = var08;
+ } while (new_eax < var28);
+
+ if (special) {
+ uint32_t loc_ecx;
+ /* let's fix calls */
+ loc_ecx = 0;
+ cli_dbgmsg("MEWlen: %08x ? %08x\n", new_edx, pushed_edx);
+
+ if (!CLI_ISCONTAINED(orgsource, size_sum, pushed_esi, pushed_edx))
+ return -1;
+ /*
+ * An opcode advances the cursor by 5, so the cursor can step over
+ * pushed_edx; compare with '<' and never touch an operand that does
+ * not fit completely into the region checked above.
+ */
+ while (loc_ecx < pushed_edx) {
+ /* 0xe8, 0xe9 call opcodes */
+ if (pushed_esi[loc_ecx] == '\xe8' || pushed_esi[loc_ecx] == '\xe9')
+ {
+ char *adr;
+
+ /* opcode byte + 32-bit operand */
+ if (pushed_edx - loc_ecx < 5)
+ break;
+ adr = (char *)(pushed_esi + loc_ecx + 1);
+ loc_ecx++;
+
+ /* operand is stored byte-swapped; swap it and subtract the operand's offset */
+ cli_writeint32(adr, EC32(CE32((uint32_t)cli_readint32(adr)))-loc_ecx);
+ loc_ecx += 4;
+ } else
+ loc_ecx++;
+ }
+ return 0; /*pushed_edx;*/
+ }
+ } while (mainloop);
+
+ return 0xbadc0de;
+}
+
+
+/* UPack lzma */
+
+/*
+ * UPack flavour of the single bit decoder (compare with lzma_486248).
+ *
+ * Differences visible here: the counter at old_ecx is used as a whole
+ * 32-bit word, the current value is obtained by reading four bytes at
+ * p->p0, byte-swapping them and subtracting p->p2, and a refill only
+ * advances p->p0 by one byte.
+ *
+ * p        decoder state
+ * old_ecx  address of the 32-bit counter for this bit, updated in place
+ * bb,bl    buffer and its length; both old_ecx and p->p0 must have four
+ *          readable bytes inside it
+ *
+ * Returns the decoded bit (0 or 1), or 0xffffffff when one of the two
+ * pointers is outside the buffer.
+ */
+uint32_t lzma_upack_esi_00(struct lzmastate *p, char *old_ecx, char *bb, uint32_t bl)
+{
+    uint32_t loc_eax, ret, loc_edi;
+    loc_eax = p->p1 >> 0xb;
+    if (!CLI_ISCONTAINED(bb, bl, old_ecx, 4) || !CLI_ISCONTAINED(bb, bl, p->p0, 4))
+    {
+        /* pointers must be printed with %p; %x expects an unsigned int */
+        if (!CLI_ISCONTAINED(bb, bl, old_ecx, 4))
+            cli_dbgmsg("contain error! %p %08x ecx: %p [%p]\n", (void *)bb, bl, (void *)old_ecx, (void *)(bb+bl));
+        else
+            cli_dbgmsg("contain error! %p %08x p0: %p [%p]\n", (void *)bb, bl, (void *)p->p0, (void *)(bb+bl));
+        return 0xffffffff;
+    }
+    ret = cli_readint32(old_ecx);
+    loc_eax *= ret;
+    loc_edi = cli_readint32((char *)p->p0);
+    loc_edi = EC32(CE32(loc_edi)); /* bswap */
+    loc_edi -= p->p2;
+    if (loc_edi < loc_eax)
+    {
+        /* bit 0: counter += (0x800 - counter) >> 5 */
+        p->p1 = loc_eax;
+        loc_eax = (0x800 - ret) >> 5;
+        cli_writeint32(old_ecx, cli_readint32(old_ecx) + loc_eax);
+        ret = 0;
+    } else {
+        /* bit 1: counter -= counter >> 5 */
+        p->p2 += loc_eax;
+        p->p1 -= loc_eax;
+        loc_eax = ret >> 5;
+        cli_writeint32(old_ecx, cli_readint32(old_ecx) - loc_eax);
+        ret = 1;
+    }
+    /* top byte of p1 used up: scale both values and move on one input byte */
+    if(((p->p1)&0xff000000) == 0)
+    {
+        p->p2 <<= 8;
+        p->p1 <<= 8;
+        p->p0++;
+    }
+    return ret;
+}
+
+/*
+ * UPack counterpart of lzma_4862e0.
+ * (original note: "lzma_upack_esi_4c 0x1 as eax!")
+ *
+ * Starting at index old_eax, repeatedly decode one bit with the 32-bit
+ * counter at old_ebp + 4 * index and replace index by 2 * index + bit,
+ * until index is no longer below old_ecx.
+ *
+ * old_edx  is left pointing at the last counter that was used
+ * retval   receives index - old_ecx
+ * bs,bl    buffer passed through to lzma_upack_esi_00 for bounds checks
+ *
+ * Returns 0 on success, 0xffffffff if lzma_upack_esi_00 fails.
+ */
+uint32_t lzma_upack_esi_50(struct lzmastate *p, uint32_t old_eax, uint32_t old_ecx, char **old_edx, char *old_ebp, uint32_t *retval, char *bs, uint32_t bl)
+{
+    uint32_t node = old_eax;
+    uint32_t bit;
+
+    for (;;) {
+        *old_edx = old_ebp + (node << 2);
+        bit = lzma_upack_esi_00(p, *old_edx, bs, bl);
+        if (bit == 0xffffffff)
+            return 0xffffffff;
+
+        node = (node << 1) + bit;
+        if (node >= old_ecx)
+            break;
+    }
+
+    *retval = node - old_ecx;
+    return 0;
+}
+
+/*
+ * UPack decoder for a value with a one or two bit selector in front.
+ *
+ * The selector bits are decoded with the counters at *old_edx and
+ * *old_edx + 4 and choose the limit handed to lzma_upack_esi_50 and the
+ * base that is added to its result:
+ *   selector 0    limit 8,      base 1
+ *   selector 1,0  limit 8,      base 9
+ *   selector 1,1  limit 8 << 5, base 0x11
+ *
+ * p        decoder state
+ * old_eax  only its upper 24 bits are carried into the base value
+ * old_ecx  in/out: low byte is replaced by 8, then shifted as above
+ * old_edx  in/out: counter pointer, advanced while decoding
+ * retval   receives base + value decoded by lzma_upack_esi_50
+ * bs,bl    buffer passed through for bounds checks
+ *
+ * Returns 0 on success, 0xffffffff as soon as any decoding step fails.
+ */
+uint32_t lzma_upack_esi_54(struct lzmastate *p, uint32_t old_eax, uint32_t *old_ecx, char **old_edx, uint32_t *retval, char *bs, uint32_t bl)
+{
+    uint32_t ret, loc_eax = old_eax;
+
+    *old_ecx = ((*old_ecx)&0xffffff00)|8;
+    ret = lzma_upack_esi_00 (p, *old_edx, bs, bl);
+    /* an error must not be mistaken for a decoded "1" bit */
+    if (ret == 0xffffffff)
+        return 0xffffffff;
+    *old_edx = ((*old_edx) + 4);
+    loc_eax = (loc_eax&0xffffff00)|1;
+    if (ret)
+    {
+        ret = lzma_upack_esi_00 (p, *old_edx, bs, bl);
+        if (ret == 0xffffffff)
+            return 0xffffffff;
+        loc_eax |= 8; /* mov al, 9 */
+        if (ret)
+        {
+            *old_ecx <<= 5;
+            loc_eax = 0x11; /* mov al, 11 */
+        }
+    }
+    ret = loc_eax;
+    if (lzma_upack_esi_50(p, 1, *old_ecx, old_edx, *old_edx + (loc_eax << 2), &loc_eax, bs, bl) == 0xffffffff)
+        return 0xffffffff;
+
+    *retval = ret + loc_eax;
+    return 0;
+}
+
+
+/*
+ * Unpack a MEW 11 packed image held in src and write the rebuilt PE to
+ * filedesc.
+ *
+ * src       buffer of ssize + dsize bytes; all pointers derived from the
+ *           packed data are checked against it
+ * off       offset, relative to src + dsize, of the unpacker's data:
+ *           entry point at +4, first destination address at +8, packed
+ *           stream from +12
+ * ssize     size of the packed part
+ * dsize     size of the unpacked part
+ * base,vadd image base and virtual address of the section; base + vadd
+ *           is the virtual address that corresponds to src
+ * uselzma   0: every unpacked piece becomes a section of its own;
+ *           otherwise the offset of the lzma stub inside src (the byte at
+ *           src + uselzma + 8 tells whether the "special" variant is
+ *           used) and everything goes into a single section
+ * sectnum, endsrc, enddst are not used by this function.
+ *
+ * Returns 1 on success and -1 on any failure.
+ */
+int unmew11(int sectnum, char *src, int off, int ssize, int dsize, uint32_t base, uint32_t vadd, int uselzma, char **endsrc, char **enddst, int filedesc)
+{
+    uint32_t entry_point, newedi, loc_ds=dsize, loc_ss=ssize;
+    char *source = src + dsize + off; /*EC32(section_hdr[sectnum].VirtualSize) + off;*/
+    char *lesi = source + 12, *ledi;
+    char *f1, *f2;
+    int i;
+    struct cli_exe_section *section = NULL;
+    uint32_t vma = base + vadd, size_sum = ssize + dsize;
+
+    entry_point = cli_readint32(source + 4);
+    newedi = cli_readint32(source + 8);
+    ledi = src + (newedi - vma);
+
+    i = 0;
+    ssize -= 12;
+    while (1)
+    {
+        /* pointers must be printed with %p; %x expects an unsigned int */
+        cli_dbgmsg("MEW unpacking section %d (%p->%p)\n", i, (void *)lesi, (void *)ledi);
+        if (!CLI_ISCONTAINED(src, size_sum, lesi, 4) || !CLI_ISCONTAINED(src, size_sum, ledi, 4))
+        {
+            cli_dbgmsg("Possibly programmer error or hand-crafted PE file, report to clamav team\n");
+            /* section may already hold entries from earlier iterations */
+            free(section);
+            return -1;
+        }
+        if (unmew(lesi, ledi, loc_ss, loc_ds, &f1, &f2))
+        {
+            free(section);
+            return -1;
+        }
+
+        /* we don't need last section in sections since this is information for fixing imptbl */
+        if (!CLI_ISCONTAINED(src, size_sum, f1, 4))
+        {
+            free(section);
+            return -1;
+        }
+
+        /* XXX */
+        loc_ss -= (f1+4-lesi);
+        loc_ds -= (f2-ledi);
+        ledi = src + (cli_readint32(f1) - vma);
+        lesi = f1+4;
+
+        if (!uselzma)
+        {
+            /* end of the piece just unpacked, rounded up to a 4K boundary */
+            uint32_t val = PESALIGN(f2 - src, 0x1000);
+            void *newsect;
+
+            if (i && val < section[i].raw) {
+                cli_dbgmsg("MEW: WTF - please report\n");
+                free(section);
+                return -1;
+            }
+
+            /* keep the old pointer until the reallocation is known to have worked */
+            if (!(newsect=cli_realloc(section, (i+2)*sizeof(struct cli_exe_section)))) {
+                cli_dbgmsg("MEW: Out of memory\n");
+                free(section);
+                return -1;
+            }
+
+            section = (struct cli_exe_section *)newsect;
+            section[0].raw = 0;
+            section[0].rva = vadd;
+            /* entry i+1 only carries the start of the next piece for now */
+            section[i+1].raw = val;
+            section[i+1].rva = val + vadd;
+            section[i].rsz = section[i].vsz = ((i)?(val - section[i].raw):val);
+        }
+        i++;
+
+        /* a zero destination address terminates the list of pieces */
+        if (!cli_readint32(f1))
+            break;
+    }
+
+    /* LZMA stuff */
+    if (uselzma) {
+        free(section);
+
+        /* put everything in one section */
+        i = 1;
+        if (!CLI_ISCONTAINED(src, size_sum, src+uselzma+8, 1))
+        {
+            cli_dbgmsg("MEW: couldn't access lzma 'special' tag\n");
+            return -1;
+        }
+        /* 0x50 -> push eax */
+        cli_dbgmsg("MEW: lzma %swas used, unpacking\n", (*(src + uselzma+8) == '\x50')?"special ":"");
+        if (!CLI_ISCONTAINED(src, size_sum, f1+4, 20 + 4 + 5))
+        {
+            cli_dbgmsg("MEW: lzma initialization data not available!\n");
+            return -1;
+        }
+
+        if(mew_lzma(src, f1+4, size_sum, vma, *(src + uselzma+8) == '\x50'))
+        {
+            return -1;
+        }
+        loc_ds=PESALIGN(loc_ds, 0x1000);
+
+        section = cli_calloc(1, sizeof(struct cli_exe_section));
+        if(!section) {
+            cli_dbgmsg("MEW: Out of memory\n");
+            return -1;
+        }
+
+        section[0].raw = 0; section[0].rva = vadd;
+        section[0].rsz = section[0].vsz = dsize;
+    }
+    /* cli_rebuildpe reports failure with a zero return value */
+    if (!cli_rebuildpe(src, section, i, base, entry_point - base, 0, 0, filedesc))
+    {
+        cli_dbgmsg("MEW: Rebuilding failed\n");
+        free(section);
+        return -1;
+    }
+    free(section);
+    return 1;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_msexpand.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,146 @@
+/*
+ * msexpand: Microsoft "compress.exe/expand.exe" compatible decompressor
+ *
+ * Copyright (c) 2000 Martin Hinner <mhi at penguin.cz>
+ * Algorithm & data structures by M. Winterhoff <100326.2776 at compuserve.com>
+ *
+ * Corrected and adapted to ClamAV by Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <string.h>
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+#include "cltypes.h"
+#include "others.h"
+#include "msexpand.h"
+
+/*
+ * Expand a file produced by Microsoft's compress.exe.
+ *
+ * in   stream positioned at the start of the compressed file
+ * out  stream that receives the expanded data
+ *
+ * Two headers are recognised by their magic numbers.  For the first
+ * (0x44445A53 / 0x3327F088) the data is expanded; the second
+ * (0x4A41574B / 0xD127F088 / 0x00120003, "version 6.22") is reported as
+ * unsupported.
+ *
+ * The data is a sequence of flag bytes, each followed by up to eight
+ * items, one per flag bit starting with the least significant: a set bit
+ * means one literal byte, a clear bit a two byte reference (12 bit
+ * position in a 4096 byte window, 4 bit length + 3).
+ *
+ * Returns 0 when the end of the input is reached (a truncated stream is
+ * not an error, expansion just stops), -1 on a short or unknown header,
+ * on allocation failure or when writing to out fails.
+ */
+int cli_msexpand(FILE *in, FILE *out)
+{
+    int bits, ch, i, j, len, mask;
+    unsigned char *buffer;
+    uint32_t magic1, magic2, magic3, filesize;
+    uint16_t reserved;
+
+
+    if(fread(&magic1, sizeof(magic1), 1, in) != 1) {
+        return -1;
+    }
+
+    if(magic1 == le32_to_host(0x44445A53L))
+    {
+        if(fread(&magic2, sizeof(magic2), 1, in) != 1) {
+            return -1;
+        }
+
+        if(fread(&reserved, sizeof(reserved), 1, in) != 1) {
+            return -1;
+        }
+
+        /* the value is not used, but the field has to be skipped */
+        if(fread(&filesize, sizeof(filesize), 1, in) != 1) {
+            return -1;
+        }
+
+        if(magic2 != le32_to_host(0x3327F088L))
+        {
+            cli_warnmsg("msexpand: Not a MS-compressed file\n");
+            return -1;
+        }
+
+    } else
+    if(magic1 == le32_to_host(0x4A41574BL))
+    {
+        if(fread(&magic2, sizeof(magic2), 1, in) != 1) {
+            return -1;
+        }
+
+        if(fread(&magic3, sizeof(magic3), 1, in) != 1) {
+            return -1;
+        }
+
+        if(fread(&reserved, sizeof(reserved), 1, in) != 1) {
+            return -1;
+        }
+
+        if(magic2 != le32_to_host(0xD127F088L) || magic3 != le32_to_host(0x00120003L))
+        {
+            cli_warnmsg("msexpand: Not a MS-compressed file\n");
+            return -1;
+        }
+
+        cli_warnmsg("msexpand: unsupported version 6.22\n");
+        return -1;
+
+    } else {
+        cli_warnmsg("msexpand: Not a MS-compressed file\n");
+        return -1;
+    }
+
+    if((buffer = (unsigned char *) cli_calloc(4096, sizeof(char))) == NULL) {
+        cli_errmsg("msexpand: Can't allocate memory\n");
+        return -1;
+    }
+
+    /* the write position in the window starts 16 bytes before its end */
+    i = 4096 - 16;
+
+    while (1) {
+        if((bits = fgetc(in)) == EOF)
+            break;
+
+        for(mask = 0x01; mask & 0xFF; mask <<= 1) {
+            if(!(bits & mask)) {
+                if((j = fgetc(in)) == EOF)
+                    break;
+                /*
+                 * The second byte of the reference may be missing as
+                 * well; without this check EOF (-1) would be taken for
+                 * a position/length byte and bogus data written.
+                 */
+                if((len = fgetc(in)) == EOF)
+                    break;
+                j += (len & 0xF0) << 4;
+                len = (len & 15) + 3;
+                while(len--) {
+                    buffer[i] = buffer[j];
+                    if(fwrite(&buffer[i], sizeof(unsigned char), 1, out) != 1) {
+                        free(buffer);
+                        return -1;
+                    }
+                    j++;
+                    j %= 4096;
+                    i++;
+                    i %= 4096;
+                }
+            } else {
+                if((ch = fgetc(in)) == EOF)
+                    break;
+
+                buffer[i] = ch;
+                if(fwrite(&buffer[i], sizeof(unsigned char), 1, out) != 1) {
+                    free(buffer);
+                    return -1;
+                }
+                i++;
+                i %= 4096;
+            }
+        }
+    }
+
+    free(buffer);
+    return 0;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_mspack.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1959 @@
+/*
+ * This file includes code from libmspack adapted for libclamav by
+ * tkojm at clamav.net
+ *
+ * Copyright (C) 2003-2004 Stuart Caie
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "others.h"
+#include "clamav.h"
+#include "mspack.h"
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+#ifndef CHAR_BIT
+# define CHAR_BIT (8)
+#endif
+
+
+/***************************************************************************
+ * MS-ZIP decompression implementation
+ ***************************************************************************
+ * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
+ * by Microsoft Corporation.
+ *
+ * The deflate method was created by Phil Katz. MSZIP is equivalent to the
+ * deflate method.
+ *
+ */
+
+/* base match lengths for literal/length codes 257..285, indexed by
+ * (code - 257); mszip_inflate() adds the value of any extra bits to this */
+static const unsigned short mszip_lit_lengths[29] = {
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27,
+ 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
+};
+
+/* base match offsets for distance codes 0..29; mszip_inflate() adds the
+ * value of any extra bits to this */
+static const unsigned short mszip_dist_offsets[30] = {
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385,
+ 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577
+};
+
+/* number of extra bits to read for literal/length codes 257..285,
+ * indexed by (code - 257) */
+static const unsigned char mszip_lit_extrabits[29] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2,
+ 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
+};
+
+/* number of extra bits to read for distance codes 0..29 */
+static const unsigned char mszip_dist_extrabits[30] = {
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
+ 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13
+};
+
+/* the order in which the bit-length Huffman code lengths are stored in the
+ * stream; mszip_read_lens() uses it to index bl_len[] */
+static const unsigned char mszip_bitlen_order[19] = {
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+
+/* ANDing with mszip_bit_mask_tab[n] keeps only the lower n bits (n = 0..16) */
+static const unsigned short mszip_bit_mask_tab[17] = {
+ 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+};
+
+#define MSZIP_STORE_BITS do { /* write the local bit-reader state back into *zip */ \
+ zip->i_ptr = i_ptr; \
+ zip->i_end = i_end; \
+ zip->bit_buffer = bit_buffer; \
+ zip->bits_left = bits_left; \
+} while (0)
+
+#define MSZIP_RESTORE_BITS do { /* load the bit-reader state from *zip into the locals */ \
+ i_ptr = zip->i_ptr; \
+ i_end = zip->i_end; \
+ bit_buffer = zip->bit_buffer; \
+ bits_left = zip->bits_left; \
+} while (0)
+
+#define MSZIP_ENSURE_BITS(nbits) do { /* refill until at least nbits bits are buffered */ \
+ while (bits_left < (nbits)) { \
+ if (i_ptr >= i_end) { \
+ if (mszip_read_input(zip)) return zip->error; /* NB: returns from the enclosing function */ \
+ i_ptr = zip->i_ptr; \
+ i_end = zip->i_end; \
+ } \
+ bit_buffer |= *i_ptr++ << bits_left; bits_left += 8; /* new byte goes above the bits already held */ \
+ } \
+} while (0)
+
+#define MSZIP_PEEK_BITS(nbits) (bit_buffer & ((1<<(nbits))-1)) /* low nbits, mask computed by shifting */
+#define MSZIP_PEEK_BITS_T(nbits) (bit_buffer & mszip_bit_mask_tab[(nbits)]) /* low nbits, mask from the table (nbits <= 16) */
+
+#define MSZIP_REMOVE_BITS(nbits) ((bit_buffer >>= (nbits)), (bits_left -= (nbits))) /* drop the low nbits */
+
+#define MSZIP_READ_BITS(val, nbits) do { /* ensure + peek + remove in one step */ \
+ MSZIP_ENSURE_BITS(nbits); (val) = MSZIP_PEEK_BITS(nbits); MSZIP_REMOVE_BITS(nbits); \
+} while (0)
+
+#define MSZIP_READ_BITS_T(val, nbits) do { /* as MSZIP_READ_BITS, but using the mask table */ \
+ MSZIP_ENSURE_BITS(nbits); (val) = MSZIP_PEEK_BITS_T(nbits); MSZIP_REMOVE_BITS(nbits); \
+} while (0)
+
+/* Refills zip->inbuf and points zip->i_ptr / zip->i_end at the new data.
+ *
+ * Data comes from the zip->read callback when one is set, otherwise from
+ * cli_readn() on zip->fd. The first time the source reports end of input a
+ * single zero byte is supplied instead and zip->input_end is set; a second
+ * end of input is an error.
+ *
+ * Returns CL_SUCCESS, or CL_EIO (also stored in zip->error) on failure.
+ */
+static int mszip_read_input(struct mszip_stream *zip) {
+  int nread;
+
+  if (zip->read) {
+    nread = zip->read(zip->file, zip->inbuf, (int)zip->inbuf_size);
+  } else {
+    nread = cli_readn(zip->fd, zip->inbuf, (int)zip->inbuf_size);
+  }
+
+  if (nread < 0) return zip->error = CL_EIO;
+
+  if (nread == 0) {
+    if (zip->input_end) {
+      cli_dbgmsg("mszip_read_input: out of input bytes\n");
+      return zip->error = CL_EIO;
+    }
+
+    /* first end of input: hand the bit reader one padding byte */
+    zip->inbuf[0] = 0;
+    zip->input_end = 1;
+    nread = 1;
+  }
+
+  zip->i_ptr = &zip->inbuf[0];
+  zip->i_end = &zip->inbuf[nread];
+
+  return CL_SUCCESS;
+}
+
+/* inflate() error codes, returned by mszip_inflate() and mszip_read_lens().
+ * All of them are negative; mszip_decompress() reports any non-positive
+ * inflate error as CL_EFORMAT unless repair mode is on. */
+#define INF_ERR_BLOCKTYPE (-1) /* unknown block type */
+#define INF_ERR_COMPLEMENT (-2) /* block size complement mismatch */
+#define INF_ERR_FLUSH (-3) /* error from flush_window() callback */
+#define INF_ERR_BITBUF (-4) /* too many bits in bit buffer */
+#define INF_ERR_SYMLENS (-5) /* too many symbols in blocktype 2 header */
+#define INF_ERR_BITLENTBL (-6) /* failed to build bitlens huffman table */
+#define INF_ERR_LITERALTBL (-7) /* failed to build literals huffman table */
+#define INF_ERR_DISTANCETBL (-8) /* failed to build distance huffman table */
+#define INF_ERR_BITOVERRUN (-9) /* bitlen RLE code goes over table size */
+#define INF_ERR_BADBITLEN (-10) /* invalid bit-length code */
+#define INF_ERR_LITCODE (-11) /* out-of-range literal code */
+#define INF_ERR_DISTCODE (-12) /* out-of-range distance code */
+#define INF_ERR_DISTANCE (-13) /* somehow, distance is beyond 32k */
+#define INF_ERR_HUFFSYM (-14) /* out of bits decoding huffman symbol */
+
+/* mszip_make_decode_table(nsyms, nbits, length[], table[])
+ *
+ * This function was coded by David Tritscher. It builds a fast huffman
+ * decoding table out of just a canonical huffman code lengths table.
+ *
+ * NOTE: this is NOT identical to lzx_make_decode_table(), the LZX
+ * counterpart in this file. This one reverses the quick-lookup bit pattern.
+ * Bits are read MSB to LSB in LZX, but LSB to MSB in MSZIP.
+ *
+ * nsyms = total number of symbols in this huffman tree.
+ * nbits = any symbols with a code length of nbits or less can be decoded
+ * in one lookup of the table.
+ * length = A table to get code lengths from [0 to nsyms-1]
+ * table = The table to fill up with decoded symbols and pointers. The first
+ * (1 << nbits) entries are the direct lookup; entries above that are
+ * used in pairs as tree nodes for codes longer than nbits.
+ *
+ * Returns 0 for OK or 1 for error (the lengths assign more code space than
+ * exists, or leave some of it unused)
+ */
+static int mszip_make_decode_table(unsigned int nsyms, unsigned int nbits,
+ unsigned char *length, unsigned short *table)
+{
+ register unsigned int leaf, reverse, fill;
+ register unsigned short sym, next_sym;
+ register unsigned char bit_num;
+ unsigned int pos = 0; /* the current position in the decode table */
+ unsigned int table_mask = 1 << nbits;
+ unsigned int mszip_bit_mask = table_mask >> 1; /* don't do 0 length codes */
+
+ /* fill entries for codes short enough for a direct mapping */
+ for (bit_num = 1; bit_num <= nbits; bit_num++) {
+ for (sym = 0; sym < nsyms; sym++) {
+ if (length[sym] != bit_num) continue;
+
+ /* reverse the significant bits */
+ fill = length[sym]; reverse = pos >> (nbits - fill); leaf = 0;
+ do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill);
+
+ if((pos += mszip_bit_mask) > table_mask) return 1; /* table overrun */
+
+ /* fill all possible lookups of this symbol with the symbol itself */
+ fill = mszip_bit_mask; next_sym = 1 << bit_num; /* here next_sym is the stride between matching slots */
+ do { table[leaf] = sym; leaf += next_sym; } while (--fill);
+ }
+ mszip_bit_mask >>= 1; /* codes one bit longer cover half as many slots */
+ }
+
+ /* exit with success if table is now complete */
+ if (pos == table_mask) return 0;
+
+ /* mark all remaining table entries as unused */
+ for (sym = pos; sym < table_mask; sym++) {
+ reverse = sym; leaf = 0; fill = nbits;
+ do { leaf <<= 1; leaf |= reverse & 1; reverse >>= 1; } while (--fill);
+ table[leaf] = 0xFFFF;
+ }
+
+ /* where should the longer codes be allocated from? */
+ next_sym = ((table_mask >> 1) < nsyms) ? nsyms : (table_mask >> 1); /* from here on next_sym is the next free node index */
+
+ /* give ourselves room for codes to grow by up to 16 more bits.
+ * codes now start at bit nbits+16 and end at (nbits+16-codelength) */
+ pos <<= 16;
+ table_mask <<= 16;
+ mszip_bit_mask = 1 << 15;
+
+ for (bit_num = nbits+1; bit_num <= MSZIP_MAX_HUFFBITS; bit_num++) {
+ for (sym = 0; sym < nsyms; sym++) {
+ if (length[sym] != bit_num) continue;
+
+ /* leaf = the first nbits of the code, reversed */
+ reverse = pos >> 16; leaf = 0; fill = nbits;
+ do {leaf <<= 1; leaf |= reverse & 1; reverse >>= 1;} while (--fill);
+
+ for (fill = 0; fill < (bit_num - nbits); fill++) {
+ /* if this path hasn't been taken yet, 'allocate' two entries */
+ if (table[leaf] == 0xFFFF) {
+ table[(next_sym << 1) ] = 0xFFFF;
+ table[(next_sym << 1) + 1 ] = 0xFFFF;
+ table[leaf] = next_sym++;
+ }
+ /* follow the path and select either left or right for next bit */
+ leaf = (table[leaf] << 1) | ((pos >> (15 - fill)) & 1);
+ }
+ table[leaf] = sym;
+
+ if ((pos += mszip_bit_mask) > table_mask) return 1; /* table overflow */
+ }
+ mszip_bit_mask >>= 1;
+ }
+
+ /* full table? */
+ return (pos != table_mask) ? 1 : 0;
+}
+
+/* MSZIP_READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
+ * bitstream using the stated table and puts it in var.
+ *
+ * The macro uses the enclosing function's zip, sym, i and bit-reader locals.
+ * On failure it returns from the enclosing function: INF_ERR_HUFFSYM if the
+ * code cannot be resolved, or zip->error if refilling the input fails.
+ */
+#define MSZIP_READ_HUFFSYM(tbl, var) do { \
+ /* huffman symbols can be up to 16 bits long */ \
+ MSZIP_ENSURE_BITS(MSZIP_MAX_HUFFBITS); \
+ /* immediate table lookup of [tablebits] bits of the code */ \
+ sym = zip->tbl##_table[MSZIP_PEEK_BITS(MSZIP_##tbl##_TABLEBITS)]; \
+ /* is the symbol longer than [tablebits] bits? (i=index of the next bit in bit_buffer) */ \
+ if (sym >= MSZIP_##tbl##_MAXSYMBOLS) { \
+ /* decode remaining bits by tree traversal */ \
+ i = MSZIP_##tbl##_TABLEBITS - 1; \
+ do { \
+ /* check next bit. error if we run out of bits before decode */ \
+ if (i++ > MSZIP_MAX_HUFFBITS) { \
+ cli_dbgmsg("zip_inflate: out of bits in huffman decode\n"); \
+ return INF_ERR_HUFFSYM; \
+ } \
+ sym = (sym << 1) | ((bit_buffer >> i) & 1); \
+ if(sym >= MSZIP_##tbl##_TABLESIZE) { \
+ cli_dbgmsg("zip_inflate: index out of table\n"); \
+ return INF_ERR_HUFFSYM; \
+ } \
+ /* the node index was doubled and 0 (left branch) or 1 (right) added above; hop to that entry */ \
+ sym = zip->tbl##_table[sym]; \
+ /* while we are still in node indices, not decoded symbols */ \
+ } while (sym >= MSZIP_##tbl##_MAXSYMBOLS); \
+ } \
+ /* result */ \
+ (var) = sym; \
+ /* look up the code length of that symbol and discard those bits */ \
+ i = zip->tbl##_len[sym]; \
+ MSZIP_REMOVE_BITS(i); \
+} while (0)
+
+static int mszip_read_lens(struct mszip_stream *zip) {
+ /* Reads the code-length header of a dynamic-Huffman block and stores the
+ * literal and distance code lengths in zip->LITERAL_len[] and
+ * zip->DISTANCE_len[], zero-filling the unused tail of each.
+ *
+ * Returns 0 on success, a negative INF_ERR_* code for a malformed header,
+ * or zip->error if refilling the input fails. The bit-reader state is only
+ * written back to *zip on success.
+ *
+ * The locals below are for the bit buffer and huffman decoding */
+ register unsigned int bit_buffer;
+ register int bits_left;
+ unsigned char *i_ptr, *i_end;
+
+ /* bitlen Huffman codes -- immediate lookup, 7 bit max code length */
+ unsigned short bl_table[(1 << 7)];
+ unsigned char bl_len[19];
+
+ unsigned char lens[MSZIP_LITERAL_MAXSYMBOLS + MSZIP_DISTANCE_MAXSYMBOLS]; /* literal lengths first, then distance lengths */
+ unsigned int lit_codes, dist_codes, code, last_code=0, bitlen_codes, i, run;
+
+ MSZIP_RESTORE_BITS;
+
+ /* read the number of codes */
+ MSZIP_READ_BITS(lit_codes, 5); lit_codes += 257;
+ MSZIP_READ_BITS(dist_codes, 5); dist_codes += 1;
+ MSZIP_READ_BITS(bitlen_codes, 4); bitlen_codes += 4;
+ if (lit_codes > MSZIP_LITERAL_MAXSYMBOLS) return INF_ERR_SYMLENS;
+ if (dist_codes > MSZIP_DISTANCE_MAXSYMBOLS) return INF_ERR_SYMLENS;
+
+ /* read in the bit lengths in their unusual order */
+ for (i = 0; i < bitlen_codes; i++) MSZIP_READ_BITS(bl_len[mszip_bitlen_order[i]], 3);
+ while (i < 19) bl_len[mszip_bitlen_order[i++]] = 0; /* lengths not present in the stream are zero */
+
+ /* create decoding table with an immediate lookup */
+ if (mszip_make_decode_table(19, 7, &bl_len[0], &bl_table[0])) {
+ return INF_ERR_BITLENTBL;
+ }
+
+ /* read literal / distance code lengths */
+ for (i = 0; i < (lit_codes + dist_codes); i++) {
+ /* single-level huffman lookup */
+ MSZIP_ENSURE_BITS(7);
+ code = bl_table[MSZIP_PEEK_BITS(7)];
+ MSZIP_REMOVE_BITS(bl_len[code]);
+
+ if (code < 16) lens[i] = last_code = code; /* codes 0..15 are literal lengths */
+ else {
+ switch (code) {
+ case 16: MSZIP_READ_BITS(run, 2); run += 3; code = last_code; break; /* repeat previous length 3..6 times */
+ case 17: MSZIP_READ_BITS(run, 3); run += 3; code = 0; break; /* 3..10 zeros */
+ case 18: MSZIP_READ_BITS(run, 7); run += 11; code = 0; break; /* 11..138 zeros */
+ default: cli_dbgmsg("zip_read_lens: bad code!: %u\n", code); return INF_ERR_BADBITLEN;
+ }
+ if ((i + run) > (lit_codes + dist_codes)) return INF_ERR_BITOVERRUN;
+ while (run--) lens[i++] = code;
+ i--; /* compensate for the i++ of the enclosing for loop */
+ }
+ }
+
+ /* copy LITERAL code lengths and clear any remaining */
+ i = lit_codes;
+ memcpy(&zip->LITERAL_len[0], &lens[0], i);
+ while (i < MSZIP_LITERAL_MAXSYMBOLS) zip->LITERAL_len[i++] = 0;
+
+ /* copy DISTANCE code lengths and clear any remaining */
+ i = dist_codes;
+ memcpy(&zip->DISTANCE_len[0], &lens[lit_codes], i);
+ while (i < MSZIP_DISTANCE_MAXSYMBOLS) zip->DISTANCE_len[i++] = 0;
+
+ MSZIP_STORE_BITS;
+ return 0;
+}
+
+/* a clean implementation of RFC 1951 / inflate
+ *
+ * Decodes deflate blocks from zip's input into zip->window until a block
+ * flagged as the last one has been processed. zip->flush_window() is called
+ * each time the window fills up and once more for any remainder.
+ *
+ * Returns 0 on success, a negative INF_ERR_* code for malformed data or a
+ * failed flush, or zip->error if refilling the input buffer fails.
+ */
+static int mszip_inflate(struct mszip_stream *zip) {
+  unsigned int last_block, block_type, distance, length, this_run, i;
+
+  /* for the bit buffer and huffman decoding */
+  register unsigned int bit_buffer;
+  register int bits_left;
+  register unsigned short sym;
+  unsigned char *i_ptr, *i_end;
+
+  MSZIP_RESTORE_BITS;
+
+  do {
+    /* read in last block bit */
+    MSZIP_READ_BITS(last_block, 1);
+
+    /* read in block type */
+    MSZIP_READ_BITS(block_type, 2);
+
+    if (block_type == 0) {
+      /* uncompressed block */
+      unsigned char lens_buf[4];
+
+      /* go to byte boundary */
+      i = bits_left & 7; MSZIP_REMOVE_BITS(i);
+
+      /* read 4 bytes of data, emptying the bit-buffer if necessary */
+      for (i = 0; (bits_left >= 8); i++) {
+        if (i == 4) return INF_ERR_BITBUF;
+        lens_buf[i] = MSZIP_PEEK_BITS(8);
+        MSZIP_REMOVE_BITS(8);
+      }
+      if (bits_left != 0) return INF_ERR_BITBUF;
+      while (i < 4) {
+        if (i_ptr >= i_end) {
+          if (mszip_read_input(zip)) return zip->error;
+          i_ptr = zip->i_ptr;
+          i_end = zip->i_end;
+        }
+        lens_buf[i++] = *i_ptr++;
+      }
+
+      /* get the length and its complement */
+      length = lens_buf[0] | (lens_buf[1] << 8);
+      i = lens_buf[2] | (lens_buf[3] << 8);
+      if (length != (~i & 0xFFFF)) return INF_ERR_COMPLEMENT;
+
+      /* read and copy the uncompressed data into the window */
+      while (length > 0) {
+        if (i_ptr >= i_end) {
+          if (mszip_read_input(zip)) return zip->error;
+          i_ptr = zip->i_ptr;
+          i_end = zip->i_end;
+        }
+
+        /* copy no more than is buffered and no more than fits the window */
+        this_run = length;
+        if (this_run > (unsigned int)(i_end - i_ptr)) this_run = i_end - i_ptr;
+        if (this_run > (MSZIP_FRAME_SIZE - zip->window_posn))
+          this_run = MSZIP_FRAME_SIZE - zip->window_posn;
+
+        memcpy(&zip->window[zip->window_posn], i_ptr, this_run);
+        zip->window_posn += this_run;
+        i_ptr += this_run;
+        length -= this_run;
+
+        if (zip->window_posn == MSZIP_FRAME_SIZE) {
+          if (zip->flush_window(zip, MSZIP_FRAME_SIZE)) return INF_ERR_FLUSH;
+          zip->window_posn = 0;
+        }
+      }
+    }
+    else if ((block_type == 1) || (block_type == 2)) {
+      /* Huffman-compressed LZ77 block */
+      unsigned int window_posn, match_posn, code;
+
+      if (block_type == 1) {
+        /* block with fixed Huffman codes */
+        i = 0;
+        while (i < 144) zip->LITERAL_len[i++] = 8;
+        while (i < 256) zip->LITERAL_len[i++] = 9;
+        while (i < 280) zip->LITERAL_len[i++] = 7;
+        while (i < 288) zip->LITERAL_len[i++] = 8;
+        for (i = 0; i < 32; i++) zip->DISTANCE_len[i] = 5;
+      }
+      else {
+        /* block with dynamic Huffman codes */
+        MSZIP_STORE_BITS;
+        if ((i = mszip_read_lens(zip))) return i;
+        MSZIP_RESTORE_BITS;
+      }
+
+      /* now huffman lengths are read for either kind of block,
+       * create huffman decoding tables */
+      if (mszip_make_decode_table(MSZIP_LITERAL_MAXSYMBOLS, MSZIP_LITERAL_TABLEBITS,
+                                  &zip->LITERAL_len[0], &zip->LITERAL_table[0]))
+      {
+        return INF_ERR_LITERALTBL;
+      }
+
+      if (mszip_make_decode_table(MSZIP_DISTANCE_MAXSYMBOLS,MSZIP_DISTANCE_TABLEBITS,
+                                  &zip->DISTANCE_len[0], &zip->DISTANCE_table[0]))
+      {
+        return INF_ERR_DISTANCETBL;
+      }
+
+      /* decode forever until end of block code */
+      window_posn = zip->window_posn;
+      while (1) {
+        MSZIP_READ_HUFFSYM(LITERAL, code);
+        if (code < 256) {
+          zip->window[window_posn++] = (unsigned char) code;
+          if (window_posn == MSZIP_FRAME_SIZE) {
+            if (zip->flush_window(zip, MSZIP_FRAME_SIZE)) return INF_ERR_FLUSH;
+            window_posn = 0;
+          }
+        }
+        else if (code == 256) {
+          /* END OF BLOCK CODE: loop break point */
+          break;
+        }
+        else {
+          /* codes 257..285 are matches. mszip_lit_lengths[] and
+           * mszip_lit_extrabits[] have exactly 29 entries, so anything
+           * above index 28 (codes 286 and 287) must be rejected before
+           * the tables are indexed. */
+          code -= 257;
+          if (code >= 29) return INF_ERR_LITCODE;
+          MSZIP_READ_BITS_T(length, mszip_lit_extrabits[code]);
+          length += mszip_lit_lengths[code];
+
+          /* likewise mszip_dist_offsets[] and mszip_dist_extrabits[] have
+           * exactly 30 entries: only distance codes 0..29 are usable */
+          MSZIP_READ_HUFFSYM(DISTANCE, code);
+          if (code >= 30) return INF_ERR_DISTCODE;
+          MSZIP_READ_BITS_T(distance, mszip_dist_extrabits[code]);
+          distance += mszip_dist_offsets[code];
+
+          /* match position is window position minus distance. If distance
+           * is more than window position numerically, it must 'wrap
+           * around' the frame size. */
+          match_posn = ((distance > window_posn) ? MSZIP_FRAME_SIZE : 0)
+                       + window_posn - distance;
+
+          /* copy match */
+          if (length < 12) {
+            /* short match, use slower loop but no loop setup code */
+            while (length--) {
+              zip->window[window_posn++] = zip->window[match_posn++];
+              match_posn &= MSZIP_FRAME_SIZE - 1;
+
+              if (window_posn == MSZIP_FRAME_SIZE) {
+                if (zip->flush_window(zip, MSZIP_FRAME_SIZE))
+                  return INF_ERR_FLUSH;
+                window_posn = 0;
+              }
+            }
+          }
+          else {
+            /* longer match, use faster loop but with setup expense */
+            unsigned char *runsrc, *rundest;
+            do {
+              /* a run stops where either position reaches the frame end */
+              this_run = length;
+              if ((match_posn + this_run) > MSZIP_FRAME_SIZE)
+                this_run = MSZIP_FRAME_SIZE - match_posn;
+              if ((window_posn + this_run) > MSZIP_FRAME_SIZE)
+                this_run = MSZIP_FRAME_SIZE - window_posn;
+
+              rundest = &zip->window[window_posn]; window_posn += this_run;
+              runsrc = &zip->window[match_posn]; match_posn += this_run;
+              length -= this_run;
+              /* byte-by-byte on purpose: source and destination may overlap */
+              while (this_run--) *rundest++ = *runsrc++;
+
+              /* flush if necessary */
+              if (window_posn == MSZIP_FRAME_SIZE) {
+                if (zip->flush_window(zip, MSZIP_FRAME_SIZE))
+                  return INF_ERR_FLUSH;
+                window_posn = 0;
+              }
+              if (match_posn == MSZIP_FRAME_SIZE) match_posn = 0;
+            } while (length > 0);
+          }
+
+        } /* else (code >= 257) */
+
+      } /* while (forever) -- break point at 'code == 256' */
+      zip->window_posn = window_posn;
+    }
+    else {
+      /* block_type == 3 -- bad block type */
+      return INF_ERR_BLOCKTYPE;
+    }
+  } while (!last_block);
+
+  /* flush the remaining data */
+  if (zip->window_posn) {
+    if (zip->flush_window(zip, zip->window_posn)) return INF_ERR_FLUSH;
+  }
+  MSZIP_STORE_BITS;
+
+  /* return success */
+  return 0;
+}
+
+/* Window-flush callback used by mszip_inflate().
+ *
+ * An MSZIP block only expands to the size of the window, so nothing is
+ * written here: the function just adds data_flushed to zip->bytes_output
+ * and reports an error once that running total exceeds MSZIP_FRAME_SIZE.
+ *
+ * Returns 0 if the total is still within one frame, 1 on overflow.
+ */
+static int mszip_flush_window(struct mszip_stream *zip,
+                              unsigned int data_flushed)
+{
+  zip->bytes_output += data_flushed;
+
+  if (zip->bytes_output <= MSZIP_FRAME_SIZE) return 0;
+
+  cli_dbgmsg("mszip_flush_window: overflow: %u bytes flushed, total is now %u\n", data_flushed, zip->bytes_output);
+  return 1;
+}
+
+/* Allocates and initialises an MSZIP decompression stream.
+ *
+ * fd, ofd           descriptors stored in the stream for input and output
+ * input_buffer_size size of the input buffer; rounded up to an even number
+ * repair_mode       stored in the stream; consulted by mszip_decompress()
+ * file, read        optional read callback and its argument; when read is
+ *                   non-NULL mszip_read_input() uses it instead of fd
+ *
+ * Returns the new stream, or NULL if the rounded buffer size is zero or an
+ * allocation fails. Release the stream with mszip_free().
+ */
+struct mszip_stream *mszip_init(int fd,
+                                int ofd,
+                                int input_buffer_size,
+                                int repair_mode,
+                                struct cab_file *file,
+                                int (*read)(struct cab_file *, unsigned char *, int))
+{
+  struct mszip_stream *state;
+
+  /* round up to a multiple of two */
+  input_buffer_size = (input_buffer_size + 1) & -2;
+  if (input_buffer_size == 0) return NULL;
+
+  /* decompression state */
+  state = cli_malloc(sizeof(struct mszip_stream));
+  if (state == NULL) return NULL;
+
+  /* input buffer */
+  state->inbuf = cli_malloc((size_t) input_buffer_size);
+  if (state->inbuf == NULL) {
+    free(state);
+    return NULL;
+  }
+
+  /* where the data comes from and goes to */
+  state->fd = fd;
+  state->ofd = ofd;
+  state->file = file;
+  state->read = read;
+  state->wflag = 1;
+
+  /* decoder configuration and status */
+  state->inbuf_size = input_buffer_size;
+  state->repair_mode = repair_mode;
+  state->flush_window = &mszip_flush_window;
+  state->error = CL_SUCCESS;
+  state->input_end = 0;
+
+  /* empty input buffer, no pending output, empty bit buffer */
+  state->i_ptr = state->i_end = &state->inbuf[0];
+  state->o_ptr = state->o_end = NULL;
+  state->bit_buffer = 0;
+  state->bits_left = 0;
+
+  return state;
+}
+
+int mszip_decompress(struct mszip_stream *zip, off_t out_bytes) {
+ /* Decompresses MSZIP frames until out_bytes bytes have been produced.
+ * Output is written to zip->ofd with cli_writen() while zip->wflag is set.
+ *
+ * Returns CL_SUCCESS, CL_ENULLARG for a NULL stream or negative out_bytes,
+ * the stream's earlier error if one is already recorded, CL_EIO on a short
+ * write, or the error produced by inflating a frame.
+ *
+ * The locals below are for the bit buffer */
+ register unsigned int bit_buffer;
+ register int bits_left;
+ unsigned char *i_ptr, *i_end;
+
+ int i, state, error;
+
+ /* easy answers */
+ if (!zip || (out_bytes < 0)) return CL_ENULLARG;
+ if (zip->error) return zip->error;
+
+ /* flush out any stored-up bytes before we begin */
+ i = zip->o_end - zip->o_ptr;
+ if ((off_t) i > out_bytes) i = (int) out_bytes;
+ if (i) {
+ if (zip->wflag && cli_writen(zip->ofd, zip->o_ptr, i) != i) {
+ return zip->error = CL_EIO;
+ }
+ zip->o_ptr += i;
+ out_bytes -= i;
+ }
+ if (out_bytes == 0) return CL_SUCCESS;
+
+ while (out_bytes > 0) {
+ /* unpack another block */
+ MSZIP_RESTORE_BITS;
+
+ /* skip to next read 'CK' header */
+ i = bits_left & 7; MSZIP_REMOVE_BITS(i); /* align to bytestream */
+ state = 0; /* 0 = nothing matched, 1 = 'C' seen, 2 = 'C' then 'K' seen */
+ do {
+ MSZIP_READ_BITS(i, 8);
+ if (i == 'C') state = 1;
+ else if ((state == 1) && (i == 'K')) state = 2;
+ else state = 0;
+ } while (state != 2);
+
+ /* inflate a block, repair and realign if necessary */
+ zip->window_posn = 0;
+ zip->bytes_output = 0;
+ MSZIP_STORE_BITS;
+ if ((error = mszip_inflate(zip))) {
+ cli_dbgmsg("mszip_decompress: inflate error %d\n", error);
+ if (zip->repair_mode) {
+ cli_dbgmsg("mszip_decompress: MSZIP error, %u bytes of data lost\n",
+ MSZIP_FRAME_SIZE - zip->bytes_output);
+ for (i = zip->bytes_output; i < MSZIP_FRAME_SIZE; i++) {
+ zip->window[i] = '\0';
+ }
+ zip->bytes_output = MSZIP_FRAME_SIZE; /* treat the zero-padded frame as complete */
+ }
+ else {
+ return zip->error = (error > 0) ? error : CL_EFORMAT; /* negative INF_ERR_* codes become CL_EFORMAT */
+ }
+ }
+ zip->o_ptr = &zip->window[0];
+ zip->o_end = &zip->o_ptr[zip->bytes_output];
+
+ /* write a frame */
+ i = (out_bytes < (off_t)zip->bytes_output) ?
+ (int)out_bytes : zip->bytes_output;
+ if (zip->wflag && cli_writen(zip->ofd, zip->o_ptr, i) != i) {
+ return zip->error = CL_EIO;
+ }
+
+ /* mspack errors (i.e. read errors) are fatal and can't be recovered */
+ if ((error > 0) && zip->repair_mode) return error;
+
+ zip->o_ptr += i; /* bytes past o_ptr stay pending for the next call */
+ out_bytes -= i;
+ }
+
+ /* NOTE(review): the loop above only ends with out_bytes == 0, so this
+ * branch looks unreachable - confirm before relying on it */
+ if (out_bytes) {
+ cli_dbgmsg("mszip_decompress: bytes left to output\n");
+ return zip->error = CL_EFORMAT;
+ }
+ return CL_SUCCESS;
+}
+
+/* Releases a stream created by mszip_init(): its input buffer, then the
+ * stream itself. A NULL stream is ignored. */
+void mszip_free(struct mszip_stream *zip) {
+  if (!zip) return;
+
+  free(zip->inbuf);
+  free(zip);
+}
+
+/***************************************************************************
+ * LZX decompression implementation
+ ***************************************************************************
+ * The LZX method was created by Jonathan Forbes and Tomi Poutanen, adapted
+ * by Microsoft Corporation.
+ *
+ */
+
+/* LZX decompressor input macros
+ *
+ * LZX_STORE_BITS stores bitstream state in lzx_stream structure
+ * LZX_RESTORE_BITS restores bitstream state from lzx_stream structure
+ * LZX_READ_BITS(var,n) takes N bits from the buffer and puts them in var
+ * LZX_ENSURE_BITS(n) ensures there are at least N bits in the bit buffer.
+ * LZX_PEEK_BITS(n) extracts without removing N bits from the bit buffer
+ * LZX_REMOVE_BITS(n) removes N bits from the bit buffer
+ *
+ * All of them work on the enclosing function's locals bit_buffer, bits_left,
+ * i_ptr and i_end, and on its lzx pointer. Valid bits are kept at the most
+ * significant end of bit_buffer: LZX_ENSURE_BITS adds 16 bits at a time
+ * directly below the bits already held, and LZX_PEEK_BITS reads from the top.
+ *
+ * LZX_ENSURE_BITS expands to a bare while loop (it is not wrapped in
+ * do { } while (0)) and returns lzx->error from the enclosing function if
+ * lzx_read_input() fails.
+ *
+ * NOTE(review): LZX_PEEK_BITS(0) would shift by the full width of
+ * bit_buffer; callers presumably never pass 0 - confirm.
+ *
+ */
+
+#define LZX_BITBUF_WIDTH (sizeof(bit_buffer) * CHAR_BIT) /* width of bit_buffer in bits */
+
+#define LZX_STORE_BITS do { \
+ lzx->i_ptr = i_ptr; \
+ lzx->i_end = i_end; \
+ lzx->bit_buffer = bit_buffer; \
+ lzx->bits_left = bits_left; \
+} while (0)
+
+#define LZX_RESTORE_BITS do { \
+ i_ptr = lzx->i_ptr; \
+ i_end = lzx->i_end; \
+ bit_buffer = lzx->bit_buffer; \
+ bits_left = lzx->bits_left; \
+} while (0)
+
+#define LZX_ENSURE_BITS(nbits) \
+ while (bits_left < (nbits)) { \
+ if (i_ptr + 1 >= i_end) { /* fewer than two bytes left in the buffer */ \
+ if (lzx_read_input(lzx)) return lzx->error; \
+ i_ptr = lzx->i_ptr; \
+ i_end = lzx->i_end; \
+ } \
+ bit_buffer |= ((i_ptr[1] << 8) | i_ptr[0]) /* 16-bit word, low byte first */ \
+ << (LZX_BITBUF_WIDTH - 16 - bits_left); \
+ bits_left += 16; \
+ i_ptr += 2; \
+ }
+
+#define LZX_PEEK_BITS(nbits) (bit_buffer >> (LZX_BITBUF_WIDTH - (nbits))) /* top nbits of the buffer */
+
+#define LZX_REMOVE_BITS(nbits) ((bit_buffer <<= (nbits)), (bits_left -= (nbits))) /* shift the top nbits out */
+
+#define LZX_READ_BITS(val, nbits) do { \
+ LZX_ENSURE_BITS(nbits); \
+ (val) = LZX_PEEK_BITS(nbits); \
+ LZX_REMOVE_BITS(nbits); \
+} while (0)
+
+/* Refills lzx->inbuf and points lzx->i_ptr / lzx->i_end at the new data.
+ *
+ * Data comes from the lzx->read callback when one is set, otherwise from
+ * cli_readn() on lzx->fd. The first time the source reports end of input,
+ * two zero bytes are supplied instead and lzx->input_end is set; a second
+ * end of input is an error.
+ *
+ * Returns CL_SUCCESS, or CL_EIO (also stored in lzx->error) on failure.
+ */
+static int lzx_read_input(struct lzx_stream *lzx) {
+  int nbytes;
+
+  if (lzx->read) {
+    nbytes = lzx->read(lzx->file, &lzx->inbuf[0], (int)lzx->inbuf_size);
+  } else {
+    nbytes = cli_readn(lzx->fd, &lzx->inbuf[0], (int)lzx->inbuf_size);
+  }
+
+  if (nbytes < 0) return lzx->error = CL_EIO;
+
+  if (nbytes == 0) {
+    if (lzx->input_end) {
+      cli_dbgmsg("lzx_read_input: out of input bytes\n");
+      return lzx->error = CL_EIO;
+    }
+
+    /* the LZX_ENSURE_BITS(16) done by the huffman decoder may read past the
+     * end of the input stream even if those bits are never used, so fake
+     * one more 16-bit word */
+    lzx->inbuf[0] = lzx->inbuf[1] = 0;
+    lzx->input_end = 1;
+    nbytes = 2;
+  }
+
+  lzx->i_ptr = &lzx->inbuf[0];
+  lzx->i_end = &lzx->inbuf[nbytes];
+
+  return CL_SUCCESS;
+}
+
+/* Huffman decoding macros */
+
+/* LZX_READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
+ * bitstream using the stated table and puts it in var.
+ *
+ * The macro uses the enclosing function's lzx, sym, i and bit-reader locals.
+ * If the code cannot be resolved it sets lzx->error to CL_EFORMAT and returns
+ * that from the enclosing function.
+ */
+#define LZX_READ_HUFFSYM(tbl, var) do { \
+ /* huffman symbols can be up to 16 bits long */ \
+ LZX_ENSURE_BITS(16); \
+ /* immediate table lookup of [tablebits] bits of the code */ \
+ sym = lzx->tbl##_table[LZX_PEEK_BITS(LZX_##tbl##_TABLEBITS)]; \
+ /* is the symbol longer than [tablebits] bits? (i=mask of the next bit to test) */ \
+ if (sym >= LZX_##tbl##_MAXSYMBOLS) { \
+ /* decode remaining bits by tree traversal */ \
+ i = 1 << (LZX_BITBUF_WIDTH - LZX_##tbl##_TABLEBITS); \
+ do { \
+ /* one less bit. error if we run out of bits before decode */ \
+ i >>= 1; \
+ if (i == 0) { \
+ cli_dbgmsg("lzx: out of bits in huffman decode\n"); \
+ return lzx->error = CL_EFORMAT; \
+ } \
+ /* double node index and add 0 (left branch) or 1 (right) */ \
+ sym <<= 1; sym |= (bit_buffer & i) ? 1 : 0; \
+ /* hop to next node index / decoded symbol */ \
+ if(sym >= (1 << LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS * 2)) { \
+ cli_dbgmsg("lzx: index out of table\n"); \
+ return lzx->error = CL_EFORMAT; \
+ } \
+ sym = lzx->tbl##_table[sym]; \
+ /* while we are still in node indices, not decoded symbols */ \
+ } while (sym >= LZX_##tbl##_MAXSYMBOLS); \
+ } \
+ /* result */ \
+ (var) = sym; \
+ /* look up the code length of that symbol and discard those bits */ \
+ i = lzx->tbl##_len[sym]; \
+ LZX_REMOVE_BITS(i); \
+} while (0)
+
+/* LZX_BUILD_TABLE(tbl) builds a huffman lookup table from code lengths.
+ * On failure it sets lzx->error to CL_EFORMAT and returns that from the
+ * enclosing function. It expands to a bare if statement. */
+#define LZX_BUILD_TABLE(tbl) \
+ if (lzx_make_decode_table(LZX_##tbl##_MAXSYMBOLS, LZX_##tbl##_TABLEBITS, \
+ &lzx->tbl##_len[0], &lzx->tbl##_table[0])) \
+ { \
+ cli_dbgmsg("lzx: failed to build %s table\n", #tbl); \
+ return lzx->error = CL_EFORMAT; \
+ }
+
+/* lzx_make_decode_table(nsyms, nbits, length[], table[])
+ *
+ * This function was coded by David Tritscher. It builds a fast huffman
+ * decoding table from a canonical huffman code lengths table.
+ *
+ * nsyms = total number of symbols in this huffman tree.
+ * nbits = any symbols with a code length of nbits or less can be decoded
+ * in one lookup of the table.
+ * length = A table to get code lengths from [0 to nsyms-1]
+ * table = The table to fill up with decoded symbols and pointers. The first
+ * (1 << nbits) entries are the direct lookup; entries above that are
+ * used in pairs as tree nodes for codes longer than nbits.
+ *
+ * Returns 0 for OK or 1 for error. A table whose lengths are all zero is
+ * accepted as OK.
+ */
+
+static int lzx_make_decode_table(unsigned int nsyms, unsigned int nbits,
+ unsigned char *length, unsigned short *table)
+{
+ register unsigned short sym;
+ register unsigned int leaf, fill;
+ register unsigned char bit_num;
+ unsigned int pos = 0; /* the current position in the decode table */
+ unsigned int table_mask = 1 << nbits;
+ unsigned int bit_mask = table_mask >> 1; /* don't do 0 length codes */
+ unsigned int next_symbol = bit_mask; /* base of allocation for long codes */
+
+ /* fill entries for codes short enough for a direct mapping */
+ for (bit_num = 1; bit_num <= nbits; bit_num++) {
+ for (sym = 0; sym < nsyms; sym++) {
+ if (length[sym] != bit_num) continue;
+ leaf = pos;
+ if((pos += bit_mask) > table_mask) return 1; /* table overrun */
+ /* fill all possible lookups of this symbol with the symbol itself */
+ for (fill = bit_mask; fill-- > 0;) table[leaf++] = sym;
+ }
+ bit_mask >>= 1; /* codes one bit longer cover half as many slots */
+ }
+
+ /* full table already? */
+ if (pos == table_mask) return 0;
+
+ /* clear the remainder of the table */
+ for (sym = pos; sym < table_mask; sym++) table[sym] = 0xFFFF;
+
+ /* allow codes to be up to nbits+16 long, instead of nbits */
+ pos <<= 16;
+ table_mask <<= 16;
+ bit_mask = 1 << 15;
+
+ for (bit_num = nbits+1; bit_num <= 16; bit_num++) {
+ for (sym = 0; sym < nsyms; sym++) {
+ if (length[sym] != bit_num) continue;
+
+ leaf = pos >> 16; /* direct-lookup slot for the first nbits of the code */
+ for (fill = 0; fill < bit_num - nbits; fill++) {
+ /* if this path hasn't been taken yet, 'allocate' two entries */
+ if (table[leaf] == 0xFFFF) {
+ table[(next_symbol << 1)] = 0xFFFF;
+ table[(next_symbol << 1) + 1] = 0xFFFF;
+ table[leaf] = next_symbol++;
+ }
+ /* follow the path and select either left or right for next bit */
+ leaf = table[leaf] << 1;
+ if ((pos >> (15-fill)) & 1) leaf++;
+ }
+ table[leaf] = sym;
+
+ if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
+ }
+ bit_mask >>= 1;
+ }
+
+ /* full table? */
+ if (pos == table_mask) return 0;
+
+ /* either erroneous table, or all elements are 0 - let's find out. */
+ for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
+ return 0;
+}
+
+/* LZX_READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
+ * first (inclusive) to last (exclusive) in the given table. The code lengths
+ * are stored in their own special LZX way.
+ *
+ * The bit-reader locals are written back to *lzx before lzx_read_lens() runs
+ * and reloaded afterwards. If lzx_read_lens() fails, the macro returns
+ * lzx->error from the enclosing function.
+ */
+#define LZX_READ_LENGTHS(tbl, first, last) do { \
+ LZX_STORE_BITS; \
+ if (lzx_read_lens(lzx, &lzx->tbl##_len[0], (first), \
+ (unsigned int)(last))) return lzx->error; \
+ LZX_RESTORE_BITS; \
+} while (0)
+
+static int lzx_read_lens(struct lzx_stream *lzx, unsigned char *lens,
+ unsigned int first, unsigned int last)
+{
+ /* Reads a 20-symbol pretree, then uses it to decode the code lengths
+ * lens[first] .. lens[last-1]. Plain codes (0 to 16) are deltas against the
+ * value already stored in lens[], taken modulo 17; codes 17 to 19 encode
+ * runs.
+ *
+ * Returns CL_SUCCESS, or an error code (also stored in lzx->error) if the
+ * pretree cannot be built, a symbol cannot be decoded or input runs out.
+ *
+ * NOTE(review): the run loops below advance x without checking it against
+ * last, so a run can write beyond lens[last-1]; presumably the length
+ * arrays are declared with spare room for this - confirm in mspack.h.
+ *
+ * bit buffer and huffman symbol decode variables */
+ register unsigned int bit_buffer;
+ register int bits_left, i;
+ register unsigned short sym;
+ unsigned char *i_ptr, *i_end;
+
+ unsigned int x, y;
+ int z;
+
+ LZX_RESTORE_BITS;
+
+ /* read lengths for pretree (20 symbols, lengths stored in fixed 4 bits) */
+ for (x = 0; x < 20; x++) {
+ LZX_READ_BITS(y, 4);
+ lzx->PRETREE_len[x] = y;
+ }
+ LZX_BUILD_TABLE(PRETREE);
+
+ for (x = first; x < last; ) {
+ LZX_READ_HUFFSYM(PRETREE, z);
+ if (z == 17) {
+ /* code = 17, run of ([read 4 bits]+4) zeros */
+ LZX_READ_BITS(y, 4); y += 4;
+ while (y--) lens[x++] = 0;
+ }
+ else if (z == 18) {
+ /* code = 18, run of ([read 5 bits]+20) zeros */
+ LZX_READ_BITS(y, 5); y += 20;
+ while (y--) lens[x++] = 0;
+ }
+ else if (z == 19) {
+ /* code = 19, run of ([read 1 bit]+4) [read huffman symbol] */
+ LZX_READ_BITS(y, 1); y += 4;
+ LZX_READ_HUFFSYM(PRETREE, z);
+ z = lens[x] - z; if (z < 0) z += 17; /* delta against the first entry of the run */
+ while (y--) lens[x++] = z;
+ }
+ else {
+ /* code = 0 to 16, delta current length entry */
+ z = lens[x] - z; if (z < 0) z += 17;
+ lens[x++] = z;
+ }
+ }
+
+ LZX_STORE_BITS;
+
+ return CL_SUCCESS;
+}
+
+/* Puts the decoder back into its initial state: the three match-offset
+ * registers R0..R2 are set to 1, the block header is marked as not yet
+ * read, no block is in progress, and the main-tree and length-tree code
+ * lengths are zeroed. */
+static void lzx_reset_state(struct lzx_stream *lzx) {
+  lzx->R0 = lzx->R1 = lzx->R2 = 1;
+
+  lzx->header_read = 0;
+  lzx->block_type = LZX_BLOCKTYPE_INVALID;
+  lzx->block_remaining = 0;
+
+  /* the tables start at 0 because new lengths are applied to them as deltas */
+  memset(&lzx->MAINTREE_len[0], 0,
+         LZX_MAINTREE_MAXSYMBOLS * sizeof(lzx->MAINTREE_len[0]));
+  memset(&lzx->LENGTH_len[0], 0,
+         LZX_LENGTH_MAXSYMBOLS * sizeof(lzx->LENGTH_len[0]));
+}
+
+/*-------- main LZX code --------*/
+
+/* Allocates and initialises an LZX decompression stream.
+ *
+ * fd, ofd           descriptors stored in the stream for input and output
+ * window_bits       log2 of the window size; must be 15..21
+ * reset_interval    stored in the stream as-is
+ * input_buffer_size size of the input buffer; rounded up to an even number
+ * output_length     stored in the stream as its length (see
+ *                   lzx_set_output_length())
+ * file, read        optional read callback and its argument; when read is
+ *                   non-NULL lzx_read_input() uses it instead of fd
+ *
+ * Returns the new stream, or NULL if window_bits is out of range, the
+ * rounded buffer size is zero, or an allocation fails.
+ */
+struct lzx_stream *lzx_init(int fd,
+                            int ofd,
+                            int window_bits,
+                            int reset_interval,
+                            int input_buffer_size,
+                            off_t output_length,
+                            struct cab_file *file,
+                            int (*read)(struct cab_file *, unsigned char *, int))
+{
+  unsigned int window_size;
+  struct lzx_stream *lzx;
+  int i, j;
+
+  /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
+  if (window_bits < 15 || window_bits > 21) return NULL;
+
+  /* only shift once window_bits is known to be in range: shifting by a
+   * negative count or by the width of the type is undefined behaviour */
+  window_size = 1U << window_bits;
+
+  input_buffer_size = (input_buffer_size + 1) & -2;
+  if (!input_buffer_size) return NULL;
+
+  /* allocate decompression state */
+  if (!(lzx = cli_calloc(1, sizeof(struct lzx_stream)))) {
+    return NULL;
+  }
+
+  for (i = 0, j = 0; i < 51; i += 2) {
+    lzx->extra_bits[i] = j; /* 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7... */
+    if(i < 50)
+      lzx->extra_bits[i+1] = j;
+    if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
+  }
+
+  for (i = 0, j = 0; i < 51; i++) {
+    lzx->position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
+    j += 1 << lzx->extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
+  }
+
+  /* allocate decompression window and input buffer */
+  lzx->window = cli_calloc(1, (size_t) window_size);
+  if(!lzx->window) {
+    free(lzx);
+    return NULL;
+  }
+
+  lzx->inbuf = cli_calloc(1, (size_t) input_buffer_size);
+  if (!lzx->inbuf) {
+    free(lzx->window);
+    free(lzx);
+    return NULL;
+  }
+
+  /* initialise decompression state */
+  lzx->fd = fd;
+  lzx->ofd = ofd;
+  lzx->wflag = 1;
+  lzx->offset = 0;
+  lzx->length = output_length;
+  lzx->file = file;
+  lzx->read = read;
+
+  lzx->inbuf_size = input_buffer_size;
+  lzx->window_size = window_size;
+  lzx->window_posn = 0;
+  lzx->frame_posn = 0;
+  lzx->frame = 0;
+  lzx->reset_interval = reset_interval;
+  lzx->intel_filesize = 0;
+  lzx->intel_curpos = 0;
+
+  /* window bits:    15 16 17 18 19 20 21
+   * position slots: 30 32 34 36 38 42 50 */
+  lzx->posn_slots = ((window_bits == 21) ? 50 :
+                     ((window_bits == 20) ? 42 : (window_bits << 1)));
+  lzx->intel_started = 0;
+  lzx->input_end = 0;
+
+  lzx->error = CL_SUCCESS;
+
+  /* empty input buffer, empty output buffer, empty bit buffer */
+  lzx->i_ptr = lzx->i_end = &lzx->inbuf[0];
+  lzx->o_ptr = lzx->o_end = &lzx->e8_buf[0];
+  lzx->bit_buffer = lzx->bits_left = 0;
+
+  lzx_reset_state(lzx);
+  return lzx;
+}
+
+/* Stores the total number of output bytes expected from the stream.
+ * A NULL stream is silently ignored. */
+void lzx_set_output_length(struct lzx_stream *lzx, off_t out_bytes) {
+  if (!lzx) return;
+  lzx->length = out_bytes;
+}
+
+/* Decompresses LZX data until out_bytes further bytes have been delivered.
+ *
+ * Bytes left over from an earlier call are flushed first. After that the
+ * stream is decoded one frame at a time into the window; each finished
+ * frame is optionally passed through the Intel E8 fix-up and, when
+ * lzx->wflag is set, written to lzx->ofd with cli_writen().
+ *
+ * Returns CL_SUCCESS on success, CL_ENULLARG when lzx is NULL or out_bytes
+ * is negative, CL_EIO when a write comes up short and CL_EFORMAT when the
+ * stream is found to be malformed. The explicit CL_EIO / CL_EFORMAT
+ * failures are stored in lzx->error, and a non-zero lzx->error makes any
+ * later call return it immediately.
+ *
+ * NOTE(review): the LZX_* bitstream / huffman macros are defined outside
+ * this view; they work on the locals declared below and presumably can
+ * return from this function on input errors -- confirm against their
+ * definitions.
+ */
+int lzx_decompress(struct lzx_stream *lzx, off_t out_bytes) {
+ /* bitstream reading and huffman variables */
+ register unsigned int bit_buffer;
+ register int bits_left, i=0;
+ register unsigned short sym;
+ unsigned char *i_ptr, *i_end;
+
+ int match_length, length_footer, extra, verbatim_bits, bytes_todo;
+ int this_run, main_element, aligned_bits, j;
+ unsigned char *window, *runsrc, *rundest, buf[12];
+ unsigned int frame_size=0, end_frame, match_offset, window_posn;
+ unsigned int R0, R1, R2;
+
+ /* easy answers */
+ if (!lzx || (out_bytes < 0)) return CL_ENULLARG;
+ if (lzx->error) return lzx->error;
+
+ /* flush out any stored-up bytes before we begin */
+ i = lzx->o_end - lzx->o_ptr;
+ if ((off_t) i > out_bytes) i = (int) out_bytes;
+ if (i) {
+ if (lzx->wflag && cli_writen(lzx->ofd, lzx->o_ptr, i) != i) {
+ return lzx->error = CL_EIO;
+ }
+ lzx->o_ptr += i;
+ lzx->offset += i;
+ out_bytes -= i;
+ }
+ if (out_bytes == 0) return CL_SUCCESS;
+
+ /* restore local state */
+ LZX_RESTORE_BITS;
+ window = lzx->window;
+ window_posn = lzx->window_posn;
+ R0 = lzx->R0;
+ R1 = lzx->R1;
+ R2 = lzx->R2;
+
+ /* index one past the frame that holds the last requested byte */
+ end_frame = (unsigned int)((lzx->offset + out_bytes) / LZX_FRAME_SIZE) + 1;
+
+ while (lzx->frame < end_frame) {
+ /* have we reached the reset interval? (if there is one?) */
+ if (lzx->reset_interval && ((lzx->frame % lzx->reset_interval) == 0)) {
+ if (lzx->block_remaining) {
+ cli_dbgmsg("lzx_decompress: %d bytes remaining at reset interval\n", lzx->block_remaining);
+ return lzx->error = CL_EFORMAT;
+ }
+
+ /* re-read the intel header and reset the huffman lengths */
+ lzx_reset_state(lzx);
+ }
+
+ /* read header if necessary */
+ if (!lzx->header_read) {
+ /* read 1 bit. if bit=0, intel filesize = 0.
+ * if bit=1, read intel filesize (32 bits) */
+ j = 0; LZX_READ_BITS(i, 1); if (i) { LZX_READ_BITS(i, 16); LZX_READ_BITS(j, 16); }
+ lzx->intel_filesize = (i << 16) | j;
+ lzx->header_read = 1;
+ }
+
+ /* calculate size of frame: all frames are 32k except the final frame
+ * which is 32kb or less. this can only be calculated when lzx->length
+ * has been filled in. */
+ frame_size = LZX_FRAME_SIZE;
+ if (lzx->length && (lzx->length - lzx->offset) < (off_t)frame_size) {
+ frame_size = lzx->length - lzx->offset;
+ }
+
+ /* decode until one more frame is available */
+ bytes_todo = lzx->frame_posn + frame_size - window_posn;
+ while (bytes_todo > 0) {
+ /* initialise new block, if one is needed */
+ if (lzx->block_remaining == 0) {
+ /* realign if previous block was an odd-sized UNCOMPRESSED block */
+ if ((lzx->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) &&
+ (lzx->block_length & 1))
+ {
+ if (i_ptr == i_end) {
+ if (lzx_read_input(lzx)) return lzx->error;
+ i_ptr = lzx->i_ptr;
+ i_end = lzx->i_end;
+ }
+ i_ptr++;
+ }
+
+ /* read block type (3 bits) and block length (24 bits) */
+ LZX_READ_BITS(lzx->block_type, 3);
+ LZX_READ_BITS(i, 16); LZX_READ_BITS(j, 8);
+ lzx->block_remaining = lzx->block_length = (i << 8) | j;
+
+ /* read individual block headers */
+ switch (lzx->block_type) {
+ case LZX_BLOCKTYPE_ALIGNED:
+ /* read lengths of and build aligned huffman decoding tree */
+ for (i = 0; i < 8; i++) { LZX_READ_BITS(j, 3); lzx->ALIGNED_len[i] = j; }
+ LZX_BUILD_TABLE(ALIGNED);
+ /* no break -- rest of aligned header is same as verbatim */
+ case LZX_BLOCKTYPE_VERBATIM:
+ /* read lengths of and build main huffman decoding tree */
+ LZX_READ_LENGTHS(MAINTREE, 0, 256);
+ LZX_READ_LENGTHS(MAINTREE, 256, LZX_NUM_CHARS + (lzx->posn_slots << 3));
+ LZX_BUILD_TABLE(MAINTREE);
+ /* if the literal 0xE8 is anywhere in the block... */
+ if (lzx->MAINTREE_len[0xE8] != 0) lzx->intel_started = 1;
+ /* read lengths of and build lengths huffman decoding tree */
+ LZX_READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
+ LZX_BUILD_TABLE(LENGTH);
+ break;
+
+ case LZX_BLOCKTYPE_UNCOMPRESSED:
+ /* because we can't assume otherwise */
+ lzx->intel_started = 1;
+
+ /* read 1-16 (not 0-15) bits to align to bytes */
+ LZX_ENSURE_BITS(16);
+ if (bits_left > 16) i_ptr -= 2;
+ bits_left = 0; bit_buffer = 0;
+
+ /* read 12 bytes of stored R0 / R1 / R2 values */
+ for (rundest = &buf[0], i = 0; i < 12; i++) {
+ if (i_ptr == i_end) {
+ if (lzx_read_input(lzx)) return lzx->error;
+ i_ptr = lzx->i_ptr;
+ i_end = lzx->i_end;
+ }
+ *rundest++ = *i_ptr++;
+ }
+ /* assemble three little-endian 32-bit values.
+ * NOTE(review): buf[n] is promoted to int before shifting, so "<< 24"
+ * can move a set bit into the sign position */
+ R0 = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
+ R1 = buf[4] | (buf[5] << 8) | (buf[6] << 16) | (buf[7] << 24);
+ R2 = buf[8] | (buf[9] << 8) | (buf[10] << 16) | (buf[11] << 24);
+ break;
+
+ default:
+ cli_dbgmsg("lzx_decompress: bad block type (0x%x)\n", lzx->block_type);
+ return lzx->error = CL_EFORMAT;
+ }
+ }
+
+ /* decode more of the block:
+ * run = min(what's available, what's needed) */
+ this_run = lzx->block_remaining;
+ if (this_run > bytes_todo) this_run = bytes_todo;
+
+ /* assume we decode exactly this_run bytes, for now */
+ bytes_todo -= this_run;
+ lzx->block_remaining -= this_run;
+
+ /* decode at least this_run bytes */
+ switch (lzx->block_type) {
+ case LZX_BLOCKTYPE_VERBATIM:
+ while (this_run > 0) {
+ LZX_READ_HUFFSYM(MAINTREE, main_element);
+ if (main_element < LZX_NUM_CHARS) {
+ /* literal: 0 to LZX_NUM_CHARS-1 */
+ window[window_posn++] = main_element;
+ this_run--;
+ }
+ else {
+ /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
+ main_element -= LZX_NUM_CHARS;
+
+ /* get match length */
+ match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
+ if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+ LZX_READ_HUFFSYM(LENGTH, length_footer);
+ match_length += length_footer;
+ }
+ match_length += LZX_MIN_MATCH;
+
+ /* get match offset */
+ switch ((match_offset = (main_element >> 3))) {
+ case 0: match_offset = R0; break;
+ case 1: match_offset = R1; R1=R0; R0 = match_offset; break;
+ case 2: match_offset = R2; R2=R0; R0 = match_offset; break;
+ case 3: match_offset = 1; R2=R1; R1=R0; R0 = match_offset; break;
+ default:
+ extra = lzx->extra_bits[match_offset];
+ LZX_READ_BITS(verbatim_bits, extra);
+ match_offset = lzx->position_base[match_offset] - 2 + verbatim_bits;
+ R2 = R1; R1 = R0; R0 = match_offset;
+ }
+
+ /* reject a match whose destination would run past the window end */
+ if ((window_posn + match_length) > lzx->window_size) {
+ cli_dbgmsg("lzx_decompress: match ran over window wrap\n");
+ return lzx->error = CL_EFORMAT;
+ }
+
+ /* copy match */
+ rundest = &window[window_posn];
+ i = match_length;
+ /* does match offset wrap the window? */
+ if (match_offset > window_posn) {
+ /* j = length from match offset to end of window */
+ j = match_offset - window_posn;
+ if (j > (int) lzx->window_size) {
+ cli_dbgmsg("lzx_decompress: match offset beyond window boundaries\n");
+ return lzx->error = CL_EFORMAT;
+ }
+ runsrc = &window[lzx->window_size - j];
+ if (j < i) {
+ /* if match goes over the window edge, do two copy runs */
+ i -= j; while (j-- > 0) *rundest++ = *runsrc++;
+ runsrc = window;
+ }
+ while (i-- > 0) *rundest++ = *runsrc++;
+ }
+ else {
+ runsrc = rundest - match_offset;
+ while (i-- > 0) *rundest++ = *runsrc++;
+ }
+
+ this_run -= match_length;
+ window_posn += match_length;
+ }
+ } /* while (this_run > 0) */
+ break;
+
+ case LZX_BLOCKTYPE_ALIGNED:
+ while (this_run > 0) {
+ LZX_READ_HUFFSYM(MAINTREE, main_element);
+ if (main_element < LZX_NUM_CHARS) {
+ /* literal: 0 to LZX_NUM_CHARS-1 */
+ window[window_posn++] = main_element;
+ this_run--;
+ }
+ else {
+ /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
+ main_element -= LZX_NUM_CHARS;
+
+ /* get match length */
+ match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
+ if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
+ LZX_READ_HUFFSYM(LENGTH, length_footer);
+ match_length += length_footer;
+ }
+ match_length += LZX_MIN_MATCH;
+
+ /* get match offset */
+ switch ((match_offset = (main_element >> 3))) {
+ case 0: match_offset = R0; break;
+ case 1: match_offset = R1; R1 = R0; R0 = match_offset; break;
+ case 2: match_offset = R2; R2 = R0; R0 = match_offset; break;
+ default:
+ extra = lzx->extra_bits[match_offset];
+ match_offset = lzx->position_base[match_offset] - 2;
+ if (extra > 3) {
+ /* verbatim and aligned bits */
+ extra -= 3;
+ LZX_READ_BITS(verbatim_bits, extra);
+ match_offset += (verbatim_bits << 3);
+ LZX_READ_HUFFSYM(ALIGNED, aligned_bits);
+ match_offset += aligned_bits;
+ }
+ else if (extra == 3) {
+ /* aligned bits only */
+ LZX_READ_HUFFSYM(ALIGNED, aligned_bits);
+ match_offset += aligned_bits;
+ }
+ else if (extra > 0) { /* extra==1, extra==2 */
+ /* verbatim bits only */
+ LZX_READ_BITS(verbatim_bits, extra);
+ match_offset += verbatim_bits;
+ }
+ else /* extra == 0 */ {
+ /* ??? not defined in LZX specification! */
+ match_offset = 1;
+ }
+ /* update repeated offset LRU queue */
+ R2 = R1; R1 = R0; R0 = match_offset;
+ }
+
+ /* reject a match whose destination would run past the window end */
+ if ((window_posn + match_length) > lzx->window_size) {
+ cli_dbgmsg("lzx_decompress: match ran over window wrap\n");
+ return lzx->error = CL_EFORMAT;
+ }
+
+ /* copy match */
+ rundest = &window[window_posn];
+ i = match_length;
+ /* does match offset wrap the window? */
+ if (match_offset > window_posn) {
+ /* j = length from match offset to end of window */
+ j = match_offset - window_posn;
+ if (j > (int) lzx->window_size) {
+ cli_dbgmsg("lzx_decompress: match offset beyond window boundaries\n");
+ return lzx->error = CL_EFORMAT;
+ }
+ runsrc = &window[lzx->window_size - j];
+ if (j < i) {
+ /* if match goes over the window edge, do two copy runs */
+ i -= j; while (j-- > 0) *rundest++ = *runsrc++;
+ runsrc = window;
+ }
+ while (i-- > 0) *rundest++ = *runsrc++;
+ }
+ else {
+ runsrc = rundest - match_offset;
+ while (i-- > 0) *rundest++ = *runsrc++;
+ }
+
+ this_run -= match_length;
+ window_posn += match_length;
+ }
+ } /* while (this_run > 0) */
+ break;
+
+ case LZX_BLOCKTYPE_UNCOMPRESSED:
+ /* as this_run is limited not to wrap a frame, this also means it
+ * won't wrap the window (as the window is a multiple of 32k) */
+ rundest = &window[window_posn];
+ window_posn += this_run;
+ while (this_run > 0) {
+ if ((i = i_end - i_ptr)) {
+ if (i > this_run) i = this_run;
+ memcpy(rundest, i_ptr, (size_t) i);
+ rundest += i;
+ i_ptr += i;
+ this_run -= i;
+ }
+ else {
+ if (lzx_read_input(lzx)) return lzx->error;
+ i_ptr = lzx->i_ptr;
+ i_end = lzx->i_end;
+ }
+ }
+ break;
+
+ default:
+ return lzx->error = CL_EFORMAT; /* might as well */
+ }
+
+ /* did the final match overrun our desired this_run length? */
+ if (this_run < 0) {
+ if ((unsigned int)(-this_run) > lzx->block_remaining) {
+ cli_dbgmsg("lzx_decompress: overrun went past end of block by %d (%d remaining)\n", -this_run, lzx->block_remaining);
+ return lzx->error = CL_EFORMAT;
+ }
+ lzx->block_remaining -= -this_run;
+ }
+ } /* while (bytes_todo > 0) */
+
+ /* streams don't extend over frame boundaries */
+ if ((window_posn - lzx->frame_posn) != frame_size) {
+ cli_dbgmsg("lzx_decompress: decode beyond output frame limits! %d != %d\n", window_posn - lzx->frame_posn, frame_size);
+ return lzx->error = CL_EFORMAT;
+ }
+
+ /* re-align input bitstream */
+ if (bits_left > 0) LZX_ENSURE_BITS(16);
+ if (bits_left & 15) LZX_REMOVE_BITS(bits_left & 15);
+
+ /* check that we've used all of the previous frame first */
+ if (lzx->o_ptr != lzx->o_end) {
+ cli_dbgmsg("lzx_decompress: %d avail bytes, new %d frame\n", lzx->o_end-lzx->o_ptr, frame_size);
+ return lzx->error = CL_EFORMAT;
+ }
+
+ /* does this intel block _really_ need decoding? */
+ if (lzx->intel_started && lzx->intel_filesize &&
+ (lzx->frame <= 32768) && (frame_size > 10))
+ {
+ unsigned char *data = &lzx->e8_buf[0];
+ unsigned char *dataend = &lzx->e8_buf[frame_size - 10];
+ signed int curpos = lzx->intel_curpos;
+ signed int filesize = lzx->intel_filesize;
+ signed int abs_off, rel_off;
+
+ /* copy e8 block to the e8 buffer and tweak if needed */
+ lzx->o_ptr = data;
+ memcpy(data, &lzx->window[lzx->frame_posn], frame_size);
+
+ /* each 0xE8 byte is followed by a 4-byte little-endian operand, which is
+ * rewritten only when it lies in the range [-curpos, filesize) */
+ while (data < dataend) {
+ if (*data++ != 0xE8) { curpos++; continue; }
+ abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
+ if ((abs_off >= -curpos) && (abs_off < filesize)) {
+ rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
+ data[0] = (unsigned char) rel_off;
+ data[1] = (unsigned char) (rel_off >> 8);
+ data[2] = (unsigned char) (rel_off >> 16);
+ data[3] = (unsigned char) (rel_off >> 24);
+ }
+ data += 4;
+ curpos += 5;
+ }
+ lzx->intel_curpos += frame_size;
+ }
+ else {
+ /* no fix-up needed: output straight from the window */
+ lzx->o_ptr = &lzx->window[lzx->frame_posn];
+ if (lzx->intel_filesize) lzx->intel_curpos += frame_size;
+ }
+ lzx->o_end = &lzx->o_ptr[frame_size];
+
+ /* write a frame */
+ i = (out_bytes < (off_t)frame_size) ? (unsigned int)out_bytes : frame_size;
+ if (lzx->wflag && cli_writen(lzx->ofd, lzx->o_ptr, i) != i) {
+ return lzx->error = CL_EIO;
+ }
+ lzx->o_ptr += i;
+ lzx->offset += i;
+ out_bytes -= i;
+
+ /* advance frame start position */
+ lzx->frame_posn += frame_size;
+ lzx->frame++;
+
+ /* wrap window / frame position pointers */
+ if (window_posn == lzx->window_size) window_posn = 0;
+ if (lzx->frame_posn == lzx->window_size) lzx->frame_posn = 0;
+
+ } /* while (lzx->frame < end_frame) */
+
+ if (out_bytes) {
+ cli_dbgmsg("lzx_decompress: bytes left to output\n");
+ return lzx->error = CL_EFORMAT;
+ }
+
+ /* store local state (the error returns above skip this step) */
+ LZX_STORE_BITS;
+ lzx->window_posn = window_posn;
+ lzx->R0 = R0;
+ lzx->R1 = R1;
+ lzx->R2 = R2;
+
+ return CL_SUCCESS;
+}
+
+/* Releases an LZX stream together with its input buffer and window.
+ * Passing NULL does nothing. */
+void lzx_free(struct lzx_stream *lzx) {
+  if (!lzx) return;
+
+  free(lzx->inbuf);
+  free(lzx->window);
+  free(lzx);
+}
+
+/***************************************************************************
+ * Quantum decompression implementation
+ ***************************************************************************
+ * The Quantum method was created by David Stafford, adapted by Microsoft
+ * Corporation.
+ *
+ * This decompressor is based on an implementation by Matthew Russotto, used
+ * with permission.
+ *
+ * This decompressor was researched and implemented by Matthew Russotto. It
+ * has since been tidied up by Stuart Caie. More information can be found at
+ * http://www.speakeasy.org/~russotto/quantumcomp.html
+ */
+
+/* Quantum decompressor bitstream reading macros
+ *
+ * QTM_STORE_BITS stores bitstream state in qtm_stream structure
+ * QTM_RESTORE_BITS restores bitstream state from qtm_stream structure
+ * QTM_READ_BITS(var,n) takes N bits from the buffer and puts them in var
+ * QTM_FILL_BUFFER if there is room for another 16 bits, reads another
+ * 16 bits from the input stream.
+ * QTM_PEEK_BITS(n) extracts without removing N bits from the bit buffer
+ * QTM_REMOVE_BITS(n) removes N bits from the bit buffer
+ *
+ * These bit access routines work by using the area beyond the MSB and the
+ * LSB as a free source of zeroes. This avoids having to mask any bits.
+ * So we have to know the bit width of the bitbuffer variable.
+ */
+
+/* width of the bit buffer variable, in bits */
+#define QTM_BITBUF_WIDTH (sizeof(unsigned int) * CHAR_BIT)
+
+/* copies the local bitstream variables back into the qtm_stream */
+#define QTM_STORE_BITS do { \
+  qtm->i_ptr = i_ptr; \
+  qtm->i_end = i_end; \
+  qtm->bit_buffer = bit_buffer; \
+  qtm->bits_left = bits_left; \
+} while (0)
+
+/* loads the local bitstream variables from the qtm_stream */
+#define QTM_RESTORE_BITS do { \
+  i_ptr = qtm->i_ptr; \
+  i_end = qtm->i_end; \
+  bit_buffer = qtm->bit_buffer; \
+  bits_left = qtm->bits_left; \
+} while (0)
+
+/* adds 16 bits to bit buffer, if there's space for the new bits.
+ * Refills the input buffer through qtm_read_input() when it is used up and
+ * returns qtm->error from the enclosing function if that fails.
+ * The two input bytes are combined as an int and converted to unsigned int
+ * BEFORE being shifted into place: shifting the int itself by up to 16 bits
+ * could overflow it, which is undefined behaviour. */
+#define QTM_FILL_BUFFER do { \
+  if (bits_left <= (QTM_BITBUF_WIDTH - 16)) { \
+    if (i_ptr >= i_end) { \
+      if (qtm_read_input(qtm)) return qtm->error; \
+      i_ptr = qtm->i_ptr; \
+      i_end = qtm->i_end; \
+    } \
+    bit_buffer |= ((unsigned int) ((i_ptr[0] << 8) | i_ptr[1])) \
+                  << (QTM_BITBUF_WIDTH - 16 - bits_left); \
+    bits_left += 16; \
+    i_ptr += 2; \
+  } \
+} while (0)
+
+/* the unread bits sit at the most significant end of bit_buffer; n must be
+ * at least 1, because shifting by the full width is undefined */
+#define QTM_PEEK_BITS(n) (bit_buffer >> (QTM_BITBUF_WIDTH - (n)))
+#define QTM_REMOVE_BITS(n) ((bit_buffer <<= (n)), (bits_left -= (n)))
+
+/* reads `bits` bits into val, most significant first, in as many runs as
+ * the buffer contents require; uses the locals bits_needed and bit_run */
+#define QTM_READ_BITS(val, bits) do { \
+  (val) = 0; \
+  for (bits_needed = (bits); bits_needed > 0; bits_needed -= bit_run) { \
+    QTM_FILL_BUFFER; \
+    bit_run = (bits_left < bits_needed) ? bits_left : bits_needed; \
+    (val) = ((val) << bit_run) | QTM_PEEK_BITS(bit_run); \
+    QTM_REMOVE_BITS(bit_run); \
+  } \
+} while (0)
+
+/* Refills the input buffer with up to inbuf_size bytes, through the
+ * stream's read callback when one is set and through cli_readn() on
+ * qtm->fd otherwise. On success i_ptr / i_end are set to delimit the bytes
+ * just read and CL_SUCCESS is returned; a negative byte count stores
+ * CL_EIO in qtm->error and returns it. */
+static int qtm_read_input(struct qtm_stream *qtm) {
+  int got;
+
+  if (qtm->read) {
+    got = qtm->read(qtm->file, &qtm->inbuf[0], (int)qtm->inbuf_size);
+  }
+  else {
+    got = cli_readn(qtm->fd, &qtm->inbuf[0], (int)qtm->inbuf_size);
+  }
+
+  if (got < 0) return qtm->error = CL_EIO;
+
+  qtm->i_end = &qtm->inbuf[got];
+  qtm->i_ptr = &qtm->inbuf[0];
+  return CL_SUCCESS;
+}
+
+/* Arithmetic decoder:
+ *
+ * QTM_GET_SYMBOL(model, var) fetches the next symbol from the stated model
+ * and puts it in var.
+ *
+ * The macro works on the caller's locals H, L, C, range, symf and i, and
+ * pulls further input bits through QTM_FILL_BUFFER / QTM_PEEK_BITS /
+ * QTM_REMOVE_BITS (so it can return from the enclosing function when the
+ * input refill fails). `model` is expanded several times and must be an
+ * lvalue expression without side effects.
+ *
+ * Steps, in order:
+ *  - scale C into the model's frequency range and scan the table for the
+ *    first entry whose cumfreq is <= that value; the symbol is the entry
+ *    just before it
+ *  - narrow H and L to the chosen entry's share of the range
+ *  - add 8 to the cumfreq of every entry from the chosen one down to
+ *    entry 0
+ *  - renormalise: while H and L agree in bit 15 (or the underflow case
+ *    applies) shift both left and shift one new input bit into C
+ *
+ * If necessary, qtm_update_model() is called (when the total in
+ * syms[0].cumfreq exceeds 3800).
+ */
+#define QTM_GET_SYMBOL(model, var) do { \
+ range = ((H - L) & 0xFFFF) + 1; \
+ symf = ((((C - L + 1) * model.syms[0].cumfreq)-1) / range) & 0xFFFF; \
+ \
+ for (i = 1; i < model.entries; i++) { \
+ if (model.syms[i].cumfreq <= symf) break; \
+ } \
+ (var) = model.syms[i-1].sym; \
+ \
+ range = (H - L) + 1; \
+ symf = model.syms[0].cumfreq; \
+ H = L + ((model.syms[i-1].cumfreq * range) / symf) - 1; \
+ L = L + ((model.syms[i].cumfreq * range) / symf); \
+ \
+ do { model.syms[--i].cumfreq += 8; } while (i > 0); \
+ if (model.syms[0].cumfreq > 3800) qtm_update_model(&model); \
+ \
+ while (1) { \
+ if ((L & 0x8000) != (H & 0x8000)) { \
+ if ((L & 0x4000) && !(H & 0x4000)) { \
+ /* underflow case */ \
+ C ^= 0x4000; L &= 0x3FFF; H |= 0x4000; \
+ } \
+ else break; \
+ } \
+ L <<= 1; H = (H << 1) | 1; \
+ QTM_FILL_BUFFER; \
+ C = (C << 1) | QTM_PEEK_BITS(1); \
+ QTM_REMOVE_BITS(1); \
+ } \
+} while (0)
+
+/* Rescales a model's frequency table.
+ *
+ * Most calls simply halve every cumulative frequency while keeping the
+ * table strictly decreasing. When the shiftsleft countdown reaches zero
+ * the table is instead converted to per-symbol frequencies, halved,
+ * re-sorted into decreasing order and converted back, and the countdown
+ * restarts at 50.
+ *
+ * Both paths read syms[entries], the extra entry past the last symbol. */
+static void qtm_update_model(struct qtm_model *model) {
+  struct qtm_modelsym held;
+  int a, b;
+
+  model->shiftsleft--;
+
+  if (model->shiftsleft == 0) {
+    model->shiftsleft = 50;
+
+    /* cumulative -> individual frequencies, then halve; the +1 keeps a
+     * symbol from disappearing entirely */
+    a = 0;
+    while (a < model->entries) {
+      model->syms[a].cumfreq -= model->syms[a + 1].cumfreq;
+      model->syms[a].cumfreq++;
+      model->syms[a].cumfreq >>= 1;
+      a++;
+    }
+
+    /* in-place selection sort into decreasing frequency; the exact swap
+     * pattern matters, so a different sort must not be substituted */
+    a = 0;
+    while (a < model->entries - 1) {
+      b = a + 1;
+      while (b < model->entries) {
+        if (model->syms[a].cumfreq < model->syms[b].cumfreq) {
+          held = model->syms[a];
+          model->syms[a] = model->syms[b];
+          model->syms[b] = held;
+        }
+        b++;
+      }
+      a++;
+    }
+
+    /* individual -> cumulative frequencies again, back to front */
+    a = model->entries - 1;
+    while (a >= 0) {
+      model->syms[a].cumfreq += model->syms[a + 1].cumfreq;
+      a--;
+    }
+    return;
+  }
+
+  /* ordinary case: halve from the last symbol down to entry 0, bumping any
+   * entry that would no longer exceed its successor */
+  a = model->entries - 1;
+  while (a >= 0) {
+    model->syms[a].cumfreq >>= 1;
+    if (model->syms[a].cumfreq <= model->syms[a + 1].cumfreq) {
+      model->syms[a].cumfreq = model->syms[a + 1].cumfreq + 1;
+    }
+    a--;
+  }
+}
+
+/* Prepares a model for the len consecutive symbols start .. start+len-1,
+ * using syms as its table. The table receives len+1 entries: entry k holds
+ * symbol start+k with cumulative frequency len-k, so the final, extra entry
+ * has cumulative frequency 0. */
+static void qtm_init_model(struct qtm_model *model,
+                           struct qtm_modelsym *syms, int start, int len)
+{
+  int k = 0;
+
+  model->syms       = syms;
+  model->entries    = len;
+  model->shiftsleft = 4;
+
+  while (k <= len) {
+    syms[k].sym     = start + k;
+    syms[k].cumfreq = len - k;
+    k++;
+  }
+}
+
+
+/*-------- main Quantum code --------*/
+
+/* Allocates and initialises a Quantum decompression stream.
+ *
+ * fd, ofd           - input and output descriptors, stored in the stream.
+ * window_bits       - log2 of the window size; only 15..21 is accepted.
+ * input_buffer_size - size of the input buffer in bytes; rounded up to an
+ *                     even number, must end up >= 2.
+ * file, read        - optional read callback and its context; when read is
+ *                     NULL, qtm_read_input() reads from fd instead.
+ *
+ * The stream, its window and its input buffer are zero-filled on
+ * allocation, so the coder registers H, L and C start out defined and a
+ * match that refers to window bytes not yet written copies zeroes rather
+ * than uninitialised heap contents.
+ *
+ * Returns the new stream, or NULL when an argument is out of range or an
+ * allocation fails. Release the stream with qtm_free().
+ */
+struct qtm_stream *qtm_init(int fd, int ofd,
+                            int window_bits, int input_buffer_size,
+                            struct cab_file *file,
+                            int (*read)(struct cab_file *, unsigned char *, int))
+{
+  unsigned int window_size;
+  struct qtm_stream *qtm;
+  unsigned offset;
+  int i;
+
+  /* Quantum supports window sizes of 2^10 (1Kb) through 2^21 (2Mb) */
+
+  /* tk: temporary fix: only process 32KB+ window sizes */
+  if (window_bits < 15 || window_bits > 21) return NULL;
+
+  /* shift only after window_bits has been range-checked: shifting by a
+   * negative or over-wide count is undefined behaviour */
+  window_size = 1U << window_bits;
+
+  input_buffer_size = (input_buffer_size + 1) & -2;
+  if (input_buffer_size < 2) return NULL;
+
+  /* allocate decompression state */
+  if (!(qtm = cli_calloc(1, sizeof(struct qtm_stream)))) {
+    return NULL;
+  }
+
+  /* match offset tables: base offset and extra-bit count per position slot */
+  for (i = 0, offset = 0; i < 42; i++) {
+    qtm->position_base[i] = offset;
+    qtm->extra_bits[i] = ((i < 2) ? 0 : (i - 2)) >> 1;
+    offset += 1 << qtm->extra_bits[i];
+  }
+
+  /* match length tables: base length and extra-bit count per length slot */
+  for (i = 0, offset = 0; i < 26; i++) {
+    qtm->length_base[i] = offset;
+    qtm->length_extra[i] = (i < 2 ? 0 : i - 2) >> 2;
+    offset += 1 << qtm->length_extra[i];
+  }
+  qtm->length_base[26] = 254; qtm->length_extra[26] = 0;
+
+  /* allocate decompression window and input buffer */
+  qtm->window = cli_calloc(1, (size_t) window_size);
+  if (!qtm->window) {
+    free(qtm);
+    return NULL;
+  }
+
+  qtm->inbuf = cli_calloc(1, (size_t) input_buffer_size);
+  if (!qtm->inbuf) {
+    free(qtm->window);
+    free(qtm);
+    return NULL;
+  }
+
+  /* initialise decompression state */
+  qtm->fd = fd;
+  qtm->ofd = ofd;
+  qtm->wflag = 1;
+  qtm->inbuf_size = input_buffer_size;
+  qtm->window_size = window_size;
+  qtm->window_posn = 0;
+  qtm->frame_start = 0;
+  qtm->header_read = 0;
+  qtm->error = CL_SUCCESS;
+
+  /* both buffers start out empty */
+  qtm->i_ptr = qtm->i_end = &qtm->inbuf[0];
+  qtm->o_ptr = qtm->o_end = &qtm->window[0];
+  qtm->bits_left = 0;
+  qtm->bit_buffer = 0;
+
+  /* initialise arithmetic coding models
+   * - model 4    depends on window size, ranges from 20 to 24
+   * - model 5    depends on window size, ranges from 20 to 36
+   * - model 6pos depends on window size, ranges from 20 to 42
+   */
+  i = window_bits * 2;
+  qtm_init_model(&qtm->model0, &qtm->m0sym[0], 0, 64);
+  qtm_init_model(&qtm->model1, &qtm->m1sym[0], 64, 64);
+  qtm_init_model(&qtm->model2, &qtm->m2sym[0], 128, 64);
+  qtm_init_model(&qtm->model3, &qtm->m3sym[0], 192, 64);
+  qtm_init_model(&qtm->model4, &qtm->m4sym[0], 0, (i > 24) ? 24 : i);
+  qtm_init_model(&qtm->model5, &qtm->m5sym[0], 0, (i > 36) ? 36 : i);
+  qtm_init_model(&qtm->model6, &qtm->m6sym[0], 0, i);
+  qtm_init_model(&qtm->model6len, &qtm->m6lsym[0], 0, 27);
+  qtm_init_model(&qtm->model7, &qtm->m7sym[0], 0, 7);
+
+  qtm->file = file;
+  qtm->read = read;
+
+  /* all ok */
+  return qtm;
+}
+
+/* Decompresses Quantum data until out_bytes further bytes have been
+ * delivered.
+ *
+ * Bytes decoded by an earlier call but not yet handed out are flushed
+ * first. Decoding then proceeds into the window, at most up to the next
+ * frame boundary per pass; at each completed frame the input is re-aligned
+ * and the coder header is re-read, and when the window fills up its
+ * contents are flushed and decoding restarts at the window start. Output
+ * is written to qtm->ofd with cli_writen() when qtm->wflag is set.
+ *
+ * Returns CL_SUCCESS on success, CL_ENULLARG when qtm is NULL or out_bytes
+ * is negative, CL_EIO when a write comes up short and CL_EFORMAT when the
+ * stream is found to be malformed. The CL_EIO / CL_EFORMAT failures are
+ * stored in qtm->error, and a non-zero qtm->error makes any later call
+ * return it immediately. The QTM_* macros can also return qtm->error from
+ * this function when refilling the input fails.
+ */
+int qtm_decompress(struct qtm_stream *qtm, off_t out_bytes) {
+  unsigned int frame_start, frame_end, window_posn, match_offset, range;
+  unsigned char *window, *i_ptr, *i_end, *runsrc, *rundest;
+  int i, j, selector, extra, sym, match_length;
+  unsigned short H, L, C, symf;
+
+  register unsigned int bit_buffer;
+  register unsigned char bits_left;
+  unsigned char bits_needed, bit_run;
+
+  /* easy answers */
+  if (!qtm || (out_bytes < 0)) return CL_ENULLARG;
+  if (qtm->error) return qtm->error;
+
+  /* flush out any stored-up bytes before we begin */
+  i = qtm->o_end - qtm->o_ptr;
+  if ((off_t) i > out_bytes) i = (int) out_bytes;
+  if (i) {
+    if (qtm->wflag && cli_writen(qtm->ofd, qtm->o_ptr, i) != i) {
+      return qtm->error = CL_EIO;
+    }
+    qtm->o_ptr += i;
+    out_bytes -= i;
+  }
+  if (out_bytes == 0) return CL_SUCCESS;
+
+  /* restore local state */
+  QTM_RESTORE_BITS;
+  window = qtm->window;
+  window_posn = qtm->window_posn;
+  frame_start = qtm->frame_start;
+  H = qtm->H;
+  L = qtm->L;
+  C = qtm->C;
+
+  /* while we do not have enough decoded bytes in reserve: */
+  while ((qtm->o_end - qtm->o_ptr) < out_bytes) {
+
+    /* read header if necessary. Initialises H, L and C */
+    if (!qtm->header_read) {
+      H = 0xFFFF; L = 0; QTM_READ_BITS(C, 16);
+      qtm->header_read = 1;
+    }
+
+    /* decode more, at most up to the frame boundary */
+    frame_end = window_posn + (out_bytes - (qtm->o_end - qtm->o_ptr));
+    if ((frame_start + QTM_FRAME_SIZE) < frame_end) {
+      frame_end = frame_start + QTM_FRAME_SIZE;
+    }
+
+    while (window_posn < frame_end) {
+      QTM_GET_SYMBOL(qtm->model7, selector);
+      if (selector < 4) {
+        /* selectors 0-3 = literal byte, each with its own model */
+        struct qtm_model *mdl = (selector == 0) ? &qtm->model0 :
+                                ((selector == 1) ? &qtm->model1 :
+                                ((selector == 2) ? &qtm->model2 :
+                                                   &qtm->model3));
+        QTM_GET_SYMBOL((*mdl), sym);
+        window[window_posn++] = sym;
+      }
+      else {
+        switch (selector) {
+        case 4: /* selector 4 = fixed length match (3 bytes) */
+          QTM_GET_SYMBOL(qtm->model4, sym);
+          QTM_READ_BITS(extra, qtm->extra_bits[sym]);
+          match_offset = qtm->position_base[sym] + extra + 1;
+          match_length = 3;
+          break;
+
+        case 5: /* selector 5 = fixed length match (4 bytes) */
+          QTM_GET_SYMBOL(qtm->model5, sym);
+          QTM_READ_BITS(extra, qtm->extra_bits[sym]);
+          match_offset = qtm->position_base[sym] + extra + 1;
+          match_length = 4;
+          break;
+
+        case 6: /* selector 6 = variable length match */
+          QTM_GET_SYMBOL(qtm->model6len, sym);
+          QTM_READ_BITS(extra, qtm->length_extra[sym]);
+          match_length = qtm->length_base[sym] + extra + 5;
+
+          QTM_GET_SYMBOL(qtm->model6, sym);
+          QTM_READ_BITS(extra, qtm->extra_bits[sym]);
+          match_offset = qtm->position_base[sym] + extra + 1;
+          break;
+
+        default:
+          /* should be impossible, model7 can only return 0-6 */
+          return qtm->error = CL_EFORMAT;
+        }
+
+        /* a match must never be written past the end of the window buffer.
+         * Such a match would also overshoot the frame boundary and be
+         * rejected further down, but only after the out-of-bounds write
+         * had already happened. */
+        if ((window_posn + match_length) > qtm->window_size) {
+          cli_dbgmsg("qtm_decompress: match ran over window wrap\n");
+          return qtm->error = CL_EFORMAT;
+        }
+
+        rundest = &window[window_posn];
+        i = match_length;
+        /* does match offset wrap the window? */
+        if (match_offset > window_posn) {
+          /* j = length from match offset to end of window */
+          j = match_offset - window_posn;
+          if (j > (int) qtm->window_size) {
+            cli_dbgmsg("qtm_decompress: match offset beyond window boundaries\n");
+            return qtm->error = CL_EFORMAT;
+          }
+          runsrc = &window[qtm->window_size - j];
+          if (j < i) {
+            /* if match goes over the window edge, do two copy runs */
+            i -= j; while (j-- > 0) *rundest++ = *runsrc++;
+            runsrc = window;
+          }
+          while (i-- > 0) *rundest++ = *runsrc++;
+        }
+        else {
+          runsrc = rundest - match_offset;
+          while (i-- > 0) *rundest++ = *runsrc++;
+        }
+        window_posn += match_length;
+      }
+    } /* while (window_posn < frame_end) */
+
+    qtm->o_end = &window[window_posn];
+
+    /* another frame completed? */
+    if ((window_posn - frame_start) >= QTM_FRAME_SIZE) {
+      if ((window_posn - frame_start) != QTM_FRAME_SIZE) {
+        cli_dbgmsg("qtm_decompress: overshot frame alignment\n");
+        return qtm->error = CL_EFORMAT;
+      }
+
+      /* re-align input: drop the partial byte, then skip bytes up to and
+       * including the next 0xFF */
+      if (bits_left & 7) QTM_REMOVE_BITS(bits_left & 7);
+      do { QTM_READ_BITS(i, 8); } while (i != 0xFF);
+      qtm->header_read = 0;
+
+      /* window wrap? */
+      if (window_posn == qtm->window_size) {
+        /* flush all currently stored data */
+        i = (qtm->o_end - qtm->o_ptr);
+        if (qtm->wflag && cli_writen(qtm->ofd, qtm->o_ptr, i) != i) {
+          return qtm->error = CL_EIO;
+        }
+        out_bytes -= i;
+        qtm->o_ptr = &window[0];
+        qtm->o_end = &window[0];
+        window_posn = 0;
+      }
+
+      frame_start = window_posn;
+    }
+
+  } /* while (more bytes needed) */
+
+  /* hand out what is still owed from the decoded reserve */
+  if (out_bytes) {
+    i = (int) out_bytes;
+    if (qtm->wflag && cli_writen(qtm->ofd, qtm->o_ptr, i) != i) {
+      return qtm->error = CL_EIO;
+    }
+    qtm->o_ptr += i;
+  }
+
+  /* store local state (the error returns above skip this step) */
+  QTM_STORE_BITS;
+  qtm->window_posn = window_posn;
+  qtm->frame_start = frame_start;
+  qtm->H = H;
+  qtm->L = L;
+  qtm->C = C;
+
+  return CL_SUCCESS;
+}
+
+/* Releases a Quantum stream together with its window and input buffer.
+ * Passing NULL does nothing. */
+void qtm_free(struct qtm_stream *qtm) {
+  if (!qtm) return;
+
+  free(qtm->window);
+  free(qtm->inbuf);
+  free(qtm);
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_LZMADecode.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,557 @@
+/*
+ * LZMADecode.c
+ *
+ * This file is a part of LZMA compression module for NSIS.
+ *
+ * Original LZMA SDK Copyright (C) 1999-2006 Igor Pavlov
+ * Modifications Copyright (C) 2003-2007 Amir Szekely <kichik at netvision.net.il>
+ *
+ * Licensed under the Common Public License version 1.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ *
+ * Licence details can be found in the file COPYING.nsis.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.
+ */
+
+#include <stdlib.h>
+#include "LZMADecode.h"
+
+/*
+ * Helper macros for lzmaDecode(), which is written as one big
+ * `switch (mode)` inside `for (;;)`.  Several of these macros expand to a
+ * `case c:` label so that, after the function has returned for lack of
+ * input or output space, the next call re-enters the switch at exactly
+ * that point.  They only make sense inside lzmaDecode(), where names such
+ * as avail_in, mode, prob ... are #defined onto the local state copy.
+ */
+/* Jump to the common exit that stores the state back and returns LZMA_OK. */
+#define LEAVE { goto saveStateAndReturn; }
+/* Resume point `c`: if no input is left, remember `c` in mode and leave. */
+#define NEED_BYTE(c) case c: if (!avail_in) { mode = c; LEAVE; }
+/* Same check without a resume label; mode is left untouched. */
+#define NEED_BYTE_ if (!avail_in) LEAVE;
+/* Consume and yield one input byte (caller must have checked avail_in). */
+#define NEXT_BYTE (avail_in--, *next_in++)
+/* Resume point `c`: if no output space is left, remember `c` and leave. */
+#define NEED_OUT(c) case c: if (!avail_out) { mode = c; LEAVE; }
+/* Emit one output byte without touching totalOut. */
+#define PUT_BYTE_(b) { *next_out = b; next_out++; avail_out--; }
+/* Emit one output byte and count it in totalOut. */
+#define PUT_BYTE(b) { totalOut++; PUT_BYTE_(b) }
+/* "Call" the shared bit decoder at _LZMA_C_RDBD: the probability pointer
+   goes in prob, the return label in last; the decoder finishes with
+   `mode = last; break;`, which lands on the trailing `case c:`. */
+#define DECODE_BIT(c, x) prob = x; last = c; goto _LZMA_C_RDBD; case c:
+/* Same scheme for the length decoder (_LZMA_C_LEND), returning via last2. */
+#define DECODE_LEN(c, x) probs = x; last2 = c; goto _LZMA_C_LEND; case c:
+/* Same scheme for the bit-tree decoder (_LZMA_C_BTD), returning via last3. */
+#define DECODE_BIT_TREE(c, x, y) probs = x; numLevels = y; last3 = c; goto _LZMA_C_BTD; case c:
+
+/*
+ * Values stored in the decoder's `mode` field (and in last/last2/last3).
+ * Each one is used as a `case` label inside lzmaDecode()'s switch, either
+ * written out directly or generated by the NEED_BYTE / NEED_OUT /
+ * DECODE_* / RC_* macros, and therefore names a point at which decoding
+ * can be suspended and resumed.
+ */
+enum {
+ /* 0 */ LZMA_C_INIT = 0,
+ /* 1 */ LZMA_C_GETDICT,
+ /* 2 */ LZMA_C_BLOCK,
+ /* 3 */ LZMA_C_RDI, /* RangeDecoderInit */
+ /* 4 */ LZMA_C_RDBD, /* RangeDecoderBitDecode */
+ /* 5 */ LZMA_C_RDBD_IN, /* RangeDecoderBitDecode */
+ /* 6 */ LZMA_C_TYPE,
+ /* 7 */ LZMA_C_ISREP,
+ /* 8 */ LZMA_C_ISREPG0,
+ /* 9 */ LZMA_C_ISREP0LONG,
+ /* 10 */ LZMA_C_ISREPG1,
+ /* 11 */ LZMA_C_ISREPG2,
+ /* 12 */ LZMA_C_NORM,
+ /* 13 */ LZMA_C_LITDM1, /* LzmaLiteralDecodeMatch */
+ /* 14 */ LZMA_C_LITDM2, /* LzmaLiteralDecodeMatch */
+ /* 15 */ LZMA_C_LITD, /* LzmaLiteralDecode */
+ /* 16 */ LZMA_C_RDRBTD, /* RangeDecoderReverseBitTreeDecode */
+ /* 17 */ LZMA_C_LEND, /* LzmaLenDecode */
+ /* 18 */ LZMA_C_LEND1, /* LzmaLenDecode */
+ /* 19 */ LZMA_C_LEND2, /* LzmaLenDecode */
+ /* 20 */ LZMA_C_LEND_RES, /* LzmaLenDecode */
+ /* 21 */ LZMA_C_LEND_C1,
+ /* 22 */ LZMA_C_LEND_C2,
+ /* 23 */ LZMA_C_BTD, /* RangeDecoderBitTreeDecode */
+ /* 24 */ LZMA_C_BTD_LOOP,
+ /* 25 */ LZMA_C_BTD_C1,
+ /* 26 */ LZMA_C_OUTPUT_1,
+ /* 27 */ LZMA_C_OUTPUT_2,
+ /* 28 */ LZMA_C_OUTPUT_3
+};
+
+/* RC_NORMALIZE shifts in another input byte whenever range falls below
+   kTopValue (1 << 24). */
+#define kNumTopBits 24
+#define kTopValue ((UInt32)1 << kNumTopBits)
+
+/* Probabilities are fixed-point values scaled to kBitModelTotal (1 << 11);
+   lzmaDecode() initialises every entry to kBitModelTotal >> 1.  After each
+   decoded bit the probability moves towards the observed value by
+   1/(1 << kNumMoveBits) of the remaining distance. */
+#define kNumBitModelTotalBits 11
+#define kBitModelTotal (1 << kNumBitModelTotalBits)
+#define kNumMoveBits 5
+
+/* Renormalise the range decoder.  Contains the resume point `case c:`
+   (through NEED_BYTE), so it may only be used inside lzmaDecode()'s switch
+   and each use needs its own mode value. */
+#define RC_NORMALIZE(c) if (range < kTopValue) { NEED_BYTE(c); range <<= 8; code = (code << 8) | NEXT_BYTE; }
+
+/* Decode one bit using the probability *prob and append it to `mi`:
+   a 0 bit runs statement A0 and does mi <<= 1, a 1 bit runs A1 and does
+   mi = 2*mi + 1.  *prob is adapted in both cases and the decoder is then
+   renormalised with resume point `c`.  `prob` and `mi` are evaluated more
+   than once, so pass plain lvalues. */
+#define RC_GET_BIT2(c, prob, mi, A0, A1) { \
+ UInt32 bound = (range >> kNumBitModelTotalBits) * *prob; \
+ if (code < bound) \
+ { A0; range = bound; *prob += (kBitModelTotal - *prob) >> kNumMoveBits; mi <<= 1; } \
+ else \
+ { A1; range -= bound; code -= bound; *prob -= (*prob) >> kNumMoveBits; mi = (mi + mi) + 1; } \
+ RC_NORMALIZE(c) \
+}
+
+/* RC_GET_BIT2 with empty per-bit actions. */
+#define RC_GET_BIT(c, prob, mi) RC_GET_BIT2(c, prob, mi, ; , ;)
+
+/*
+ * Layout of the probability array used by lzmaDecode() (`p`, an array of
+ * CProb).  Every capitalised name below is an element offset: the
+ * Len* names are relative to the start of one length-coder sub-table
+ * (used as `probs + LenChoice` etc.), the others are relative to `p`
+ * itself (used as `p + IsMatch + ...`).
+ */
+/* Upper bound on the pb parameter assumed by the table sizes below. */
+#define kNumPosBitsMax 4
+#define kNumPosStatesMax (1 << kNumPosBitsMax)
+
+/* Bit widths of the three length ranges (low, mid, high). */
+#define kLenNumLowBits 3
+#define kLenNumLowSymbols (1 << kLenNumLowBits)
+#define kLenNumMidBits 3
+#define kLenNumMidSymbols (1 << kLenNumMidBits)
+#define kLenNumHighBits 8
+#define kLenNumHighSymbols (1 << kLenNumHighBits)
+
+/* Offsets inside one length coder; kNumLenProbs is its total size. */
+#define LenChoice 0
+#define LenChoice2 (LenChoice + 1)
+#define LenLow (LenChoice2 + 1)
+#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
+#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
+#define kNumLenProbs (LenHigh + kLenNumHighSymbols)
+
+/* Number of values taken by the decoder's `state` variable. */
+#define kNumStates 12
+
+/* Position slots below kStartPosModelIndex are the distance itself; slots
+   below kEndPosModelIndex use the SpecPos tables; the rest use direct bits
+   followed by kNumAlignBits bits from the Align table. */
+#define kStartPosModelIndex 4
+#define kEndPosModelIndex 14
+#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+
+#define kNumPosSlotBits 6
+#define kNumLenToPosStates 4
+
+#define kNumAlignBits 4
+#define kAlignTableSize (1 << kNumAlignBits)
+
+/* Added to every decoded match length before copying. */
+#define kMatchMinLen 2
+
+/* Offsets of the individual tables inside `p`; Literal is the start of the
+   literal coder, which occupies the rest of the array. */
+#define IsMatch 0
+#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
+#define IsRepG0 (IsRep + kNumStates)
+#define IsRepG1 (IsRepG0 + kNumStates)
+#define IsRepG2 (IsRepG1 + kNumStates)
+#define IsRep0Long (IsRepG2 + kNumStates)
+#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
+#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
+#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
+#define LenCoder (Align + kAlignTableSize)
+#define RepLenCoder (LenCoder + kNumLenProbs)
+#define Literal (RepLenCoder + kNumLenProbs)
+
+/* Number of CProb entries before the literal coder, and per literal
+   context. */
+#define LZMA_BASE_SIZE 1846
+#define LZMA_LIT_SIZE 768
+
+/* Compile-time consistency check: the bare identifier below is a
+   deliberate syntax error that fires if the offsets above stop adding up
+   to LZMA_BASE_SIZE. */
+#if Literal != LZMA_BASE_SIZE
+StopCompilingDueBUG
+#endif
+
+/*
+ * Put a decoder into its start-of-stream state.
+ *
+ * All bytes of *s are cleared except the trailing
+ * 2 * sizeof(unsigned char *) bytes, which hold the two allocated buffer
+ * pointers; those are preserved so the buffers can still be reused or
+ * freed.  The four repeat distances are then set to 1 and the range
+ * decoder's range to 0xFFFFFFFF.
+ */
+void lzmaInit(lzma_stream *s)
+{
+  lzByte *raw = (lzByte *)s;
+  const size_t preserved = sizeof(unsigned char *) * 2;
+  const size_t wipe = sizeof(lzma_stream) - preserved;
+  size_t k;
+
+  for (k = 0; k < wipe; k++)
+    raw[k] = 0;
+
+  s->rep3 = 1;
+  s->rep2 = 1;
+  s->rep1 = 1;
+  s->rep0 = 1;
+  s->range = 0xFFFFFFFF;
+}
+
+/*
+ * Resumable LZMA decoder.
+ *
+ * Consumes bytes from s->next_in / s->avail_in and produces bytes at
+ * s->next_out / s->avail_out.  The whole decoder state lives in *s; the
+ * function works on a local copy `_s` and writes it back on exit, so it
+ * can be called repeatedly as more input or output space becomes
+ * available.  The macros defined at the top of the body map short names
+ * (mode, range, code, ...) onto fields of `_s`; they are deliberately left
+ * defined because lzmaShutdown() below relies on `p` and `dictionary`.
+ *
+ * Returns:
+ *   LZMA_OK             - ran out of input or output space, or just saw
+ *                         the end marker; call again.
+ *   LZMA_STREAM_END     - the end marker was seen on a previous call.
+ *   LZMA_DATA_ERROR     - invalid properties byte, zero dictionary size,
+ *                         match distance outside the data produced so far
+ *                         or outside the dictionary, or an unknown mode.
+ *   LZMA_NOT_ENOUGH_MEM - lzmaalloc() failed.
+ *
+ * Whenever a buffer is freed or reallocated the new pointers are written
+ * back to *s immediately, so that after an error return the caller never
+ * holds a pointer that has already been freed.
+ */
+int lzmaDecode(lzma_stream *s)
+{
+  /* restore decoder state */
+  lzma_stream _s = *s;
+
+#define mode _s.mode
+#define last _s.last
+#define last2 _s.last2
+#define last3 _s.last3
+
+#define p (*(CProb **) &_s.dynamicData)
+#define dynamicDataSize _s.dynamicDataSize
+
+#define state _s.state
+#define isPreviousMatch _s.isPreviousMatch
+#define previousByte _s.previousByte
+#define rep0 _s.rep0
+#define rep1 _s.rep1
+#define rep2 _s.rep2
+#define rep3 _s.rep3
+#define lc _s.lc
+#define len _s.len
+#define totalOut _s.totalOut
+
+#define dictionary _s.dictionary
+#define dictionarySize _s.dictionarySize
+#define dictionaryPos _s.dictionaryPos
+
+#define posStateMask _s.posStateMask
+#define literalPosMask _s.literalPosMask
+
+#define avail_in _s.avail_in
+#define next_in _s.next_in
+#define avail_out _s.avail_out
+#define next_out _s.next_out
+
+#define range _s.range
+#define code _s.code
+
+#define probs _s.probs
+#define prob _s.prob
+
+/* NOTE: several names share one temp field; they are never live together. */
+#define symbol _s.temp2
+#define bit _s.temp3
+#define matchBit _s.temp1
+#define i _s.temp1
+#define result _s.temp2
+#define numLevels _s.temp3
+#define posSlot _s.temp2
+#define newDictionarySize (*(UInt32*) &_s.temp3)
+
+#define matchByte _s.matchByte
+#define mi _s.mi
+#define posState _s.posState
+
+  /* len == -1 is the "end marker already seen" flag set further down */
+  if (len == -1)
+    return LZMA_STREAM_END;
+
+  for (;;) switch (mode)
+  {
+  case LZMA_C_INIT:
+    {
+      lzByte firstByte;
+      UInt32 newDynamicDataSize;
+      UInt32 numProbs;
+      int lp;
+      int pb;
+
+      NEED_BYTE_;
+
+      firstByte = NEXT_BYTE;
+
+      /* properties byte = (pb * 5 + lp) * 9 + lc with lc <= 8, lp <= 4,
+         pb <= 4, so the valid range is 0 .. 9*5*5 - 1.  Accepting 9*5*5
+         itself would give pb == 5, beyond kNumPosBitsMax. */
+      if (firstByte >= (9*5*5))
+        return LZMA_DATA_ERROR;
+
+      pb = firstByte / (9*5);
+      firstByte %= (9*5);
+      lp = firstByte / 9;
+      firstByte %= 9;
+      lc = firstByte;
+
+      posStateMask = (1 << (pb)) - 1;
+      literalPosMask = (1 << (lp)) - 1;
+
+      /* The literal coder is indexed with
+         ((totalOut & literalPosMask) << lc) + (previousByte >> (8 - lc)),
+         i.e. it needs 1 << (lc + lp) contexts.  Sizing it with pb instead
+         of lp under-allocates whenever lp > pb. */
+      numProbs = Literal + (LZMA_LIT_SIZE << (lc + lp));
+      newDynamicDataSize = numProbs * sizeof(CProb);
+
+      if (newDynamicDataSize != dynamicDataSize)
+      {
+        if (p)
+          lzmafree(p);
+        p = lzmaalloc(newDynamicDataSize);
+        dynamicDataSize = p ? newDynamicDataSize : 0;
+        /* publish the new pointer now: the old one is gone, and an error
+           return below must not leave it behind in *s */
+        *s = _s;
+        if (!p)
+          return LZMA_NOT_ENOUGH_MEM;
+      }
+
+      while (numProbs--)
+        p[numProbs] = kBitModelTotal >> 1;
+
+      /* dictionary size: 4 bytes, little endian (resumable per byte) */
+      for (i = 0, newDictionarySize = 0; i < 4; i++)
+      {
+        NEED_BYTE(LZMA_C_GETDICT);
+        newDictionarySize |= (UInt32)NEXT_BYTE << (i * 8);
+      }
+
+      /* a zero-sized dictionary would be indexed at [0 - 1] below and
+         used as a modulus when advancing dictionaryPos */
+      if (newDictionarySize == 0)
+        return LZMA_DATA_ERROR;
+
+      if (newDictionarySize != dictionarySize)
+      {
+        if (dictionary)
+          lzmafree(dictionary);
+        dictionary = lzmaalloc(newDictionarySize);
+        dictionarySize = dictionary ? newDictionarySize : 0;
+        /* same reasoning as for the probability table above */
+        *s = _s;
+        if (!dictionary)
+          return LZMA_NOT_ENOUGH_MEM;
+      }
+
+      dictionary[dictionarySize - 1] = 0;
+
+      /* range decoder init: shift the first 5 input bytes into code */
+      i = 5;
+      while (i--)
+      {
+        NEED_BYTE(LZMA_C_RDI);
+        code = (code << 8) | NEXT_BYTE;
+      }
+    }
+    /* fall through */
+  case LZMA_C_BLOCK:
+    posState = (int)(totalOut & posStateMask);
+    DECODE_BIT(LZMA_C_TYPE, p + IsMatch + (state << kNumPosBitsMax) + posState);
+    if (bit == 0)
+    {
+      /* literal */
+      probs = p + Literal + (LZMA_LIT_SIZE *
+        (((totalOut & literalPosMask) << lc) + (previousByte >> (8 - lc))));
+
+      if (state < 4) state = 0;
+      else if (state < 10) state -= 3;
+      else state -= 6;
+      if (isPreviousMatch)
+      {
+        /* literal after a match: decode against the byte at distance rep0 */
+        UInt32 pos = dictionaryPos - rep0;
+        if (pos >= dictionarySize)
+          pos += dictionarySize;
+        matchByte = dictionary[pos];
+        {
+          symbol = 1;
+          do
+          {
+            matchBit = (matchByte >> 7) & 1;
+            matchByte <<= 1;
+            {
+              prob = probs + ((1 + matchBit) << 8) + symbol;
+              RC_GET_BIT2(LZMA_C_LITDM1, prob, symbol, bit = 0, bit = 1)
+            }
+            if (matchBit != bit)
+            {
+              /* prediction failed: finish the byte with the plain tree */
+              while (symbol < 0x100)
+              {
+                prob = probs + symbol;
+                RC_GET_BIT(LZMA_C_LITDM2, prob, symbol)
+              }
+              break;
+            }
+          }
+          while (symbol < 0x100);
+          previousByte = symbol;
+        }
+        isPreviousMatch = 0;
+      }
+      else
+      {
+        symbol = 1;
+        do
+        {
+          prob = probs + symbol;
+          RC_GET_BIT(LZMA_C_LITD, prob, symbol)
+        }
+        while (symbol < 0x100);
+        previousByte = symbol;
+      }
+      NEED_OUT(LZMA_C_OUTPUT_1);
+      PUT_BYTE(previousByte);
+      dictionary[dictionaryPos] = previousByte;
+      dictionaryPos = (dictionaryPos + 1) % dictionarySize;
+    }
+    /* bit == 1 */
+    else
+    {
+      isPreviousMatch = 1;
+      DECODE_BIT(LZMA_C_ISREP, p + IsRep + state);
+      if (bit == 1)
+      {
+        /* repeated match: reuse one of rep0..rep3 */
+        DECODE_BIT(LZMA_C_ISREPG0, p + IsRepG0 + state);
+        if (bit == 0)
+        {
+          DECODE_BIT(LZMA_C_ISREP0LONG, p + IsRep0Long + (state << kNumPosBitsMax) + posState);
+          if (bit == 0)
+          {
+            /* single byte copied from distance rep0 */
+            UInt32 pos;
+            if (totalOut == 0)
+              return LZMA_DATA_ERROR;
+            state = state < 7 ? 9 : 11;
+            NEED_OUT(LZMA_C_OUTPUT_2);
+            pos = dictionaryPos - rep0;
+            if (pos >= dictionarySize)
+              pos += dictionarySize;
+            previousByte = dictionary[pos];
+            dictionary[dictionaryPos] = previousByte;
+            dictionaryPos = (dictionaryPos + 1) % dictionarySize;
+            PUT_BYTE(previousByte);
+            mode = LZMA_C_BLOCK;
+            break;
+          }
+        }
+        else
+        {
+          UInt32 distance;
+          DECODE_BIT(LZMA_C_ISREPG1, p + IsRepG1 + state);
+          if (bit == 0)
+          {
+            distance = rep1;
+          }
+          else
+          {
+            DECODE_BIT(LZMA_C_ISREPG2, p + IsRepG2 + state);
+            if (bit == 0)
+              distance = rep2;
+            else
+            {
+              distance = rep3;
+              rep3 = rep2;
+            }
+            rep2 = rep1;
+          }
+          rep1 = rep0;
+          rep0 = distance;
+        }
+        DECODE_LEN(LZMA_C_LEND_C1, p + RepLenCoder);
+        state = state < 7 ? 8 : 11;
+      }
+      else
+      {
+        /* new match: decode length, then the distance into rep0 */
+        rep3 = rep2;
+        rep2 = rep1;
+        rep1 = rep0;
+        state = state < 7 ? 7 : 10;
+        DECODE_LEN(LZMA_C_LEND_C2, p + LenCoder);
+        DECODE_BIT_TREE(
+          LZMA_C_BTD_C1,
+          p + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits),
+          kNumPosSlotBits
+        );
+        if (posSlot >= kStartPosModelIndex)
+        {
+          int numDirectBits = ((posSlot >> 1) - 1);
+          rep0 = ((2 | ((UInt32)posSlot & 1)) << numDirectBits);
+          if (posSlot < kEndPosModelIndex)
+          {
+            probs = p + SpecPos + rep0 - posSlot - 1;
+            numLevels = numDirectBits;
+          }
+          else
+          {
+            int numTotalBits = numDirectBits - kNumAlignBits;
+            /* result shares storage with posSlot, which is no longer
+               needed from here on */
+            result = 0;
+            for (i = numTotalBits; i > 0; i--)
+            {
+              /* UInt32 t; */
+              range >>= 1;
+
+              result <<= 1;
+              if (code >= range)
+              {
+                code -= range;
+                result |= 1;
+              }
+              /*
+              t = (code - range) >> 31;
+              t &= 1;
+              code -= range & (t - 1);
+              result = (result + result) | (1 - t);
+              */
+              RC_NORMALIZE(LZMA_C_NORM)
+            }
+            rep0 += result << kNumAlignBits;
+            probs = p + Align;
+            numLevels = kNumAlignBits;
+          }
+          /* reverse bit-tree decode of the low distance bits */
+          mi = 1;
+          symbol = 0;
+          for(i = 0; i < numLevels; i++)
+          {
+            prob = probs + mi;
+            RC_GET_BIT2(LZMA_C_RDRBTD, prob, mi, ; , symbol |= (1 << i));
+          }
+          rep0 += symbol;
+        }
+        else
+          rep0 = posSlot;
+        rep0++;
+      }
+      /* a distance that wrapped to 0 is the end-of-stream marker */
+      if (rep0 == (UInt32)(0))
+      {
+        len = -1;
+        LEAVE;
+      }
+      /* the source must lie inside the data produced so far AND inside
+         the dictionary; without the second test the wrap-around
+         computation of pos below can index past the buffer */
+      if (rep0 > totalOut || rep0 > dictionarySize)
+      {
+        return LZMA_DATA_ERROR;
+      }
+      len += kMatchMinLen;
+      totalOut += len;
+      do
+      {
+        UInt32 pos;
+        NEED_OUT(LZMA_C_OUTPUT_3);
+        pos = dictionaryPos - rep0;
+        if (pos >= dictionarySize)
+          pos += dictionarySize;
+        previousByte = dictionary[pos];
+        dictionary[dictionaryPos] = previousByte;
+        dictionaryPos = (dictionaryPos + 1) % dictionarySize;
+        /* totalOut was already advanced by the whole match above */
+        PUT_BYTE_(previousByte);
+        len--;
+      }
+      while(len > 0);
+    }
+    mode = LZMA_C_BLOCK;
+    break;
+  case LZMA_C_RDBD:
+  _LZMA_C_RDBD:
+    /* shared bit decoder, entered through DECODE_BIT; result in bit */
+    {
+      UInt32 bound = (range >> kNumBitModelTotalBits) * *prob;
+      if (code < bound)
+      {
+        range = bound;
+        *prob += (kBitModelTotal - *prob) >> kNumMoveBits;
+        bit = 0;
+      }
+      else
+      {
+        range -= bound;
+        code -= bound;
+        *prob -= (*prob) >> kNumMoveBits;
+        bit = 1;
+      }
+      RC_NORMALIZE(LZMA_C_RDBD_IN);
+    }
+    mode = last;
+    break;
+  case LZMA_C_LEND:
+  _LZMA_C_LEND:
+    /* shared length decoder, entered through DECODE_LEN; result in len */
+    DECODE_BIT(LZMA_C_LEND1, probs + LenChoice);
+    if (bit == 0)
+    {
+      len = 0;
+      probs += LenLow + (posState << kLenNumLowBits);
+      numLevels = kLenNumLowBits;
+    }
+    else {
+      DECODE_BIT(LZMA_C_LEND2, probs + LenChoice2);
+      if (bit == 0)
+      {
+        len = kLenNumLowSymbols;
+        probs += LenMid + (posState << kLenNumMidBits);
+        numLevels = kLenNumMidBits;
+      }
+      else
+      {
+        len = kLenNumLowSymbols + kLenNumMidSymbols;
+        probs += LenHigh;
+        numLevels = kLenNumHighBits;
+      }
+    }
+
+    last3 = LZMA_C_LEND_RES;
+    /* fall through */
+  case LZMA_C_BTD:
+  _LZMA_C_BTD:
+    /* shared bit-tree decoder, entered through DECODE_BIT_TREE or from
+       the length decoder above; result in result */
+    mi = 1;
+    for(i = numLevels; i > 0; i--)
+    {
+      prob = probs + mi;
+      RC_GET_BIT(LZMA_C_BTD_LOOP, prob, mi)
+    }
+    result = mi - (1 << numLevels);
+    mode = last3;
+    break;
+  case LZMA_C_LEND_RES:
+    len += result;
+    mode = last2;
+    break;
+  default:
+    return LZMA_DATA_ERROR;
+  }
+
+saveStateAndReturn:
+
+  /* save decoder state */
+  *s = _s;
+
+  return LZMA_OK;
+}
+
+
+/* aCaB */
+/*
+ * Free the probability table and the dictionary held by *s and store NULL
+ * in both pointers.  `p` and `dictionary` are the macros #defined inside
+ * lzmaDecode() above; they refer to the local copy `_s`, which is why the
+ * copy-out / copy-back pattern is used here as well.
+ */
+void lzmaShutdown(lzma_stream *s) {
+  lzma_stream _s = *s;
+
+  if (p) {
+    lzmafree(p);
+    p = NULL;
+  }
+  if (dictionary) {
+    lzmafree(dictionary);
+    dictionary = NULL;
+  }
+
+  *s = _s;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_bzlib.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1152 @@
+
+/*-------------------------------------------------------------*/
+/*--- Library top-level functions. ---*/
+/*--- bzlib.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.4 of 20 December 2006
+ Copyright (C) 1996-2006 Julian Seward <jseward at bzip.org>
+ This file was modified for ClamAV by aCaB <acab at clamav.net>
+
+ This program is released under the terms of the license contained
+ in the file COPYING.nsis.
+ ------------------------------------------------------------------ */
+
+/* CHANGES
+ 0.9.0 -- original version.
+ 0.9.0a/b -- no changes in this file.
+ 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress().
+ fixed bzWrite/bzRead to ignore zero-length requests.
+ fixed bzread to correctly handle read requests after EOF.
+ wrong parameter order in call to bzDecompressInit in
+ bzBuffToBuffDecompress. Fixed.
+*/
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include "bzlib_private.h"
+#include "others.h"
+
+/* Fixed table of 512 integers.  NOTE(review): presumably the block
+   randomisation table carried over from upstream bzip2; the randomised
+   code paths in the unRLE functions of this file are commented out, so
+   confirm whether anything still reads it before removing. */
+const Int32 BZ2_rNums[512] = {
+ 619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
+ 985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
+ 733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
+ 419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
+ 878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
+ 862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
+ 150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
+ 170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
+ 73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
+ 909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
+ 641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
+ 161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
+ 382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
+ 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
+ 227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
+ 469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
+ 184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
+ 715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
+ 951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
+ 652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
+ 645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
+ 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
+ 653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
+ 411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
+ 170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
+ 857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
+ 669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
+ 944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
+ 344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
+ 897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
+ 433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
+ 686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
+ 946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
+ 978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
+ 680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
+ 707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
+ 297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
+ 134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
+ 343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
+ 140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
+ 170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
+ 369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
+ 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
+ 896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
+ 661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
+ 768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
+ 61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
+ 372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
+ 780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
+ 920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
+ 645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
+ 936, 638
+};
+
+/*---------------------------------------------------*/
+/* Rebuild s->seqToUnseq from the s->inUse flags: the byte values whose
+   flag is set are stored in ascending order, and their count is left in
+   s->nInUse. */
+static
+void makeMaps_d ( DState* s )
+{
+   Int32 byteVal;
+   Int32 used = 0;
+
+   for (byteVal = 0; byteVal < 256; byteVal++) {
+      if (!s->inUse[byteVal]) continue;
+      s->seqToUnseq[used] = byteVal;
+      used++;
+   }
+
+   s->nInUse = used;
+}
+
+
+/*---------------------------------------------------*/
+/*
+ * Helper macros for BZ2_decompress() below.  They rely on that function's
+ * locals (retVal, s, groupNo, zn, zvec, ...) and on its
+ * save_state_and_return label, so they cannot be used anywhere else.
+ */
+/* Set the function result and jump to the common exit. */
+#define RETURN(rrr) \
+ { retVal = rrr; goto save_state_and_return; };
+
+/* Read nnn bits from the input into vvv.  Expands to the label
+   `case lll:` and records lll in s->state first; if the input runs dry
+   the function returns BZ_OK, and the next call's `switch (s->state)`
+   re-enters right here.  Input bytes are shifted into s->bsBuff eight
+   bits at a time while the 64-bit total_in counter (lo32/hi32 pair) is
+   kept up to date. */
+#define GET_BITS(lll,vvv,nnn) \
+ case lll: s->state = lll; \
+ while (True) { \
+ if (s->bsLive >= nnn) { \
+ UInt32 v; \
+ v = (s->bsBuff >> \
+ (s->bsLive-nnn)) & ((1 << nnn)-1); \
+ s->bsLive -= nnn; \
+ vvv = v; \
+ break; \
+ } \
+ if (s->strm->avail_in == 0) RETURN(BZ_OK); \
+ s->bsBuff \
+ = (s->bsBuff << 8) | \
+ ((UInt32) \
+ (*(s->strm->next_in))); \
+ s->bsLive += 8; \
+ s->strm->next_in++; \
+ s->strm->avail_in--; \
+ s->strm->total_in_lo32++; \
+ if (s->strm->total_in_lo32 == 0) \
+ s->strm->total_in_hi32++; \
+ }
+
+/* GET_BITS for a whole byte. */
+#define GET_UCHAR(lll,uuu) \
+ GET_BITS(lll,uuu,8)
+
+/* GET_BITS for a single bit. */
+#define GET_BIT(lll,uuu) \
+ GET_BITS(lll,uuu,1)
+
+/*---------------------------------------------------*/
+/* Decode one Huffman-coded symbol into lval.  Every BZ_G_SIZE symbols
+   the next selector picks the coding table (gLimit/gPerm/gBase/gMinlen);
+   running out of selectors is a data error.  The code is read gMinlen
+   bits at first (resume label label1) and then extended one bit at a
+   time (resume label label2) until it is <= gLimit[zn]; lengths above 20
+   or an index outside [0, BZ_MAX_ALPHA_SIZE) are data errors. */
+#define GET_MTF_VAL(label1,label2,lval) \
+{ \
+ if (groupPos == 0) { \
+ groupNo++; \
+ if (groupNo >= nSelectors) \
+ RETURN(BZ_DATA_ERROR); \
+ groupPos = BZ_G_SIZE; \
+ gSel = s->selector[groupNo]; \
+ gMinlen = s->minLens[gSel]; \
+ gLimit = &(s->limit[gSel][0]); \
+ gPerm = &(s->perm[gSel][0]); \
+ gBase = &(s->base[gSel][0]); \
+ } \
+ groupPos--; \
+ zn = gMinlen; \
+ GET_BITS(label1, zvec, zn); \
+ while (1) { \
+ if (zn > 20 /* the longest code */) \
+ RETURN(BZ_DATA_ERROR); \
+ if (zvec <= gLimit[zn]) break; \
+ zn++; \
+ GET_BIT(label2, zj); \
+ zvec = (zvec << 1) | zj; \
+ }; \
+ if (zvec - gBase[zn] < 0 \
+ || zvec - gBase[zn] >= BZ_MAX_ALPHA_SIZE) \
+ RETURN(BZ_DATA_ERROR); \
+ lval = gPerm[zvec - gBase[zn]]; \
+}
+
+/*---------------------------------------------------*/
+
+/* Bisect cftab[0..256] for indx: starting from the interval [0, 256), the
+   half not matching `indx >= cftab[mid]` is discarded until the interval
+   has width 1; its lower end is returned.  For a non-decreasing cftab that
+   is the last slot whose entry does not exceed indx (slot 0 if none). */
+inline static Int32 indexIntoF ( Int32 indx, Int32 *cftab )
+{
+   Int32 lo = 0;
+   Int32 hi = 256;
+
+   while (hi - lo != 1) {
+      Int32 probe = (lo + hi) >> 1;
+      if (indx < cftab[probe])
+         hi = probe;
+      else
+         lo = probe;
+   }
+   return lo;
+}
+
+/*---------------------------------------------------*/
+/* Return True iff data corruption is discovered.
+ Returns False if there is no problem.
+*/
+/* Expand the run-length coded block data into s->strm->next_out until
+   either the output buffer is full or the block is exhausted.
+
+   The live state (current run character and remaining run length, block
+   position, output pointer and space) is copied into c_* / cs_* locals,
+   processed, and written back at return_notr together with the updated
+   64-bit total_out counter.  Note that the `return True` taken on the
+   corruption check leaves without writing the locals back.
+
+   Encoding handled here: a byte value is output as-is; after four equal
+   consecutive values the next value read is an extra repeat count
+   (run length = count + 4).
+
+   In this ClamAV/NSIS variant the randomised-block path is commented
+   out and the block CRC is not updated (lines marked "aCaB").
+   BZ_GET_FAST_C is defined outside this file section. */
+static
+Bool unRLE_obuf_to_output_FAST ( DState* s )
+{
+ UChar k1;
+
+/* if (s->blockRandomised) { */
+
+/* while (True) { */
+/* /\* try to finish existing run *\/ */
+/* while (True) { */
+/* if (s->strm->avail_out == 0) return False; */
+/* if (s->state_out_len == 0) break; */
+/* *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; */
+/* BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); */
+/* s->state_out_len--; */
+/* s->strm->next_out++; */
+/* s->strm->avail_out--; */
+/* s->strm->total_out_lo32++; */
+/* if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; */
+/* } */
+
+/* /\* can a new run be started? *\/ */
+/* if (s->nblock_used == s->save_nblock+1) return False; */
+
+/* /\* Only caused by corrupt data stream? *\/ */
+/* if (s->nblock_used > s->save_nblock+1) */
+/* return True; */
+
+/* s->state_out_len = 1; */
+/* s->state_out_ch = s->k0; */
+/* BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; */
+
+
+
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* if (s->nblock_used == s->save_nblock+1) continue; */
+/* if (k1 != s->k0) { s->k0 = k1; continue; }; */
+
+/* s->state_out_len = 2; */
+/* BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; */
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* if (s->nblock_used == s->save_nblock+1) continue; */
+/* if (k1 != s->k0) { s->k0 = k1; continue; }; */
+
+/* s->state_out_len = 3; */
+/* BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; */
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* if (s->nblock_used == s->save_nblock+1) continue; */
+/* if (k1 != s->k0) { s->k0 = k1; continue; }; */
+
+/* BZ_GET_FAST(k1); BZ_RAND_UPD_MASK; */
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* s->state_out_len = ((Int32)k1) + 4; */
+/* BZ_GET_FAST(s->k0); BZ_RAND_UPD_MASK; */
+/* s->k0 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* } */
+
+/* } else */ {
+
+ /* restore */
+ UInt32 c_calculatedBlockCRC = s->calculatedBlockCRC;
+ UChar c_state_out_ch = s->state_out_ch;
+ Int32 c_state_out_len = s->state_out_len;
+ Int32 c_nblock_used = s->nblock_used;
+ Int32 c_k0 = s->k0;
+ UInt32* c_tt = s->tt;
+ UInt32 c_tPos = s->tPos;
+ UChar* cs_next_out = s->strm->next_out;
+ unsigned int cs_avail_out = s->strm->avail_out;
+ /* end restore */
+
+ /* avail_out_INIT lets return_notr work out how many bytes were written;
+    s_save_nblockPP is the value of c_nblock_used at end of block */
+ UInt32 avail_out_INIT = cs_avail_out;
+ Int32 s_save_nblockPP = s->save_nblock+1;
+ unsigned int total_out_lo32_old;
+
+ while (True) {
+
+ /* try to finish existing run */
+ if (c_state_out_len > 0) {
+ /* emit all but the last byte of the run here ... */
+ while (True) {
+ if (cs_avail_out == 0) goto return_notr;
+ if (c_state_out_len == 1) break;
+ *cs_next_out = c_state_out_ch;
+ /* aCaB BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); */
+ c_state_out_len--;
+ cs_next_out++;
+ cs_avail_out--;
+ }
+ /* ... and the last (or only) byte here; this label is also the target
+    of the gotos further down that start a run of length one */
+ s_state_out_len_eq_one:
+ {
+ if (cs_avail_out == 0) {
+ c_state_out_len = 1; goto return_notr;
+ };
+ *cs_next_out = c_state_out_ch;
+ /* aCaB BZ_UPDATE_CRC ( c_calculatedBlockCRC, c_state_out_ch ); */
+ cs_next_out++;
+ cs_avail_out--;
+ }
+ }
+ /* Only caused by corrupt data stream? */
+ if (c_nblock_used > s_save_nblockPP)
+ return True;
+
+ /* can a new run be started? */
+ if (c_nblock_used == s_save_nblockPP) {
+ c_state_out_len = 0; goto return_notr;
+ };
+ /* the pending value c_k0 becomes the run character; keep reading while
+    the following values are equal to it */
+ c_state_out_ch = c_k0;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (k1 != c_k0) {
+ c_k0 = k1; goto s_state_out_len_eq_one;
+ };
+ if (c_nblock_used == s_save_nblockPP)
+ goto s_state_out_len_eq_one;
+
+ c_state_out_len = 2;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (c_nblock_used == s_save_nblockPP) continue;
+ if (k1 != c_k0) { c_k0 = k1; continue; };
+
+ c_state_out_len = 3;
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ if (c_nblock_used == s_save_nblockPP) continue;
+ if (k1 != c_k0) { c_k0 = k1; continue; };
+
+ /* four equal values seen: the next value is the extra repeat count,
+    the one after that is the next pending value */
+ BZ_GET_FAST_C(k1); c_nblock_used++;
+ c_state_out_len = ((Int32)k1) + 4;
+ BZ_GET_FAST_C(c_k0); c_nblock_used++;
+ }
+
+ return_notr:
+ /* add the bytes written to the 64-bit counter, carrying into hi32 when
+    the low word wraps */
+ total_out_lo32_old = s->strm->total_out_lo32;
+ s->strm->total_out_lo32 += (avail_out_INIT - cs_avail_out);
+ if (s->strm->total_out_lo32 < total_out_lo32_old)
+ s->strm->total_out_hi32++;
+
+ /* save */
+ s->calculatedBlockCRC = c_calculatedBlockCRC;
+ s->state_out_ch = c_state_out_ch;
+ s->state_out_len = c_state_out_len;
+ s->nblock_used = c_nblock_used;
+ s->k0 = c_k0;
+ s->tt = c_tt;
+ s->tPos = c_tPos;
+ s->strm->next_out = cs_next_out;
+ s->strm->avail_out = cs_avail_out;
+ /* end save */
+ }
+ return False;
+}
+
+
+/*---------------------------------------------------*/
+/* Return True iff data corruption is discovered.
+ Returns False if there is no problem.
+*/
+/* Same job as unRLE_obuf_to_output_FAST, but operating directly on the
+   fields of *s and fetching block values with BZ_GET_SMALL (defined
+   outside this file section).  Output is written byte by byte to
+   s->strm->next_out and the 64-bit total_out counter is bumped per byte.
+
+   Returns False when the output buffer is full or the block is finished,
+   True when s->nblock_used has run past the end of the block.
+
+   As in the FAST variant, the randomised-block path is commented out and
+   the block CRC is not updated (line marked "aCaB"). */
+static
+Bool unRLE_obuf_to_output_SMALL ( DState* s )
+{
+ UChar k1;
+
+/* if (s->blockRandomised) { */
+
+/* while (True) { */
+/* /\* try to finish existing run *\/ */
+/* while (True) { */
+/* if (s->strm->avail_out == 0) return False; */
+/* if (s->state_out_len == 0) break; */
+/* *( (UChar*)(s->strm->next_out) ) = s->state_out_ch; */
+/* BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); */
+/* s->state_out_len--; */
+/* s->strm->next_out++; */
+/* s->strm->avail_out--; */
+/* s->strm->total_out_lo32++; */
+/* if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++; */
+/* } */
+
+/* /\* can a new run be started? *\/ */
+/* if (s->nblock_used == s->save_nblock+1) return False; */
+
+/* /\* Only caused by corrupt data stream? *\/ */
+/* if (s->nblock_used > s->save_nblock+1) */
+/* return True; */
+
+/* s->state_out_len = 1; */
+/* s->state_out_ch = s->k0; */
+/* BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; */
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* if (s->nblock_used == s->save_nblock+1) continue; */
+/* if (k1 != s->k0) { s->k0 = k1; continue; }; */
+
+/* s->state_out_len = 2; */
+/* BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; */
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* if (s->nblock_used == s->save_nblock+1) continue; */
+/* if (k1 != s->k0) { s->k0 = k1; continue; }; */
+
+/* s->state_out_len = 3; */
+/* BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; */
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* if (s->nblock_used == s->save_nblock+1) continue; */
+/* if (k1 != s->k0) { s->k0 = k1; continue; }; */
+
+/* BZ_GET_SMALL(k1); BZ_RAND_UPD_MASK; */
+/* k1 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* s->state_out_len = ((Int32)k1) + 4; */
+/* BZ_GET_SMALL(s->k0); BZ_RAND_UPD_MASK; */
+/* s->k0 ^= BZ_RAND_MASK; s->nblock_used++; */
+/* } */
+
+/* } else */ {
+
+ while (True) {
+ /* try to finish existing run */
+ while (True) {
+ if (s->strm->avail_out == 0) return False;
+ if (s->state_out_len == 0) break;
+ *(s->strm->next_out) = s->state_out_ch;
+ /* aCaB BZ_UPDATE_CRC ( s->calculatedBlockCRC, s->state_out_ch ); */
+ s->state_out_len--;
+ s->strm->next_out++;
+ s->strm->avail_out--;
+ s->strm->total_out_lo32++;
+ if (s->strm->total_out_lo32 == 0) s->strm->total_out_hi32++;
+ }
+
+ /* can a new run be started? */
+ if (s->nblock_used == s->save_nblock+1) return False;
+
+ /* Only caused by corrupt data stream? */
+ if (s->nblock_used > s->save_nblock+1)
+ return True;
+
+ /* the pending value s->k0 starts a run; extend it while the following
+    values are equal, going back to the top of the loop to emit it as
+    soon as a different value or the end of the block is reached */
+ s->state_out_len = 1;
+ s->state_out_ch = s->k0;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 2;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ s->state_out_len = 3;
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ if (s->nblock_used == s->save_nblock+1) continue;
+ if (k1 != s->k0) { s->k0 = k1; continue; };
+
+ /* four equal values seen: the next value is the extra repeat count,
+    the one after that is the next pending value */
+ BZ_GET_SMALL(k1); s->nblock_used++;
+ s->state_out_len = ((Int32)k1) + 4;
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
+ }
+
+ }
+}
+/*---------------------------------------------------*/
+
+/* Build the Huffman decoding tables for one coding group from the
+   per-symbol code lengths.
+
+     perm  : symbols sorted by code length (minLen..maxLen), ties broken
+             by ascending symbol index
+     limit : for each length, the largest code value of that length
+     base  : for each length, the amount to subtract from a code of that
+             length to obtain its index into perm
+
+   limit and base are cleared over their first BZ_MAX_CODE_LEN entries
+   before being filled in. */
+static void CreateDecodeTables ( Int32 *limit,
+                                 Int32 *base,
+                                 Int32 *perm,
+                                 UChar *length,
+                                 Int32 minLen,
+                                 Int32 maxLen,
+                                 Int32 alphaSize )
+{
+   Int32 nextPerm = 0;
+   Int32 codeLen;
+   Int32 sym;
+   Int32 code;
+
+   /* perm[]: walk the lengths in order and collect matching symbols */
+   for (codeLen = minLen; codeLen <= maxLen; codeLen++) {
+      for (sym = 0; sym < alphaSize; sym++) {
+         if (length[sym] != codeLen) continue;
+         perm[nextPerm++] = sym;
+      }
+   }
+
+   /* base[n+1] first counts the symbols of length n, then becomes the
+      running total */
+   for (codeLen = 0; codeLen < BZ_MAX_CODE_LEN; codeLen++) {
+      base[codeLen] = 0;
+   }
+   for (sym = 0; sym < alphaSize; sym++) {
+      base[length[sym] + 1] += 1;
+   }
+   for (codeLen = 1; codeLen < BZ_MAX_CODE_LEN; codeLen++) {
+      base[codeLen] = base[codeLen] + base[codeLen - 1];
+   }
+
+   for (codeLen = 0; codeLen < BZ_MAX_CODE_LEN; codeLen++) {
+      limit[codeLen] = 0;
+   }
+
+   /* assign consecutive code values, doubling when moving to the next
+      length */
+   code = 0;
+   for (codeLen = minLen; codeLen <= maxLen; codeLen++) {
+      code += base[codeLen + 1] - base[codeLen];
+      limit[codeLen] = code - 1;
+      code <<= 1;
+   }
+
+   for (codeLen = minLen + 1; codeLen <= maxLen; codeLen++) {
+      base[codeLen] = ((limit[codeLen - 1] + 1) << 1) - base[codeLen];
+   }
+}
+
+/*---------------------------------------------------*/
+static Int32 BZ2_decompress ( DState* s )
+{
+ UChar uc;
+ Int32 retVal;
+ Int32 minLen, maxLen;
+ nsis_bzstream* strm = s->strm;
+
+ /* stuff that needs to be saved/restored */
+ Int32 i;
+ Int32 j;
+ Int32 t;
+ Int32 alphaSize;
+ Int32 nGroups;
+ Int32 nSelectors;
+ Int32 EOB;
+ Int32 groupNo;
+ Int32 groupPos;
+ Int32 nextSym;
+ Int32 nblockMAX;
+ Int32 nblock;
+ Int32 es;
+ Int32 N;
+ Int32 curr;
+ Int32 zt;
+ Int32 zn;
+ Int32 zvec;
+ Int32 zj;
+ Int32 gSel;
+ Int32 gMinlen;
+ Int32* gLimit;
+ Int32* gBase;
+ Int32* gPerm;
+
+ if (s->state == BZ_X_MAGIC_1) {
+ /*initialise the save area*/
+ s->save_i = 0;
+ s->save_j = 0;
+ s->save_t = 0;
+ s->save_alphaSize = 0;
+ s->save_nGroups = 0;
+ s->save_nSelectors = 0;
+ s->save_EOB = 0;
+ s->save_groupNo = 0;
+ s->save_groupPos = 0;
+ s->save_nextSym = 0;
+ s->save_nblockMAX = 0;
+ s->save_nblock = 0;
+ s->save_es = 0;
+ s->save_N = 0;
+ s->save_curr = 0;
+ s->save_zt = 0;
+ s->save_zn = 0;
+ s->save_zvec = 0;
+ s->save_zj = 0;
+ s->save_gSel = 0;
+ s->save_gMinlen = 0;
+ s->save_gLimit = NULL;
+ s->save_gBase = NULL;
+ s->save_gPerm = NULL;
+ }
+
+ /*restore from the save area*/
+ i = s->save_i;
+ j = s->save_j;
+ t = s->save_t;
+ alphaSize = s->save_alphaSize;
+ nGroups = s->save_nGroups;
+ nSelectors = s->save_nSelectors;
+ EOB = s->save_EOB;
+ groupNo = s->save_groupNo;
+ groupPos = s->save_groupPos;
+ nextSym = s->save_nextSym;
+ nblockMAX = s->save_nblockMAX;
+ nblock = s->save_nblock;
+ es = s->save_es;
+ N = s->save_N;
+ curr = s->save_curr;
+ zt = s->save_zt;
+ zn = s->save_zn;
+ zvec = s->save_zvec;
+ zj = s->save_zj;
+ gSel = s->save_gSel;
+ gMinlen = s->save_gMinlen;
+ gLimit = s->save_gLimit;
+ gBase = s->save_gBase;
+ gPerm = s->save_gPerm;
+
+ retVal = BZ_OK;
+
+ switch (s->state) {
+
+ /* aCaB
+ GET_UCHAR(BZ_X_MAGIC_1, uc);
+ if (uc != BZ_HDR_B) RETURN(BZ_DATA_ERROR_MAGIC);
+
+ GET_UCHAR(BZ_X_MAGIC_2, uc);
+ if (uc != BZ_HDR_Z) RETURN(BZ_DATA_ERROR_MAGIC);
+
+ GET_UCHAR(BZ_X_MAGIC_3, uc)
+ if (uc != BZ_HDR_h) RETURN(BZ_DATA_ERROR_MAGIC);
+
+ GET_BITS(BZ_X_MAGIC_4, s->blockSize100k, 8)
+ if (s->blockSize100k < (BZ_HDR_0 + 1) ||
+ s->blockSize100k > (BZ_HDR_0 + 9)) RETURN(BZ_DATA_ERROR_MAGIC);
+ s->blockSize100k -= BZ_HDR_0;
+ */
+
+ case BZ_X_MAGIC_1:
+
+ s->blockSize100k = 9;
+
+ if (s->smallDecompress) {
+ s->ll16 = BZALLOC( s->blockSize100k * 100000 * sizeof(UInt16) );
+ s->ll4 = BZALLOC(
+ ((1 + s->blockSize100k * 100000) >> 1) * sizeof(UChar)
+ );
+ if (s->ll16 == NULL || s->ll4 == NULL) RETURN(BZ_MEM_ERROR);
+ } else {
+ s->tt = BZALLOC( s->blockSize100k * 100000 * sizeof(Int32) );
+ if (s->tt == NULL) RETURN(BZ_MEM_ERROR);
+ }
+
+ GET_UCHAR(BZ_X_BLKHDR_1, uc);
+
+ if (uc == 0x17) goto endhdr_2;
+ if (uc != 0x31) RETURN(BZ_DATA_ERROR);
+
+ /* aCaB
+ GET_UCHAR(BZ_X_BLKHDR_2, uc);
+ if (uc != 0x41) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_3, uc);
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_4, uc);
+ if (uc != 0x26) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_5, uc);
+ if (uc != 0x53) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_BLKHDR_6, uc);
+ if (uc != 0x59) RETURN(BZ_DATA_ERROR);
+
+ s->currBlockNo++;
+ if (s->verbosity >= 2)
+ VPrintf1 ( "\n [%d: huff+mtf ", s->currBlockNo );
+
+ s->storedBlockCRC = 0;
+ GET_UCHAR(BZ_X_BCRC_1, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_BCRC_2, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_BCRC_3, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_BCRC_4, uc);
+ s->storedBlockCRC = (s->storedBlockCRC << 8) | ((UInt32)uc);
+
+
+ GET_BITS(BZ_X_RANDBIT, s->blockRandomised, 1);
+ */
+
+ s->origPtr = 0;
+ GET_UCHAR(BZ_X_ORIGPTR_1, uc);
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+ GET_UCHAR(BZ_X_ORIGPTR_2, uc);
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+ GET_UCHAR(BZ_X_ORIGPTR_3, uc);
+ s->origPtr = (s->origPtr << 8) | ((Int32)uc);
+
+ if (s->origPtr < 0)
+ RETURN(BZ_DATA_ERROR);
+ if (s->origPtr > 10 + 100000*s->blockSize100k)
+ RETURN(BZ_DATA_ERROR);
+
+ /*--- Receive the mapping table ---*/
+ for (i = 0; i < 16; i++) {
+ GET_BIT(BZ_X_MAPPING_1, uc);
+ if (uc == 1)
+ s->inUse16[i] = True; else
+ s->inUse16[i] = False;
+ }
+
+ for (i = 0; i < 256; i++) s->inUse[i] = False;
+
+ for (i = 0; i < 16; i++)
+ if (s->inUse16[i])
+ for (j = 0; j < 16; j++) {
+ GET_BIT(BZ_X_MAPPING_2, uc);
+ if (uc == 1) s->inUse[i * 16 + j] = True;
+ }
+ makeMaps_d ( s );
+ if (s->nInUse == 0) RETURN(BZ_DATA_ERROR);
+ alphaSize = s->nInUse+2;
+
+ /*--- Now the selectors ---*/
+ GET_BITS(BZ_X_SELECTOR_1, nGroups, 3);
+ if (nGroups < 2 || nGroups > 6) RETURN(BZ_DATA_ERROR);
+ GET_BITS(BZ_X_SELECTOR_2, nSelectors, 15);
+ if (nSelectors < 1) RETURN(BZ_DATA_ERROR);
+ for (i = 0; i < nSelectors; i++) {
+ j = 0;
+ while (True) {
+ GET_BIT(BZ_X_SELECTOR_3, uc);
+ if (uc == 0) break;
+ j++;
+ if (j >= nGroups) RETURN(BZ_DATA_ERROR);
+ }
+ s->selectorMtf[i] = j;
+ }
+
+ /*--- Undo the MTF values for the selectors. ---*/
+ {
+ UChar pos[BZ_N_GROUPS], tmp, v;
+ for (v = 0; v < nGroups; v++) pos[v] = v;
+
+ for (i = 0; i < nSelectors; i++) {
+ v = s->selectorMtf[i];
+ tmp = pos[v];
+ while (v > 0) { pos[v] = pos[v-1]; v--; }
+ pos[0] = tmp;
+ s->selector[i] = tmp;
+ }
+ }
+
+ /*--- Now the coding tables ---*/
+ for (t = 0; t < nGroups; t++) {
+ GET_BITS(BZ_X_CODING_1, curr, 5);
+ for (i = 0; i < alphaSize; i++) {
+ while (True) {
+ if (curr < 1 || curr > 20) RETURN(BZ_DATA_ERROR);
+ GET_BIT(BZ_X_CODING_2, uc);
+ if (uc == 0) break;
+ GET_BIT(BZ_X_CODING_3, uc);
+ if (uc == 0) curr++; else curr--;
+ }
+ s->len[t][i] = curr;
+ }
+ }
+
+ /*--- Create the Huffman decoding tables ---*/
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+ if (s->len[t][i] < minLen) minLen = s->len[t][i];
+ }
+ CreateDecodeTables (
+ &(s->limit[t][0]),
+ &(s->base[t][0]),
+ &(s->perm[t][0]),
+ &(s->len[t][0]),
+ minLen, maxLen, alphaSize
+ );
+ s->minLens[t] = minLen;
+ }
+
+ /*--- Now the MTF values ---*/
+
+ EOB = s->nInUse+1;
+ nblockMAX = 100000 * s->blockSize100k;
+ groupNo = -1;
+ groupPos = 0;
+
+ for (i = 0; i <= 255; i++) s->unzftab[i] = 0;
+
+ /*-- MTF init --*/
+ {
+ Int32 ii, jj, kk;
+ kk = MTFA_SIZE-1;
+ for (ii = 256 / MTFL_SIZE - 1; ii >= 0; ii--) {
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+ s->mtfa[kk] = (UChar)(ii * MTFL_SIZE + jj);
+ kk--;
+ }
+ s->mtfbase[ii] = kk + 1;
+ }
+ }
+ /*-- end MTF init --*/
+
+ nblock = 0;
+ GET_MTF_VAL(BZ_X_MTF_1, BZ_X_MTF_2, nextSym);
+
+ while (True) {
+
+ if (nextSym == EOB) break;
+
+ if (nextSym == BZ_RUNA || nextSym == BZ_RUNB) {
+
+ es = -1;
+ N = 1;
+ do {
+ if (nextSym == BZ_RUNA) es = es + (0+1) * N; else
+ if (nextSym == BZ_RUNB) es = es + (1+1) * N;
+ N = N * 2;
+ GET_MTF_VAL(BZ_X_MTF_3, BZ_X_MTF_4, nextSym);
+ }
+ while (nextSym == BZ_RUNA || nextSym == BZ_RUNB);
+
+ es++;
+ uc = s->seqToUnseq[ s->mtfa[s->mtfbase[0]] ];
+ s->unzftab[uc] += es;
+
+ if (s->smallDecompress)
+ while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+ s->ll16[nblock] = (UInt16)uc;
+ nblock++;
+ es--;
+ }
+ else
+ while (es > 0) {
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+ s->tt[nblock] = (UInt32)uc;
+ nblock++;
+ es--;
+ };
+
+ continue;
+
+ } else {
+
+ if (nblock >= nblockMAX) RETURN(BZ_DATA_ERROR);
+
+ /*-- uc = MTF ( nextSym-1 ) --*/
+ {
+ Int32 ii, jj, kk, pp, lno, off;
+ UInt32 nn;
+ nn = (UInt32)(nextSym - 1);
+
+ if (nn < MTFL_SIZE) {
+ /* avoid general-case expense */
+ pp = s->mtfbase[0];
+ uc = s->mtfa[pp+nn];
+ while (nn > 3) {
+ Int32 z = pp+nn;
+ s->mtfa[(z) ] = s->mtfa[(z)-1];
+ s->mtfa[(z)-1] = s->mtfa[(z)-2];
+ s->mtfa[(z)-2] = s->mtfa[(z)-3];
+ s->mtfa[(z)-3] = s->mtfa[(z)-4];
+ nn -= 4;
+ }
+ while (nn > 0) {
+ s->mtfa[(pp+nn)] = s->mtfa[(pp+nn)-1]; nn--;
+ };
+ s->mtfa[pp] = uc;
+ } else {
+ /* general case */
+ lno = nn / MTFL_SIZE;
+ off = nn % MTFL_SIZE;
+ pp = s->mtfbase[lno] + off;
+ uc = s->mtfa[pp];
+ while (pp > s->mtfbase[lno]) {
+ s->mtfa[pp] = s->mtfa[pp-1]; pp--;
+ };
+ s->mtfbase[lno]++;
+ while (lno > 0) {
+ s->mtfbase[lno]--;
+ s->mtfa[s->mtfbase[lno]]
+ = s->mtfa[s->mtfbase[lno-1] + MTFL_SIZE - 1];
+ lno--;
+ }
+ s->mtfbase[0]--;
+ s->mtfa[s->mtfbase[0]] = uc;
+ if (s->mtfbase[0] == 0) {
+ kk = MTFA_SIZE-1;
+ for (ii = 256 / MTFL_SIZE-1; ii >= 0; ii--) {
+ for (jj = MTFL_SIZE-1; jj >= 0; jj--) {
+ s->mtfa[kk] = s->mtfa[s->mtfbase[ii] + jj];
+ kk--;
+ }
+ s->mtfbase[ii] = kk + 1;
+ }
+ }
+ }
+ }
+ /*-- end uc = MTF ( nextSym-1 ) --*/
+
+ s->unzftab[s->seqToUnseq[uc]]++;
+ if (s->smallDecompress)
+ s->ll16[nblock] = (UInt16)(s->seqToUnseq[uc]); else
+ s->tt[nblock] = (UInt32)(s->seqToUnseq[uc]);
+ nblock++;
+
+ GET_MTF_VAL(BZ_X_MTF_5, BZ_X_MTF_6, nextSym);
+ continue;
+ }
+ }
+
+ /* Now we know what nblock is, we can do a better sanity
+ check on s->origPtr.
+ */
+ if (s->origPtr < 0 || s->origPtr >= nblock)
+ RETURN(BZ_DATA_ERROR);
+
+ /*-- Set up cftab to facilitate generation of T^(-1) --*/
+ s->cftab[0] = 0;
+ for (i = 1; i <= 256; i++) s->cftab[i] = s->unzftab[i-1];
+ for (i = 1; i <= 256; i++) s->cftab[i] += s->cftab[i-1];
+ for (i = 0; i <= 256; i++) {
+ if (s->cftab[i] < 0 || s->cftab[i] > nblock) {
+ RETURN(BZ_DATA_ERROR);
+ }
+ }
+ s->state_out_len = 0;
+ s->state_out_ch = 0;
+ /* BZ_INITIALISE_CRC ( s->calculatedBlockCRC ); */
+ s->state = BZ_X_OUTPUT;
+ if (s->verbosity >= 2) VPrintf0 ( "rt+rld" );
+
+ if (s->smallDecompress) {
+
+ /*-- Make a copy of cftab, used in generation of T --*/
+ for (i = 0; i <= 256; i++) s->cftabCopy[i] = s->cftab[i];
+
+ /*-- compute the T vector --*/
+ for (i = 0; i < nblock; i++) {
+ uc = (UChar)(s->ll16[i]);
+ SET_LL(i, s->cftabCopy[uc]);
+ s->cftabCopy[uc]++;
+ }
+
+ /*-- Compute T^(-1) by pointer reversal on T --*/
+ i = s->origPtr;
+ j = GET_LL(i);
+ do {
+ Int32 tmp = GET_LL(j);
+ SET_LL(j, i);
+ i = j;
+ j = tmp;
+ }
+ while (i != s->origPtr);
+
+ s->tPos = s->origPtr;
+ s->nblock_used = 0;
+ /* aCaB
+ if (s->blockRandomised) {
+ BZ_RAND_INIT_MASK;
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
+ } else */{
+ BZ_GET_SMALL(s->k0); s->nblock_used++;
+ }
+
+ } else {
+
+ /*-- compute the T^(-1) vector --*/
+ for (i = 0; i < nblock; i++) {
+ uc = (UChar)(s->tt[i] & 0xff);
+ s->tt[s->cftab[uc]] |= (i << 8);
+ s->cftab[uc]++;
+ }
+
+ s->tPos = s->tt[s->origPtr] >> 8;
+ s->nblock_used = 0;
+ /* aCaB
+ if (s->blockRandomised) {
+ BZ_RAND_INIT_MASK;
+ BZ_GET_FAST(s->k0); s->nblock_used++;
+ BZ_RAND_UPD_MASK; s->k0 ^= BZ_RAND_MASK;
+ } else */{
+ BZ_GET_FAST(s->k0); s->nblock_used++;
+ }
+
+ }
+
+ RETURN(BZ_OK);
+
+
+
+ endhdr_2:
+ /* aCaB
+ GET_UCHAR(BZ_X_ENDHDR_2, uc);
+ if (uc != 0x72) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_3, uc);
+ if (uc != 0x45) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_4, uc);
+ if (uc != 0x38) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_5, uc);
+ if (uc != 0x50) RETURN(BZ_DATA_ERROR);
+ GET_UCHAR(BZ_X_ENDHDR_6, uc);
+ if (uc != 0x90) RETURN(BZ_DATA_ERROR);
+
+ s->storedCombinedCRC = 0;
+ GET_UCHAR(BZ_X_CCRC_1, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_CCRC_2, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_CCRC_3, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+ GET_UCHAR(BZ_X_CCRC_4, uc);
+ s->storedCombinedCRC = (s->storedCombinedCRC << 8) | ((UInt32)uc);
+ */
+ s->state = BZ_X_IDLE;
+ RETURN(BZ_STREAM_END);
+
+ default: /* aCaB AssertH ( False, 4001 ); */
+ RETURN(BZ_DATA_ERROR);
+ }
+
+ /* aCaB AssertH ( False, 4002 ); */
+ RETURN(BZ_DATA_ERROR);
+
+ save_state_and_return:
+
+ s->save_i = i;
+ s->save_j = j;
+ s->save_t = t;
+ s->save_alphaSize = alphaSize;
+ s->save_nGroups = nGroups;
+ s->save_nSelectors = nSelectors;
+ s->save_EOB = EOB;
+ s->save_groupNo = groupNo;
+ s->save_groupPos = groupPos;
+ s->save_nextSym = nextSym;
+ s->save_nblockMAX = nblockMAX;
+ s->save_nblock = nblock;
+ s->save_es = es;
+ s->save_N = N;
+ s->save_curr = curr;
+ s->save_zt = zt;
+ s->save_zn = zn;
+ s->save_zvec = zvec;
+ s->save_zj = zj;
+ s->save_gSel = gSel;
+ s->save_gMinlen = gMinlen;
+ s->save_gLimit = gLimit;
+ s->save_gBase = gBase;
+ s->save_gPerm = gPerm;
+
+ return retVal;
+}
+
+
+/*---------------------------------------------------*/
+/* Check that the basic integer types have the widths this code relies
+ * on.  Returns 1 when int is 4 bytes, short is 2 bytes and char is
+ * 1 byte; returns 0 otherwise. */
+static
+int bz_config_ok ( void )
+{
+   return sizeof(int) == 4
+       && sizeof(short) == 2
+       && sizeof(char) == 1;
+}
+
+
+/*---------------------------------------------------*/
+/*
+ * Allocator installed by nsis_BZ2_bzDecompressInit when the caller left
+ * strm->bzalloc NULL.
+ *
+ * opaque - ignored.
+ * items  - number of elements requested.
+ * size   - size of one element in bytes.
+ *
+ * Returns the pointer obtained from cli_malloc for items*size bytes, or
+ * NULL when either argument is negative or the product cannot be
+ * represented (assumes Int32 is a 32-bit signed int, which bz_config_ok
+ * checks for plain int -- TODO confirm the typedef).
+ */
+static
+void* default_bzalloc ( void* opaque, Int32 items, Int32 size )
+{
+   (void)opaque;
+
+   /* items*size is a signed multiplication: overflow would be undefined
+      behaviour and could yield an undersized buffer, so refuse it. */
+   if (items < 0 || size < 0) return NULL;
+   if (size != 0 && items > 0x7fffffff / size) return NULL;
+
+   return cli_malloc ( items * size );
+}
+
+/* Deallocator installed by nsis_BZ2_bzDecompressInit when the caller
+ * left strm->bzfree NULL.  Releases addr with free(); a NULL addr is
+ * harmless because free(NULL) does nothing.  opaque is ignored. */
+static
+void default_bzfree ( void* opaque, void* addr )
+{
+   (void)opaque;
+   free ( addr );
+}
+
+/*---------------------------------------------------*/
+/*
+ * Set up strm for decompression.
+ *
+ * strm      - stream to initialise; its bzalloc/bzfree hooks are filled
+ *             in with the default pair when NULL.
+ * verbosity - must be in 0..4; stored in the decoder state.
+ * small     - must be 0 or 1; stored as the smallDecompress flag.
+ *
+ * Returns BZ_CONFIG_ERROR if bz_config_ok() fails, BZ_PARAM_ERROR for a
+ * NULL stream or out-of-range argument, BZ_MEM_ERROR if the DState
+ * cannot be allocated, and BZ_OK otherwise.
+ */
+int BZ_API(nsis_BZ2_bzDecompressInit)
+                     ( nsis_bzstream* strm,
+                       int        verbosity,
+                       int        small )
+{
+   DState* s;
+
+   if (!bz_config_ok()) return BZ_CONFIG_ERROR;
+
+   if (strm == NULL
+       || (small != 0 && small != 1)
+       || verbosity < 0 || verbosity > 4) {
+      return BZ_PARAM_ERROR;
+   }
+
+   /* fall back to the built-in allocator hooks where none were given */
+   if (strm->bzalloc == NULL) strm->bzalloc = default_bzalloc;
+   if (strm->bzfree == NULL) strm->bzfree = default_bzfree;
+
+   s = BZALLOC( sizeof(DState) );
+   if (s == NULL) return BZ_MEM_ERROR;
+
+   /* tie the stream and its private state together */
+   s->strm     = strm;
+   strm->state = s;
+
+   /* decoder starts at the first state with an empty bit buffer */
+   s->state                 = BZ_X_MAGIC_1;
+   s->bsLive                = 0;
+   s->bsBuff                = 0;
+   s->calculatedCombinedCRC = 0;
+
+   /* running byte counters */
+   strm->total_in_lo32  = 0;
+   strm->total_in_hi32  = 0;
+   strm->total_out_lo32 = 0;
+   strm->total_out_hi32 = 0;
+
+   /* work buffers are allocated later, by the block decoder */
+   s->smallDecompress = (Bool)small;
+   s->ll4             = NULL;
+   s->ll16            = NULL;
+   s->tt              = NULL;
+   s->currBlockNo     = 0;
+   s->verbosity       = verbosity;
+
+   return BZ_OK;
+}
+
+/*---------------------------------------------------*/
+/*
+ * Run the decompressor on strm, alternating between emitting the
+ * current block (state BZ_X_OUTPUT) and decoding the next one via
+ * BZ2_decompress.
+ *
+ * Returns BZ_PARAM_ERROR for a NULL or inconsistent stream,
+ * BZ_SEQUENCE_ERROR when the decoder is idle, BZ_DATA_ERROR when the
+ * un-RLE step reports corruption, BZ_OK when the current block is not
+ * fully written out yet, and otherwise whatever BZ2_decompress returns
+ * (including BZ_STREAM_END).
+ *
+ * The per-block and combined CRC verification that the original code
+ * carried here is disabled in this variant (it was commented out, tagged
+ * "aCaB"), so no CRC mismatch is ever reported from this function.
+ */
+int BZ_API(nsis_BZ2_bzDecompress) ( nsis_bzstream *strm )
+{
+   DState* s;
+   Bool    corrupt;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL || s->strm != strm) return BZ_PARAM_ERROR;
+
+   for (;;) {
+      if (s->state == BZ_X_IDLE) return BZ_SEQUENCE_ERROR;
+
+      if (s->state == BZ_X_OUTPUT) {
+         if (s->smallDecompress) {
+            corrupt = unRLE_obuf_to_output_SMALL ( s );
+         } else {
+            corrupt = unRLE_obuf_to_output_FAST ( s );
+         }
+         if (corrupt) return BZ_DATA_ERROR;
+
+         /* block not completely emitted yet: hand control back */
+         if (s->nblock_used != s->save_nblock+1 || s->state_out_len != 0) {
+            return BZ_OK;
+         }
+
+         /* block finished (CRC finalisation/check intentionally skipped);
+            move on to the next block header */
+         s->state = BZ_X_BLKHDR_1;
+      }
+
+      if (s->state >= BZ_X_MAGIC_1) {
+         Int32 r = BZ2_decompress ( s );
+         /* end of stream (combined CRC check intentionally skipped), or
+            the decoder stopped before producing a block to output */
+         if (r == BZ_STREAM_END || s->state != BZ_X_OUTPUT) return r;
+      }
+   }
+
+   /* not reached */
+   return BZ_DATA_ERROR;
+}
+
+
+/*---------------------------------------------------*/
+/*
+ * Release everything owned by strm's decoder state: the tt, ll16 and
+ * ll4 work buffers (when allocated) and the state object itself, then
+ * clear strm->state.
+ *
+ * Returns BZ_PARAM_ERROR for a NULL or inconsistent stream, BZ_OK
+ * otherwise.
+ */
+int BZ_API(nsis_BZ2_bzDecompressEnd) ( nsis_bzstream *strm )
+{
+   DState* s;
+
+   if (strm == NULL) return BZ_PARAM_ERROR;
+   s = strm->state;
+   if (s == NULL || s->strm != strm) return BZ_PARAM_ERROR;
+
+   /* only hand non-NULL buffers to the (possibly user-supplied) hook */
+   if (s->tt != NULL) {
+      BZFREE(s->tt);
+   }
+   if (s->ll16 != NULL) {
+      BZFREE(s->ll16);
+   }
+   if (s->ll4 != NULL) {
+      BZFREE(s->ll4);
+   }
+
+   BZFREE(strm->state);
+   strm->state = NULL;
+
+   return BZ_OK;
+}
+
+/*-------------------------------------------------------------*/
+/*--- end bzlib.c ---*/
+/*-------------------------------------------------------------*/
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_infblock.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,715 @@
+/*
+ * This file is a part of the zlib compression module for NSIS.
+ *
+ * Copyright and license information can be found below.
+ * Modifications Copyright (C) 1999-2007 Nullsoft and Contributors
+ *
+ * The original zlib source code is available at
+ * http://www.zlib.net/
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.
+ */
+
+/*
+ * Copyright (C) 1995-1998 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in COPYING.nsis
+ */
+
+#include "nsis_zutil.h"
+#include <string.h>
+
+/* min(x,y): the smaller of x and y.  Both arguments are parenthesised
+   so that operands containing low-precedence operators (|, &, ?: ...)
+   expand correctly.  Each argument may be evaluated twice, so do not
+   pass expressions with side effects. */
+#ifndef min
+# define min(x,y) (((x)<(y))?(x):(y))
+#endif
+
+/* defines for inflate input/output */
+/* These macros operate on names that must exist at the point of use:
+   z (the stream), s (its block state) and the working copies b, k, p,
+   n, q, m.  They expand to bare brace blocks and are invoked in this
+   file without a trailing semicolon. */
+/* update pointers and return */
+/* UPDBITS: store the bit buffer b and bit count k back into s */
+#define UPDBITS {s->bitb=b;s->bitk=k;}
+/* UPDIN: store the input byte count n and input pointer p back into z */
+#define UPDIN {z->avail_in=n;z->next_in=p;}
+/* UPDOUT: store the window write pointer q back into s */
+#define UPDOUT {s->write=q;}
+#define UPDATE {UPDBITS UPDIN UPDOUT}
+/* LEAVE(r): save all working copies, flush the window, return r */
+#define LEAVE(r) {UPDATE inflate_flush(z); return r;}
+
+/* get bytes and bits */
+#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;}
+
+
+/* NEEDBYTE: return Z_OK to the caller if no input bytes remain */
+#define NEEDBYTE {if(!n)LEAVE(Z_OK)}
+/* NEXTBYTE: consume and yield the next input byte */
+#define NEXTBYTE (n--,*p++)
+/* NEEDBITS(j): pull input bytes into b until at least j bits are held */
+#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+
+/* DUMPBITS(j): discard the low j bits of the bit buffer */
+#define DUMPBITS(j) {b>>=(j);k-=(j);}
+/* output bytes */
+/* WAVAIL: bytes writable at q -- up to one short of s->read when q is
+   below it, otherwise up to s->end */
+#define WAVAIL (uInt)(q<s->read?s->read-q-1:s->end-q)
+#define LOADOUT {q=s->write;m=(uInt)WAVAIL;}
+/* WRAP: if q reached s->end and s->read is not at the window start,
+   continue writing from the start of the window */
+#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=(uInt)WAVAIL;}}
+#define FLUSH {UPDOUT inflate_flush(z); LOADOUT}
+/* NEEDOUT: make room for output (wrap, then flush and wrap again);
+   return Z_OK to the caller if there is still none */
+#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE(Z_OK)}}}
+/* OUTBYTE(a): append one byte to the window */
+#define OUTBYTE(a) {*q++=(Byte)(a);m--;}
+/* load local pointers */
+#define LOAD {LOADIN LOADOUT}
+
+/* LAST: true when s->last is DRY (nsis_inflate sets it to DRY when bit 0
+   of the 3-bit block header is set) */
+#define LAST (s->last == DRY)
+
+#define FIXEDH 544 /* number of hufts used by fixed tables */
+
+
+
+typedef struct inflate_blocks_state FAR inflate_blocks_statef;
+/* shorthand for the two packed fields of an inflate_huft entry; note
+   that these rename every later use of the identifiers exop and bits */
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* And'ing with mask[n] masks the lower n bits */
+local unsigned short inflate_mask[17] = {
+ 0x0000,
+ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+}; /* use to reduce .data #define INFLATE_MASK(x, n) (x & (~((unsigned short) 0xFFFF << n))) */
+local const char border[] = { /* Order of the bit length code lengths */
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/* Tables for deflate from PKZIP's appnote.txt. */
+/* Each table is passed to huft_build as its d (base values) or e (extra
+   bits) argument by nsis_inflate. */
+local const unsigned short cplens[31] = { /* Copy lengths for literal codes 257..285 */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+ /* 258 is the entry for code 285; the "note #13" the original comment
+    pointed to is not part of this file (presumably it lives in the
+    upstream zlib sources) */
+local const unsigned short cplext[31] = { /* Extra bits for literal codes 257..285 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 112, 112}; /* 112==invalid */
+local const unsigned short cpdist[30] = { /* Copy offsets for distance codes 0..29 */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577};
+local const unsigned short cpdext[30] = { /* Extra bits for distance codes */
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+ 12, 12, 13, 13};
+
+/* build fixed tables only once--keep them here */
+/* fixed_built is set by nsis_inflate after it has filled fixed_mem on
+   the first fixed-codes block; it is a plain file-scope flag with no
+   locking */
+local char fixed_built = 0;
+local inflate_huft fixed_mem[FIXEDH];
+/* lookup-bit counts passed to huft_build for the fixed literal/length
+   and distance tables */
+local uInt fixed_bl=9;
+local uInt fixed_bd=5;
+/* starting tables returned by huft_build for the fixed codes */
+local inflate_huft *fixed_tl;
+local inflate_huft *fixed_td;
+
+/* copy as much as possible from the sliding window to the output area */
+/*
+ * Copy as much pending data as fits from the sliding window to
+ * z->next_out, starting at the window's read pointer.  When the copy
+ * reaches the end of the window it continues from the window start.
+ * Updates z->avail_out, z->next_out and the block state's read pointer
+ * (and its write pointer, when that sat at the end of the window).
+ */
+local void ZEXPORT inflate_flush(nsis_z_streamp z)
+{
+  inflate_blocks_statef *blk = &z->blocks;
+  Bytef *src = blk->read;              /* next window byte to hand out */
+
+  for (;;)
+  {
+    /* bytes available up to the write pointer or the end of the window,
+       limited by the room left in the output buffer */
+    uInt count = (uInt)((src <= blk->write ? blk->write : blk->end) - src);
+    if (!(count < z->avail_out))
+      count = z->avail_out;
+
+    /* z->total_out is deliberately not maintained here */
+    z->avail_out -= count;
+    zmemcpy(z->next_out, src, count);
+    z->next_out += count;
+    src += count;
+
+    /* done unless the copy ran into the end of the window */
+    if (src != blk->end)
+      break;
+
+    /* wrap around and repeat for the beginning of the window */
+    src = blk->window;
+    if (blk->write == blk->end)
+      blk->write = blk->window;
+  }
+
+  blk->read = src;
+}
+
+#define BMAX 15 /* maximum bit length of any code */
+
+/*
+ * Build Huffman decoding tables from the code lengths b[0..n-1].
+ *
+ * Tables are carved out of hp starting at index *hn; *hn is advanced by
+ * the number of entries used.  *t receives the first table and *m is
+ * clamped to the range of code lengths actually present.
+ *
+ * Returns:
+ *   Z_OK         - tables built; also returned when every length is
+ *                  zero, in which case *t is Z_NULL and *m is 0
+ *   Z_DATA_ERROR - the lengths over-subscribe the code space
+ *   Z_MEM_ERROR  - a new table would push *hn past MANY
+ *   Z_BUF_ERROR  - the code is incomplete (and its longest code is not
+ *                  1 bit)
+ *
+ * The work area v[] is function-static, so all calls share it.
+ */
+local int ZEXPORT huft_build(
+uIntf *b, /* code lengths in bits (all assumed <= BMAX) */
+uInt n, /* number of codes (assumed <= 288) */
+uInt s, /* number of simple-valued codes (0..s-1) */
+const unsigned short *d, /* list of base values for non-simple codes */
+const unsigned short *e, /* list of extra bits for non-simple codes */
+inflate_huft * FAR *t, /* result: starting table */
+uIntf *m, /* maximum lookup bits, returns actual */
+inflate_huft *hp, /* space for trees */
+uInt *hn) /* in/out: number of hufts already used in hp */
+{
+ static uIntf v[288]; /* work area for huft_build */
+ uInt a; /* counter for codes of length k */
+ uInt c[BMAX+1]; /* bit length count table */
+ uInt f; /* i repeats in table every f entries */
+ int g; /* maximum code length */
+ int h; /* table level */
+ uInt i; /* counter, current code */
+ uInt j; /* counter */
+ int k; /* number of bits in current code */
+ int l; /* bits per table (returned in m) */
+ uIntf *p; /* pointer into c[], b[], or v[] */
+ inflate_huft *q; /* points to current table */
+ struct inflate_huft_s r; /* table entry for structure assignment */
+ inflate_huft *u[BMAX]; /* table stack */
+ int w; /* bits before this table == (l * h) */
+ uInt x[BMAX+1]; /* bit offsets, then code stack */
+ uIntf *xp; /* pointer into x */
+ int y; /* number of dummy codes added */
+ uInt z; /* number of entries in current table */
+
+
+ /* Generate counts for each bit length */
+ p=c;
+ y=16; while (y--) *p++ = 0; /* clears all BMAX+1 (= 16) counters */
+ p = b;
+ i = n;
+ do {
+ c[*p++]++; /* assume all entries <= BMAX */
+ } while (--i);
+ if (c[0] == n) /* null input--all zero length codes */
+ {
+ *t = (inflate_huft *)Z_NULL;
+ *m = 0;
+ return Z_OK;
+ }
+
+
+ /* Find minimum and maximum length, bound *m by those */
+ l = *m;
+ for (j = 1; j <= BMAX; j++)
+ if (c[j])
+ break;
+ k = j; /* minimum code length */
+ if ((uInt)l < j)
+ l = j;
+ for (i = BMAX; i; i--)
+ if (c[i])
+ break;
+ g = i; /* maximum code length */
+ if ((uInt)l > i)
+ l = i;
+ *m = l;
+
+
+ /* Adjust last length count to fill out codes, if needed */
+ /* y counts the code patterns still unassigned; going negative means
+    more codes than patterns exist */
+ for (y = 1 << j; j < i; j++, y <<= 1)
+ if ((y -= c[j]) < 0)
+ return Z_DATA_ERROR;
+ if ((y -= c[i]) < 0)
+ return Z_DATA_ERROR;
+ c[i] += y;
+
+
+ /* Generate starting offsets into the value table for each length */
+ x[1] = j = 0;
+ p = c + 1; xp = x + 2;
+ while (--i) { /* note that i == g from above */
+ *xp++ = (j += *p++);
+ }
+
+
+ /* Make a table of values in order of bit lengths */
+ p = b; i = 0;
+ do {
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
+ n = x[g]; /* set n to length of v */
+
+
+ /* Generate the Huffman codes and for each, make the table entries */
+ x[0] = i = 0; /* first Huffman code is zero */
+ p = v; /* grab values in bit order */
+ h = -1; /* no tables yet--level -1 */
+ w = -l; /* bits decoded == (l * h) */
+ u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */
+ q = (inflate_huft *)Z_NULL; /* ditto */
+ z = 0; /* ditto */
+
+ r.base = 0;
+
+ /* go through the bit lengths (k already is bits in shortest code) */
+ for (; k <= g; k++)
+ {
+ a = c[k];
+ while (a--)
+ {
+ int nextw=w;
+ /* here i is the Huffman code of length k bits for value *p */
+ /* make tables up to required level */
+ while (k > (nextw=w + l))
+ {
+ h++;
+
+ /* compute minimum size table less than or equal to l bits */
+ z = g - nextw;
+ z = z > (uInt)l ? (uInt)l : z; /* table size upper limit */
+ if ((f = 1 << (j = k - nextw)) > a + 1) /* try a k-w bit table */
+ { /* too few codes for k-w bit table */
+ f -= a + 1; /* deduct codes from patterns left */
+ xp = c + k;
+ if (j < z)
+ while (++j < z && (f <<= 1) > *++xp) /* try smaller tables up to z bits */
+ {
+ f -= *xp; /* else deduct codes from patterns */
+ }
+ }
+ z = 1 << j; /* table entries for j-bit table */
+
+ /* allocate new table */
+ if (*hn + z > MANY) /* (note: doesn't matter for fixed) */
+ return Z_MEM_ERROR; /* not enough memory */
+ u[h] = q = hp + *hn;
+ *hn += z;
+
+ /* connect to last table, if there is one */
+ if (h)
+ {
+ x[h] = i; /* save pattern for backing up */
+ r.bits = (Byte)l; /* bits to dump before this table */
+ r.exop = (Byte)j; /* bits in this table */
+ j = i >> w;
+ r.base = (uInt)(q - u[h-1] - j); /* offset to this table */
+ u[h-1][j] = r; /* connect to last table */
+ }
+ else
+ *t = q; /* first table is returned result */
+ w=nextw; /* previous table always l bits */
+ }
+
+ /* set up table entry in r */
+ r.bits = (Byte)(k - w);
+ if (p >= v + n)
+ r.exop = 128 + 64; /* out of values--invalid code */
+ else if (*p < s)
+ {
+ r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */
+ r.base = *p++; /* simple code is just the value */
+ }
+ else
+ {
+ r.exop = (Byte)(e[*p - s] + 16 + 64);/* non-simple--look up in lists */
+ r.base = d[*p++ - s];
+ }
+
+ /* fill code-like entries with r */
+ f = 1 << (k - w);
+ for (j = i >> w; j < z; j += f)
+ q[j] = r;
+
+ /* backwards increment the k-bit code i */
+ for (j = 1 << (k - 1); i & j; j >>= 1)
+ i ^= j;
+ i ^= j;
+
+ /* backup over finished tables */
+ while ((i & ((1 << w) - 1)) != x[h])
+ {
+ h--; /* don't need to update q */
+ w -= l;
+ }
+ }
+ }
+
+
+ /* Return Z_BUF_ERROR if we were given an incomplete table */
+ return (y != 0 && g != 1) ? Z_BUF_ERROR : Z_OK;
+}
+
+/*
+ * Decode deflate data from z->next_in into the sliding window held in
+ * z->blocks, flushing the window to z->next_out via inflate_flush.
+ *
+ * The function is a resumable state machine driven by s->mode: whenever
+ * input or output space runs out, the NEEDBITS/NEEDOUT macros save the
+ * working copies and return Z_OK, and the next call resumes in the same
+ * state.  Several cases deliberately fall through into the next one.
+ *
+ * Returns:
+ *   Z_OK           - more input or more output space is required
+ *   Z_STREAM_END   - the last block was decoded and the window drained
+ *   Z_DATA_ERROR   - invalid dynamic-table header, repeat count or trees
+ *   Z_STREAM_ERROR - any other bad state (illegal block type, invalid
+ *                    code, failed bit-length tree, or mode already bad)
+ *
+ * The fixed Huffman tables are built on first use into file-scope
+ * storage guarded only by the plain flag fixed_built.
+ */
+int ZEXPORT nsis_inflate(nsis_z_streamp z)
+{
+ inflate_blocks_statef *s = &z->blocks;
+ inflate_codes_statef *c = &s->sub.decode.t_codes; /* codes state */
+
+ /* lousy two bytes saved by doing this */
+ struct
+ {
+ uInt t; /* temporary storage */
+ uLong b; /* bit buffer */
+ uInt k; /* bits in bit buffer */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+
+ /* CODES variables */
+
+ inflate_huft *j; /* temporary pointer */
+ uInt e; /* extra bits or operation */
+ Bytef *f; /* pointer to copy strings from */
+ } _state;
+
+/* the single-letter names used by the I/O macros are mapped onto the
+   members of _state; they are #undef'd again after the function */
+#define t _state.t
+#define b _state.b
+#define k _state.k
+#define p _state.p
+#define n _state.n
+#define q _state.q
+#define m _state.m
+
+ /* copy input/output information to locals (UPDATE macro restores) */
+ LOAD
+
+ /* process input based on current state */
+ for (;;) switch (s->mode)
+ {
+ case TYPE:
+ /* 3-bit block header: bit 0 = last-block flag, bits 1-2 = type */
+ NEEDBITS(3)
+ t = (uInt)b & 7;
+ DUMPBITS(3)
+ s->last = (t & 1) ? DRY : TYPE;
+ switch (t >> 1)
+ {
+ case 0: /* stored */
+ Tracev((stderr, "inflate: stored block%s\n",
+ LAST ? " (last)" : ""));
+ DUMPBITS(k&7)
+ s->mode = LENS; /* get length of stored block */
+ break;
+ case 1: /* fixed */
+ Tracev((stderr, "inflate: fixed codes block%s\n",
+ LAST ? " (last)" : ""));
+ {
+ if (!fixed_built)
+ {
+ int _k; /* temporary variable */
+ uInt f = 0; /* number of hufts used in fixed_mem */
+ static uIntf lc[288]; /* length list for huft_build */
+
+ /* literal table */
+ /* lengths: 8 for 0..143, 9 for 144..255, 7 for 256..279,
+    8 for 280..287 */
+ for (_k = 0; _k < 288; _k++)
+ {
+ char v=8;
+ if (_k > 143)
+ {
+ if (_k < 256) v++;
+ else if (_k < 280) v--;
+ }
+ lc[_k] = v;
+ }
+
+ /* return value not checked */
+ huft_build(lc, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, fixed_mem, &f);
+
+ /* distance table */
+ for (_k = 0; _k < 30; _k++) lc[_k] = 5;
+
+ /* return value not checked */
+ huft_build(lc, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, fixed_mem, &f);
+
+ /* done */
+ fixed_built++;
+ }
+
+ /* s->sub.decode.t_codes.mode = CODES_START; */
+ s->sub.decode.t_codes.lbits = (Byte)fixed_bl;
+ s->sub.decode.t_codes.dbits = (Byte)fixed_bd;
+ s->sub.decode.t_codes.ltree = fixed_tl;
+ s->sub.decode.t_codes.dtree = fixed_td;
+ }
+ s->mode = CODES_START;
+ break;
+ case 2: /* dynamic */
+ Tracev((stderr, "inflate: dynamic codes block%s\n",
+ LAST ? " (last)" : ""));
+ s->mode = TABLE;
+ break;
+ case 3: /* illegal */
+ /* the only illegal value possible is 3 because we check only 2 bits */
+ goto bad;
+ }
+ break;
+ case LENS:
+ /* only the 16-bit length is read; no complement field is consumed
+    or checked here */
+ NEEDBITS(16)
+ s->sub.left = (uInt)b & 0xffff;
+ b = k = 0; /* dump bits */
+ Tracev((stderr, "inflate: stored length %u\n", s->sub.left));
+ s->mode = s->sub.left ? STORED : (inflate_mode)s->last;
+ break;
+ case STORED:
+ {
+ uInt mn;
+
+ if (n == 0)
+ LEAVE(Z_OK)
+ NEEDOUT
+ /* copy the smallest of: bytes left in block, input, window space */
+ mn = min(m, n);
+ t = min(s->sub.left, mn);
+ zmemcpy(q, p, t);
+ p += t; n -= t;
+ q += t; m -= t;
+ if (!(s->sub.left -= t))
+ s->mode = (inflate_mode)s->last;
+ break;
+ }
+ case TABLE:
+ /* 14 bits: 5 (literal/length count), 5 (distance count), 4 (bit
+    length code count) */
+ NEEDBITS(14)
+ s->sub.trees.table = t = (uInt)b & 0x3fff;
+ if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
+ {
+ s->mode = NZ_BAD;
+ LEAVE(Z_DATA_ERROR);
+ }
+ /* t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f); */
+ DUMPBITS(14)
+ s->sub.trees.index = 0;
+ Tracev((stderr, "inflate: table sizes ok\n"));
+ s->mode = BTREE;
+ /* fall through */
+ case BTREE:
+ while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
+ {
+ NEEDBITS(3)
+ s->sub.trees.t_blens[(int)border[s->sub.trees.index++]] = (uInt)b & 7;
+ DUMPBITS(3)
+ }
+ while (s->sub.trees.index < 19)
+ s->sub.trees.t_blens[(int)border[s->sub.trees.index++]] = 0;
+ s->sub.trees.bb = 7;
+
+ {
+ uInt hn = 0; /* hufts used in space */
+
+ t = huft_build(s->sub.trees.t_blens, 19, 19, Z_NULL, Z_NULL,
+ &s->sub.trees.tb, &s->sub.trees.bb, s->hufts, &hn);
+ if (t != Z_OK || !s->sub.trees.bb)
+ {
+ /* the next loop iteration lands in default: and returns
+    Z_STREAM_ERROR */
+ s->mode = NZ_BAD;
+ break;
+ }
+ }
+
+ s->sub.trees.index = 0;
+ Tracev((stderr, "inflate: bits tree ok\n"));
+ s->mode = DTREE;
+ /* fall through */
+ case DTREE:
+ while (t = s->sub.trees.table,
+ s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
+ {
+ inflate_huft *h;
+ uInt i, j, d;
+
+ t = s->sub.trees.bb;
+ NEEDBITS(t)
+ h = s->sub.trees.tb + ((uInt)b & (uInt)inflate_mask[t]);
+ t = h->bits;
+ d = h->base;
+ if (d < 16)
+ {
+ DUMPBITS(t)
+ s->sub.trees.t_blens[s->sub.trees.index++] = d;
+ }
+ else /* d == 16..18 */
+ {
+ /* i = number of extra bits, j = base repeat count */
+ if (d == 18)
+ {
+ i=7;
+ j=11;
+ }
+ else
+ {
+ i=d-14;
+ j=3;
+ }
+ NEEDBITS(t+i)
+ DUMPBITS(t)
+ j += (uInt)b & (uInt)inflate_mask[i];
+ DUMPBITS(i)
+ i = s->sub.trees.index;
+ t = s->sub.trees.table;
+ /* reject a repeat that overruns the table, or a "copy previous"
+    with no previous length */
+ if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
+ (d == 16 && i < 1))
+ {
+ s->mode = NZ_BAD;
+ LEAVE(Z_DATA_ERROR);
+ }
+ d = d == 16 ? s->sub.trees.t_blens[i - 1] : 0;
+ do {
+ s->sub.trees.t_blens[i++] = d;
+ } while (--j);
+ s->sub.trees.index = i;
+ }
+ }
+ s->sub.trees.tb = Z_NULL;
+ {
+ uInt hn = 0; /* hufts used in space */
+ uInt bl, bd;
+ inflate_huft *tl, *td;
+ int nl,nd;
+ t = s->sub.trees.table;
+
+ nl = 257 + (t & 0x1f);
+ nd = 1 + ((t >> 5) & 0x1f);
+ bl = 9; /* must be <= 9 for lookahead assumptions */
+ bd = 6; /* must be <= 9 for lookahead assumptions */
+
+ t = huft_build(s->sub.trees.t_blens, nl, 257, cplens, cplext, &tl, &bl, s->hufts, &hn);
+ if (bl == 0) t = Z_DATA_ERROR;
+ if (t == Z_OK)
+ {
+ /* build distance tree */
+ t = huft_build(s->sub.trees.t_blens + nl, nd, 0, cpdist, cpdext, &td, &bd, s->hufts, &hn);
+ }
+ if (t != Z_OK || (bd == 0 && nl > 257))
+ {
+ s->mode = NZ_BAD;
+ LEAVE(Z_DATA_ERROR);
+ }
+ Tracev((stderr, "inflate: trees ok\n"));
+
+ /* s->sub.decode.t_codes.mode = CODES_START; */
+ s->sub.decode.t_codes.lbits = (Byte)bl;
+ s->sub.decode.t_codes.dbits = (Byte)bd;
+ s->sub.decode.t_codes.ltree = tl;
+ s->sub.decode.t_codes.dtree = td;
+ }
+ s->mode = CODES_START;
+
+/* from here until the matching #undef, j, e and f name _state members */
+#define j (_state.j)
+#define e (_state.e)
+#define f (_state.f)
+
+ /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+
+ /* fall through */
+ case CODES_START: /* x: set up for LEN */
+ c->sub.code.need = c->lbits;
+ c->sub.code.tree = c->ltree;
+ s->mode = CODES_LEN;
+ /* fall through */
+ case CODES_LEN: /* i: get length/literal/eob next */
+ t = c->sub.code.need;
+ NEEDBITS(t)
+ j = c->sub.code.tree + ((uInt)b & (uInt)inflate_mask[t]);
+ DUMPBITS(j->bits)
+ e = (uInt)(j->exop);
+ if (e == 0) /* literal */
+ {
+ c->sub.lit = j->base;
+ s->mode = CODES_LIT;
+ break;
+ }
+ if (e & 16) /* length */
+ {
+ c->sub.copy.get = e & 15;
+ c->len = j->base;
+ s->mode = CODES_LENEXT;
+ break;
+ }
+ if ((e & 64) == 0) /* next table */
+ {
+ c->sub.code.need = e;
+ c->sub.code.tree = j + j->base;
+ break;
+ }
+ if (e & 32) /* end of block */
+ {
+ s->mode = CODES_WASH;
+ break;
+ }
+ goto bad;
+ case CODES_LENEXT: /* i: getting length extra (have base) */
+ t = c->sub.copy.get;
+ NEEDBITS(t)
+ c->len += (uInt)b & (uInt)inflate_mask[t];
+ DUMPBITS(t)
+ c->sub.code.need = c->dbits;
+ c->sub.code.tree = c->dtree;
+ s->mode = CODES_DIST;
+ /* fall through */
+ case CODES_DIST: /* i: get distance next */
+ t = c->sub.code.need;
+ NEEDBITS(t)
+ j = c->sub.code.tree + ((uInt)b & (uInt)inflate_mask[t]);
+ DUMPBITS(j->bits)
+ e = (uInt)(j->exop);
+ if (e & 16) /* distance */
+ {
+ c->sub.copy.get = e & 15;
+ c->sub.copy.dist = j->base;
+ s->mode = CODES_DISTEXT;
+ break;
+ }
+ if ((e & 64) == 0) /* next table */
+ {
+ c->sub.code.need = e;
+ c->sub.code.tree = j + j->base;
+ break;
+ }
+ goto bad; /* invalid code */
+ case CODES_DISTEXT: /* i: getting distance extra */
+ t = c->sub.copy.get;
+ NEEDBITS(t)
+ c->sub.copy.dist += (uInt)b & (uInt)inflate_mask[t];
+ DUMPBITS(t)
+ s->mode = CODES_COPY;
+ /* fall through */
+ case CODES_COPY: /* o: copying bytes in window, waiting for space */
+ /* source is dist bytes behind q; when that would land before the
+    window start, it is taken relative to s->end instead */
+ f = (uInt)(q - s->window) < c->sub.copy.dist ?
+ s->end - (c->sub.copy.dist - (q - s->window)) :
+ q - c->sub.copy.dist;
+
+ while (c->len)
+ {
+ NEEDOUT
+ OUTBYTE(*f++)
+ if (f == s->end)
+ f = s->window;
+ c->len--;
+ }
+ s->mode = CODES_START;
+ break;
+ case CODES_LIT: /* o: got literal, waiting for output space */
+ NEEDOUT
+ OUTBYTE(c->sub.lit)
+ s->mode = CODES_START;
+ break;
+ case CODES_WASH: /* o: got eob, possibly more output */
+ if (k > 7) /* return unused byte, if any */
+ {
+ k -= 8;
+ n++;
+ p--; /* can always return one */
+ }
+ /* flushing will be done in DRY */
+
+#undef j
+#undef e
+#undef f
+
+ /* fall through */
+ case DRY:
+ FLUSH
+ /* window not fully drained yet: wait for more output space */
+ if (s->write != s->read)
+ LEAVE(Z_OK)
+ if (s->mode == CODES_WASH)
+ {
+ Tracev((stderr, "inflate: codes end, %lu total out\n",
+ z->total_out + (q >= s->read ? q - s->read :
+ (s->end - s->read) + (q - s->window))));
+ }
+ /* DRY if last, TYPE if not */
+ s->mode = (inflate_mode)s->last;
+ if (s->mode == TYPE)
+ break;
+ LEAVE(Z_STREAM_END)
+ /*case BAD:
+ r = Z_DATA_ERROR;
+ LEAVE
+ */
+ default: /* we'll call Z_STREAM_ERROR if BAD anyway */
+ bad:
+ s->mode = NZ_BAD;
+ LEAVE(Z_STREAM_ERROR)
+ }
+}
+
+/* drop the _state member aliases defined inside nsis_inflate */
+#undef t
+#undef b
+#undef k
+#undef p
+#undef n
+#undef q
+#undef m
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_nsis_nulsft.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,544 @@
+/*
+ * Copyright (C) 2007 aCaB <acab at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "others.h"
+#include "cltypes.h"
+#include "nsis_bzlib.h"
+#include "LZMADecode.h"
+#include "nsis_zlib.h"
+#include "matcher.h"
+#include "scanners.h"
+#include "nulsft.h" /* SHUT UP GCC -Wextra */
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#define EC32(x) le32_to_host(x)
+/* Compression methods handled by the NSIS unpacker; one of these is stored
+ * in nsis_st.comp.  nsis_headers() indexes its per-method counters with the
+ * values returned by nsis_detcomp() and prints slots 1, 2 and 3 as bzip2,
+ * lzma and zlib, so the order of the enumerators must not change. */
+enum {
+ COMP_NOT_DETECTED,
+ COMP_BZIP2,
+ COMP_LZMA,
+ COMP_ZLIB,
+ COMP_NOCOMP
+};
+/* Per-archive state of the NSIS unpacker.  cli_scannulsft() zero-fills one
+ * of these and passes it to every unpacking call for that archive. */
+struct nsis_st {
+ int ifd; /* descriptor the archive is read from */
+ int ofd; /* descriptor of the file currently being extracted */
+ off_t off; /* offset of the NSIS header inside ifd */
+ char *dir; /* temporary directory receiving the extracted files */
+ uint32_t asz; /* archive size from the header; reduced as data is consumed */
+ uint32_t hsz; /* header size field read from the NSIS header */
+ uint32_t fno; /* number of extraction attempts so far (0 = headers not parsed yet) */
+ struct { /* method-independent stream cursor, mirrored into bz/lz/z by nsis_decomp() */
+ uint32_t avail_in;
+ unsigned char *next_in;
+ uint32_t avail_out;
+ unsigned char *next_out;
+ } nsis;
+ nsis_bzstream bz; /* bzip2 decoder state */
+ lzma_stream lz; /* lzma decoder state */
+ nsis_z_stream z; /* zlib (inflate) decoder state */
+ unsigned char *freeme; /* buffer holding the whole data area of a solid archive */
+ uint8_t comp; /* one of the COMP_* values */
+ uint8_t solid; /* non-zero when the archive is treated as solid */
+ uint8_t freecomp; /* non-zero when nsis_shutdown() has decoder state to release */
+ uint8_t eof; /* set once the solid stream has reported its end */
+ char ofn[1024]; /* path of the file currently being extracted */
+};
+
+/* __AT__ expands to the string literal " at <file>:<line>" and is appended
+ * to debug messages below.  LINESTR2 forwards to LINESTR so that __LINE__ is
+ * expanded to its number before the # operator stringifies it. */
+#define LINESTR(x) #x
+#define LINESTR2(x) LINESTR(x)
+#define __AT__ " at "__FILE__":"LINESTR2(__LINE__)
+
+/*
+ * Initialise the decoder selected by n->comp for a new compressed stream.
+ *
+ * n->freecomp is set to 1 for bzip2 and lzma (so that nsis_shutdown() will
+ * release their state later) and to 0 for zlib.  Any other method is left
+ * untouched.
+ *
+ * Returns CL_EBZIP when the bzip2 decoder cannot be initialised and
+ * CL_SUCCESS in every other case.
+ */
+static int nsis_init(struct nsis_st *n) {
+    if (n->comp == COMP_BZIP2) {
+        if (nsis_BZ2_bzDecompressInit(&n->bz, 0, 0) != BZ_OK) {
+            return CL_EBZIP;
+        }
+        n->freecomp = 1;
+    } else if (n->comp == COMP_LZMA) {
+        lzmaInit(&n->lz);
+        n->freecomp = 1;
+    } else if (n->comp == COMP_ZLIB) {
+        nsis_inflateInit(&n->z);
+        n->freecomp = 0;
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Release the decoder state created by nsis_init(), if any.
+ *
+ * Nothing happens unless n->freecomp is set; bzip2 and lzma have an explicit
+ * teardown call, the other methods need none.  n->freecomp is cleared
+ * afterwards so that a second call is a no-op.
+ */
+static void nsis_shutdown(struct nsis_st *n) {
+    if (n->freecomp) {
+        if (n->comp == COMP_BZIP2) {
+            nsis_BZ2_bzDecompressEnd(&n->bz);
+        } else if (n->comp == COMP_LZMA) {
+            lzmaShutdown(&n->lz);
+        }
+
+        n->freecomp = 0;
+    }
+}
+
+/*
+ * Run one step of the decoder selected by n->comp.
+ *
+ * The method-independent cursor in n->nsis (next_in/avail_in and
+ * next_out/avail_out) is copied into the decoder's own stream structure, the
+ * decoder is called once, and the updated cursor is copied back.
+ *
+ * Returns CL_SUCCESS when the decoder reports OK, CL_BREAK when it reports
+ * the end of the stream, and CL_EFORMAT for any other decoder result or when
+ * n->comp is not bzip2, lzma or zlib.
+ */
+static int nsis_decomp(struct nsis_st *n) {
+    int status = CL_EFORMAT;
+    int rc;
+
+    if (n->comp == COMP_BZIP2) {
+        n->bz.avail_in = n->nsis.avail_in;
+        n->bz.next_in = n->nsis.next_in;
+        n->bz.avail_out = n->nsis.avail_out;
+        n->bz.next_out = n->nsis.next_out;
+
+        rc = nsis_BZ2_bzDecompress(&n->bz);
+        if (rc == BZ_OK) {
+            status = CL_SUCCESS;
+        } else if (rc == BZ_STREAM_END) {
+            status = CL_BREAK;
+        }
+
+        n->nsis.avail_in = n->bz.avail_in;
+        n->nsis.next_in = n->bz.next_in;
+        n->nsis.avail_out = n->bz.avail_out;
+        n->nsis.next_out = n->bz.next_out;
+    } else if (n->comp == COMP_LZMA) {
+        n->lz.avail_in = n->nsis.avail_in;
+        n->lz.next_in = n->nsis.next_in;
+        n->lz.avail_out = n->nsis.avail_out;
+        n->lz.next_out = n->nsis.next_out;
+
+        rc = lzmaDecode(&n->lz);
+        if (rc == LZMA_OK) {
+            status = CL_SUCCESS;
+        } else if (rc == LZMA_STREAM_END) {
+            status = CL_BREAK;
+        }
+
+        n->nsis.avail_in = n->lz.avail_in;
+        n->nsis.next_in = n->lz.next_in;
+        n->nsis.avail_out = n->lz.avail_out;
+        n->nsis.next_out = n->lz.next_out;
+    } else if (n->comp == COMP_ZLIB) {
+        n->z.avail_in = n->nsis.avail_in;
+        n->z.next_in = n->nsis.next_in;
+        n->z.avail_out = n->nsis.avail_out;
+        n->z.next_out = n->nsis.next_out;
+
+        rc = nsis_inflate(&n->z);
+        if (rc == Z_OK) {
+            status = CL_SUCCESS;
+        } else if (rc == Z_STREAM_END) {
+            status = CL_BREAK;
+        }
+
+        n->nsis.avail_in = n->z.avail_in;
+        n->nsis.next_in = n->z.next_in;
+        n->nsis.avail_out = n->z.avail_out;
+        n->nsis.next_out = n->z.next_out;
+    }
+
+    return status;
+}
+/* Extract the next archive member into a newly created file under n->dir.
+ *
+ * The output file is named "headers" for the first member and
+ * "content.NNN" afterwards; its path is left in n->ofn.
+ *
+ * Non-solid archives: every member is preceded by a 4-byte little-endian
+ * size whose top bit marks the member as compressed; the member is read
+ * into a temporary buffer and either copied or decompressed on its own.
+ * Solid archives: the remaining data area is read once into n->freeme and
+ * decoded as one continuous stream in which every member is preceded by a
+ * 4-byte size.
+ *
+ * Returns CL_SUCCESS with n->ofd still open (the caller scans and closes
+ * it), CL_BREAK when there is nothing more to extract, or another CL_E*
+ * code; on every path that returns after the file was opened with a value
+ * other than CL_SUCCESS the descriptor has already been closed.
+ */
+static int nsis_unpack_next(struct nsis_st *n, cli_ctx *ctx) {
+ unsigned char *ibuf;
+ uint32_t size, loops;
+ int ret;
+ unsigned char obuf[BUFSIZ];
+
+ if (n->eof) { /* the solid stream already reported its end on a previous call */
+ cli_dbgmsg("NSIS: extraction complete\n");
+ return CL_BREAK;
+ }
+ if (ctx->limits && ctx->limits->maxfiles && n->fno >= ctx->limits->maxfiles) {
+ cli_dbgmsg("NSIS: Files limit reached (max: %u)\n", ctx->limits->maxfiles);
+ return CL_EMAXFILES;
+ }
+
+ if (n->fno)
+ snprintf(n->ofn, 1023, "%s/content.%.3u", n->dir, n->fno);
+ else
+ snprintf(n->ofn, 1023, "%s/headers", n->dir);
+
+ n->fno++; /* counted even if the extraction below fails */
+
+ if ((n->ofd=open(n->ofn, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, 0600))==-1) {
+ cli_errmsg("NSIS: unable to create output file %s - aborting.", n->ofn);
+ return CL_EIO;
+ }
+
+ if (!n->solid) {
+ if (cli_readn(n->ifd, &size, 4)!=4) { /* 4-byte size prefix of the next member */
+ cli_dbgmsg("NSIS: reached EOF - extraction complete\n");
+ close(n->ofd);
+ return CL_BREAK;
+ }
+ if (n->asz==4) { /* only four bytes were left in the archive */
+ cli_dbgmsg("NSIS: reached CRC - extraction complete\n");
+ close(n->ofd);
+ return CL_BREAK;
+ }
+ loops = EC32(size); /* raw size field, top bit included */
+ if (!(size = (loops&~0x80000000))) { /* NOTE(review): returns without adjusting n->asz for the 4 bytes just read - confirm intended */
+ cli_dbgmsg("NSIS: empty file found\n");
+ return CL_SUCCESS;
+ }
+ if (n->asz <4 || size > n->asz-4) {
+ cli_dbgmsg("NSIS: next file is outside the archive\n");
+ close(n->ofd);
+ return CL_BREAK;
+ }
+
+ n->asz -= size+4; /* account for the size prefix and the member data */
+
+ if (ctx->limits && ctx->limits->maxfilesize && size > ctx->limits->maxfilesize) {
+ cli_dbgmsg("NSIS: Skipping file due to size limit (%u, max: %lu)\n", size, ctx->limits->maxfilesize);
+ close(n->ofd);
+ if (lseek(n->ifd, size, SEEK_CUR)==-1) return CL_EIO; /* step over the oversized member */
+ return CL_EMAXSIZE;
+ }
+ if (!(ibuf= (unsigned char *) cli_malloc(size))) {
+ cli_dbgmsg("NSIS: out of memory"__AT__"\n");
+ close(n->ofd);
+ return CL_EMEM;
+ }
+ if (cli_readn(n->ifd, ibuf, size) != (ssize_t) size) {
+ cli_dbgmsg("NSIS: cannot read %u bytes"__AT__"\n", size);
+ free(ibuf);
+ close(n->ofd);
+ return CL_EIO;
+ }
+ if (loops==size) { /* top bit was clear: member is copied as is */
+ if (cli_writen(n->ofd, ibuf, size) != (ssize_t) size) {
+ cli_dbgmsg("NSIS: cannot write output file"__AT__"\n");
+ free(ibuf);
+ close(n->ofd);
+ return CL_EIO;
+ }
+ } else { /* top bit was set: member goes through the decoder */
+ if ((ret=nsis_init(n))!=CL_SUCCESS) {
+ cli_dbgmsg("NSIS: decompressor init failed"__AT__"\n");
+ free(ibuf);
+ close(n->ofd);
+ return ret;
+ }
+
+ n->nsis.avail_in = size;
+ n->nsis.next_in = ibuf;
+ n->nsis.next_out = obuf;
+ n->nsis.avail_out = BUFSIZ;
+ loops=0; /* from here on: count of consecutive decoder calls that produced no output */
+
+ while ((ret=nsis_decomp(n))==CL_SUCCESS) {
+ if ((size = n->nsis.next_out - obuf)) { /* bytes produced by this step */
+ if (cli_writen(n->ofd, obuf, size) != (ssize_t) size) {
+ cli_dbgmsg("NSIS: cannot write output file"__AT__"\n");
+ free(ibuf);
+ close(n->ofd);
+ return CL_EIO;
+ }
+ n->nsis.next_out = obuf;
+ n->nsis.avail_out = BUFSIZ;
+ loops=0;
+ if (ctx->limits && ctx->limits->maxfilesize && size > ctx->limits->maxfilesize) { /* NOTE(review): size is only the chunk just written (at most BUFSIZ), not the running total - confirm intended */
+ cli_dbgmsg("NSIS: Skipping file due to size limit (%u, max: %lu)\n", size, ctx->limits->maxfilesize);
+ free(ibuf);
+ close(n->ofd);
+ nsis_shutdown(n);
+ return CL_EMAXSIZE;
+ }
+ } else if (++loops > 10) { /* decoder is making no progress */
+ cli_dbgmsg("NSIS: xs looping, breaking out"__AT__"\n");
+ ret = CL_BREAK;
+ break;
+ }
+ }
+
+ if (ret != CL_BREAK) {
+ cli_dbgmsg("NSIS: bad stream"__AT__"\n");
+ free(ibuf);
+ close(n->ofd);
+ return CL_EFORMAT;
+ }
+
+ if (cli_writen(n->ofd, obuf, n->nsis.next_out - obuf) != n->nsis.next_out - obuf) { /* flush what the final step left in obuf */
+ cli_dbgmsg("NSIS: cannot write output file"__AT__"\n");
+ free(ibuf);
+ close(n->ofd);
+ return CL_EIO;
+ }
+ nsis_shutdown(n);
+ }
+
+ free(ibuf);
+ return CL_SUCCESS;
+
+ } else {
+ if (!n->freeme) { /* first call on a solid archive: set up the decoder and load the data area */
+ if ((ret=nsis_init(n))!=CL_SUCCESS) {
+ cli_dbgmsg("NSIS: decompressor init failed\n");
+ close(n->ofd);
+ return ret;
+ }
+ if (!(n->freeme= (unsigned char *) cli_malloc(n->asz))) {
+ cli_dbgmsg("NSIS: out of memory\n");
+ close(n->ofd);
+ return CL_EMEM;
+ }
+ if (cli_readn(n->ifd, n->freeme, n->asz) != (ssize_t) n->asz) {
+ cli_dbgmsg("NSIS: cannot read %u bytes"__AT__"\n", n->asz);
+ close(n->ofd);
+ return CL_EIO;
+ }
+ n->nsis.next_in = n->freeme;
+ n->nsis.avail_in = n->asz;
+ }
+
+ if (n->nsis.avail_in<=4) {
+ cli_dbgmsg("NSIS: extraction complete\n");
+ close(n->ofd);
+ return CL_BREAK;
+ }
+ n->nsis.next_out = obuf;
+ n->nsis.avail_out = 4; /* decode exactly the 4-byte size of the next member */
+ loops = 0;
+
+ while ((ret=nsis_decomp(n))==CL_SUCCESS) {
+ if (n->nsis.next_out - obuf == 4) break;
+ if (++loops > 20) {
+ cli_dbgmsg("NSIS: xs looping, breaking out"__AT__"\n");
+ ret = CL_BREAK;
+ break;
+ }
+ }
+
+ if (ret != CL_SUCCESS) {
+ cli_dbgmsg("NSIS: bad stream"__AT__"\n");
+ close(n->ofd);
+ return CL_EFORMAT;
+ }
+
+ size=cli_readint32(obuf); /* decoded size of the member that follows */
+ if (ctx->limits && ctx->limits->maxfilesize && size > ctx->limits->maxfilesize) {
+ cli_dbgmsg("NSIS: Breaking out due to filesize limit (%u, max: %lu) in solid archive\n", size, ctx->limits->maxfilesize);
+ close(n->ofd);
+ return CL_EFORMAT;
+ }
+
+ n->nsis.next_out = obuf;
+ n->nsis.avail_out = MIN(BUFSIZ,size);
+ loops = 0;
+
+ while (size && (ret=nsis_decomp(n))==CL_SUCCESS) { /* size counts the bytes of this member still to be written */
+ unsigned int wsz;
+ if ((wsz = n->nsis.next_out - obuf)) {
+ if (cli_writen(n->ofd, obuf, wsz) != (ssize_t) wsz) {
+ close(n->ofd);
+ return CL_EIO;
+ }
+ size-=wsz;
+ n->nsis.next_out = obuf;
+ n->nsis.avail_out = MIN(size,BUFSIZ);
+ } else if ( ++loops > 20 ) {
+ cli_dbgmsg("NSIS: xs looping, breaking out"__AT__"\n");
+ ret = CL_BREAK;
+ break;
+ }
+ }
+
+ if (ret == CL_BREAK) { /* stream ended (or stalled): flush and remember that nothing follows */
+ if (cli_writen(n->ofd, obuf, n->nsis.next_out - obuf) != n->nsis.next_out - obuf) {
+ close(n->ofd);
+ return CL_EIO;
+ }
+ n->eof=1;
+ } else if (ret != CL_SUCCESS) {
+ cli_dbgmsg("NSIS: bad stream"__AT__"\n");
+ close(n->ofd);
+ return CL_EFORMAT;
+ }
+
+ return CL_SUCCESS;
+ }
+
+}
+
+/*
+ * Guess the compression method from the first bytes of a compressed member.
+ *
+ * A leading '1' is taken as bzip2; otherwise a 32-bit value (top bit
+ * ignored) equal to 0x5d is taken as lzma; everything else is reported as
+ * zlib.  b must point at four or more readable bytes.
+ */
+static uint8_t nsis_detcomp(const char *b) {
+    uint8_t method = COMP_ZLIB;
+
+    if (*b == '1') {
+        method = COMP_BZIP2;
+    } else if ((cli_readint32(b) & ~0x80000000) == 0x5d) {
+        method = COMP_LZMA;
+    }
+
+    return method;
+}
+/* Parse the NSIS header at n->off, guess the archive layout and extract the
+ * first member.
+ *
+ * The 28-byte header supplies the header size (offset 0x14) and the archive
+ * size (offset 0x18); the archive size is clamped to what the file actually
+ * holds.  The size-prefixed records that follow are then walked: if a
+ * record would end past the archive the archive is taken to be solid,
+ * otherwise the compression method is chosen by counting what
+ * nsis_detcomp() reports for each compressed record.
+ *
+ * Returns CL_EIO on any read or seek failure, otherwise whatever
+ * nsis_unpack_next() returns for the first member.
+ */
+static int nsis_headers(struct nsis_st *n, cli_ctx *ctx) {
+ char buf[28];
+ struct stat st;
+ uint32_t pos;
+ int i;
+ uint8_t comps[] = {0, 0, 0, 0}, trunc = 0; /* comps[] is indexed by COMP_* value */
+
+ if (fstat(n->ifd, &st)==-1 ||
+ lseek(n->ifd, n->off, SEEK_SET)==-1 ||
+ cli_readn(n->ifd, buf, 28) != 28)
+ return CL_EIO;
+
+ n->hsz = (uint32_t)cli_readint32(buf+0x14);
+ n->asz = (uint32_t)cli_readint32(buf+0x18);
+
+ cli_dbgmsg("NSIS: Header info - Flags=%x, Header size=%x, Archive size=%x\n", cli_readint32(buf), n->hsz, n->asz);
+
+ if (st.st_size - n->off < (off_t) n->asz) { /* header claims more data than the file holds */
+ cli_dbgmsg("NSIS: Possibly truncated file\n");
+ n->asz = st.st_size - n->off;
+ trunc++;
+ } else if (st.st_size - n->off != (off_t) n->asz) {
+ cli_dbgmsg("NSIS: Overlays found\n");
+ }
+
+ n->asz -= 0x1c; /* skip the 28-byte header just read; NOTE(review): wraps around if the header's archive size is below 0x1c - confirm callers tolerate this */
+
+ /* Guess if solid */
+ for (i=0, pos=0;pos < n->asz-4;i++) { /* NOTE(review): n->asz-4 is unsigned and wraps when n->asz < 4 */
+ int32_t nextsz;
+ if (cli_readn(n->ifd, buf+4, 4)!=4) return CL_EIO;
+ nextsz=cli_readint32(buf+4);
+ if (!i) n->comp = nsis_detcomp(buf+4); /* first record: provisional method, used as is for solid archives */
+ if (nextsz&0x80000000) { /* compressed record: sample its first four bytes */
+ nextsz&=~0x80000000;
+ if (cli_readn(n->ifd, buf+4, 4)!=4) return CL_EIO;
+ comps[nsis_detcomp(buf+4)]++;
+ nextsz-=4;
+ pos+=4;
+ }
+ if ((pos+=4+nextsz) > n->asz) { /* record would end past the archive */
+ n->solid = 1;
+ break;
+ }
+
+ if (lseek(n->ifd, nextsz, SEEK_CUR)==-1) return CL_EIO;
+ }
+
+ if (trunc && i>=2) n->solid=0; /* truncated file in which at least two records were walked: do not treat as solid */
+
+ cli_dbgmsg("NSIS: solid compression%s detected\n", (n->solid)?"":" not");
+
+ /* Guess the compression method */
+ if (!n->solid) {
+ cli_dbgmsg("NSIS: bzip2 %u - lzma %u - zlib %u\n", comps[1], comps[2], comps[3]);
+ n->comp = (comps[1]<comps[2]) ? (comps[2]<comps[3] ? COMP_ZLIB : COMP_LZMA) : (comps[1]<comps[3] ? COMP_ZLIB : COMP_BZIP2);
+ }
+
+ if (lseek(n->ifd, n->off+0x1c, SEEK_SET)==-1) return CL_EIO; /* rewind to the first record */
+
+ return nsis_unpack_next(n, ctx);
+}
+
+
+
+/*
+ * Extract one member of the archive.  The very first call (n->fno == 0)
+ * goes through nsis_headers(), which parses the header before extracting;
+ * later calls go straight to nsis_unpack_next().
+ */
+static int cli_nsis_unpack(struct nsis_st *n, cli_ctx *ctx) {
+    if (n->fno) {
+        return nsis_unpack_next(n, ctx);
+    }
+    return nsis_headers(n, ctx);
+}
+
+/*
+ * Release what the unpacker still holds: any live decoder state and, for
+ * solid archives, the buffer containing the archive data.
+ */
+static void cli_nsis_free(struct nsis_st *n) {
+    nsis_shutdown(n);
+
+    if (!n->solid) {
+        return;
+    }
+    if (n->freeme) {
+        free(n->freeme);
+    }
+}
+
+/*
+ * Scan an NSIS installer found at `offset` inside descriptor `desc`.
+ *
+ * Members are extracted one at a time into a temporary directory and each
+ * one is scanned before the next is extracted: the first extracted file is
+ * scanned with cli_scandesc(), the others with cli_magic_scandesc().  The
+ * loop stops at the first result other than CL_SUCCESS.
+ *
+ * Returns CL_EMAXREC when the archive recursion limit is already reached,
+ * CL_ETMPDIR when the temporary directory cannot be created, CL_CLEAN when
+ * extraction simply ran out of members, and otherwise the first
+ * non-successful extraction or scan result.  A member skipped for its size
+ * is reported as a virus when BLOCKMAX is in effect; otherwise it ends the
+ * scan of a solid archive and is ignored in a non-solid one.
+ */
+int cli_scannulsft(int desc, cli_ctx *ctx, off_t offset) {
+    struct nsis_st nsist;
+    int ret;
+
+    cli_dbgmsg("in scannulsft()\n");
+    if(ctx->limits && ctx->limits->maxreclevel && ctx->arec >= ctx->limits->maxreclevel) {
+        cli_dbgmsg("Archive recursion limit exceeded (arec == %u).\n", ctx->arec+1);
+        return CL_EMAXREC;
+    }
+
+    memset(&nsist, 0, sizeof(nsist));
+    nsist.ifd = desc;
+    nsist.off = offset;
+
+    nsist.dir = cli_gentemp(NULL);
+    if (!nsist.dir) {
+        return CL_ETMPDIR;
+    }
+    if (mkdir(nsist.dir, 0700)) {
+        cli_dbgmsg("NSIS: Can't create temporary directory %s\n", nsist.dir);
+        free(nsist.dir);
+        return CL_ETMPDIR;
+    }
+
+    if (cli_leavetemps_flag) {
+        cli_dbgmsg("NSIS: Extracting files to %s\n", nsist.dir);
+    }
+
+    ctx->arec++;
+
+    for (;;) {
+        ret = cli_nsis_unpack(&nsist, ctx);
+
+        if (ret == CL_SUCCESS) {
+            /* A member was extracted: rewind it, scan it, then drop it. */
+            cli_dbgmsg("NSIS: Successully extracted file #%u\n", nsist.fno);
+            lseek(nsist.ofd, 0, SEEK_SET);
+            if (nsist.fno == 1) {
+                ret = cli_scandesc(nsist.ofd, ctx, 0, 0, 0, NULL);
+            } else {
+                ret = cli_magic_scandesc(nsist.ofd, ctx);
+            }
+            close(nsist.ofd);
+            if (!cli_leavetemps_flag) {
+                unlink(nsist.ofn);
+            }
+        } else if (ret == CL_EMAXSIZE) {
+            /* Oversized member: flag it, stop, or carry on. */
+            if (BLOCKMAX) {
+                *ctx->virname = "NSIS.ExceededFileSize";
+                ret = CL_VIRUS;
+            } else if (nsist.solid) {
+                ret = CL_BREAK;
+            } else {
+                ret = CL_SUCCESS;
+            }
+        }
+
+        if (ret != CL_SUCCESS) {
+            break;
+        }
+    }
+
+    if (ret == CL_BREAK) {
+        ret = CL_CLEAN;
+    }
+
+    cli_nsis_free(&nsist);
+
+    if (!cli_leavetemps_flag) {
+        cli_rmdirs(nsist.dir);
+    }
+    free(nsist.dir);
+
+    ctx->arec--;
+    return ret;
+}
+
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_ole2_extract.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,891 @@
+/*
+ * Extract component parts of OLE2 files (e.g. MS Office Documents)
+ *
+ * Copyright (C) 2004-2007 trog at uncon.org
+ *
+ * This code is based on the OpenOffice and libgsf sources.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <ctype.h>
+#include <stdlib.h>
+#include "clamav.h"
+
+#if HAVE_MMAP
+#if HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#else /* HAVE_SYS_MMAN_H */
+#undef HAVE_MMAP
+#endif
+#endif
+
+#include "cltypes.h"
+#include "others.h"
+#include "ole2_extract.h"
+
+#include "mbox.h"
+#include "blob.h" /* sanitiseName() */
+/* Values stored in an OLE2 file are converted with the little-endian-to-host
+ * helpers below before use.  When HAVE_ATTRIB_PACKED is not defined,
+ * __attribute__ is defined away so that the packed annotations on the
+ * structures in this file expand to nothing. */
+#define ole2_endian_convert_16(v) le16_to_host((uint16_t)(v))
+#define ole2_endian_convert_32(v) le32_to_host((uint32_t)(v))
+
+#ifndef HAVE_ATTRIB_PACKED
+#define __attribute__(x)
+#endif
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack(1)
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack 1
+#endif
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+/* Header of an OLE2 container followed by decoding state kept alongside it.
+ * The fields up to and including bat_array mirror the on-disk header and are
+ * declared packed; the members after it are bookkeeping used by the
+ * functions in this file. */
+typedef struct ole2_header_tag
+{
+ unsigned char magic[8]; /* should be: 0xd0cf11e0a1b11ae1 */
+ unsigned char clsid[16];
+ uint16_t minor_version __attribute__ ((packed));
+ uint16_t dll_version __attribute__ ((packed));
+ int16_t byte_order __attribute__ ((packed)); /* -2=intel */
+
+ uint16_t log2_big_block_size __attribute__ ((packed)); /* usually 9 (2^9 = 512) */
+ uint32_t log2_small_block_size __attribute__ ((packed)); /* usually 6 (2^6 = 128) */
+
+ int32_t reserved[2] __attribute__ ((packed));
+ int32_t bat_count __attribute__ ((packed)); /* upper bound checked by ole2_get_next_bat_block() */
+ int32_t prop_start __attribute__ ((packed)); /* first block of the property tree */
+
+ uint32_t signature __attribute__ ((packed));
+ uint32_t sbat_cutoff __attribute__ ((packed)); /* cutoff for files held in small blocks (4096) */
+
+ int32_t sbat_start __attribute__ ((packed)); /* first block of the small-block allocation table */
+ int32_t sbat_block_count __attribute__ ((packed));
+ int32_t xbat_start __attribute__ ((packed)); /* first extended BAT block */
+ int32_t xbat_count __attribute__ ((packed));
+ int32_t bat_array[109] __attribute__ ((packed)); /* BAT block numbers held in the header itself */
+
+ /* not part of the ole2 header, but stuff we need in order to decode */
+ /* must take account of the size of variables below here when
+ reading the header */
+ int32_t sbat_root_start __attribute__ ((packed)); /* start block of the root entry, set while walking the property tree */
+ unsigned char *m_area; /* when non-NULL, ole2_read_block() copies blocks from this memory instead of reading the descriptor */
+ off_t m_length; /* number of bytes available at m_area */
+ bitset_t *bitset; /* property indices already visited, for loop detection */
+ uint32_t max_block_no; /* largest property index / block number accepted */
+} ole2_header_t;
+/* One entry of the OLE2 property tree as laid out in the file.
+ * ole2_walk_property_tree() reads these four at a time from a big block and
+ * converts the multi-byte fields to host byte order before using them. */
+typedef struct property_tag
+{
+ char name[64]; /* in unicode */
+ uint16_t name_size __attribute__ ((packed)); /* size of name in bytes; values above 64 are rejected */
+ unsigned char type; /* 1=dir 2=file 5=root */
+ unsigned char color; /* black or red */
+ uint32_t prev __attribute__ ((packed)); /* index of another entry, walked recursively */
+ uint32_t next __attribute__ ((packed)); /* index of another entry, walked recursively */
+ uint32_t child __attribute__ ((packed)); /* index of another entry, walked recursively */
+
+ unsigned char clsid[16];
+ uint32_t user_flags __attribute__ ((packed));
+
+ uint32_t create_lowdate __attribute__ ((packed));
+ uint32_t create_highdate __attribute__ ((packed));
+ uint32_t mod_lowdate __attribute__ ((packed));
+ uint32_t mod_highdate __attribute__ ((packed));
+ uint32_t start_block __attribute__ ((packed)); /* first block of the entry's data */
+ uint32_t size __attribute__ ((packed)); /* size of the entry's data in bytes */
+ unsigned char reserved[4];
+} property_t;
+
+#ifdef HAVE_PRAGMA_PACK
+#pragma pack()
+#endif
+
+#ifdef HAVE_PRAGMA_PACK_HPPA
+#pragma pack
+#endif
+/* The eight signature bytes an OLE2 header's magic field should contain
+ * (same value as noted on ole2_header_t.magic). */
+static unsigned char magic_id[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};
+
+/*
+ * Build a printable, heap-allocated copy of a property name.
+ *
+ * `name` holds two-byte characters and `size` is its length in bytes; only
+ * the first byte of each pair is examined, and the final pair (the
+ * terminator) is skipped.  Printable 7-bit characters are copied; a byte
+ * value 0..9 becomes "_<digit>_"; anything else becomes '_' followed by
+ * five letters derived from the two bytes and a closing '_'.
+ *
+ * Returns NULL when the name starts with a zero byte, when size is outside
+ * 1..64, when allocation fails, or when the result would be empty.  The
+ * caller frees the returned string.
+ */
+static char *get_property_name(char *name, int size)
+{
+    char *newname, *out;
+    int pos;
+
+    if (*name == 0 || size <= 0 || size > 64) {
+        return NULL;
+    }
+
+    /* Each input pair expands to at most seven output characters. */
+    newname = (char *) cli_malloc(size*7);
+    if (newname == NULL) {
+        return NULL;
+    }
+
+    out = newname;
+    /* size-2 to ignore the trailing terminator pair */
+    for (pos = 0; pos < size-2; pos += 2) {
+        if ((!(name[pos]&0x80)) && isprint(name[pos])) {
+            *out++ = name[pos];
+            continue;
+        }
+
+        if (name[pos] < 10 && name[pos] >= 0) {
+            *out++ = '_';
+            *out++ = name[pos] + '0';
+        } else {
+            const uint16_t x = (((uint16_t)name[pos]) << 8) | name[pos+1];
+
+            *out++ = '_';
+            *out++ = 'a'+((x&0xF));
+            *out++ = 'a'+((x>>4)&0xF);
+            *out++ = 'a'+((x>>8)&0xF);
+            *out++ = 'a'+((x>>16)&0xF);
+            *out++ = 'a'+((x>>24)&0xF);
+        }
+        *out++ = '_';
+    }
+    *out = '\0';
+
+    if (strlen(newname) == 0) {
+        free(newname);
+        return NULL;
+    }
+    return newname;
+}
+
+/*
+ * Emit the printable form of a property name as a debug message,
+ * right-aligned in a 34-character field.  Nothing is printed when no
+ * printable name can be produced.
+ */
+static void print_property_name(char *pname, int size)
+{
+    char *name = get_property_name(pname, size);
+
+    if (name != NULL) {
+        cli_dbgmsg("%34s ", name);
+        free(name);
+    }
+}
+
+/*
+ * Emit a one-line debug description of a property entry: its name, its type
+ * (file, dir, root or the raw number), its colour flag (r, b or u) and its
+ * size and user flags in hex.  Entries whose name_size exceeds 64 only
+ * produce an error note.
+ */
+static void print_ole2_property(property_t *property)
+{
+    const unsigned char type = property->type;
+    const unsigned char color = property->color;
+
+    if (property->name_size > 64) {
+        cli_dbgmsg("[err name len: %d]\n", property->name_size);
+        return;
+    }
+
+    print_property_name(property->name, property->name_size);
+
+    if (type == 2) {
+        cli_dbgmsg(" [file] ");
+    } else if (type == 1) {
+        cli_dbgmsg(" [dir ] ");
+    } else if (type == 5) {
+        cli_dbgmsg(" [root] ");
+    } else {
+        cli_dbgmsg(" [%d]", property->type);
+    }
+
+    if (color == 0) {
+        cli_dbgmsg(" r ");
+    } else if (color == 1) {
+        cli_dbgmsg(" b ");
+    } else {
+        cli_dbgmsg(" u ");
+    }
+
+    cli_dbgmsg(" 0x%.8x 0x%.8x\n", property->size, property->user_flags);
+}
+
+/*
+ * Dump the fields of an OLE2 header as debug messages.  A NULL header is
+ * silently ignored.
+ */
+static void print_ole2_header(ole2_header_t *hdr)
+{
+    int idx;
+
+    if (hdr == NULL) {
+        return;
+    }
+
+    cli_dbgmsg("\nMagic:\t\t\t0x");
+    for (idx = 0; idx < 8; idx++) {
+        cli_dbgmsg("%x", hdr->magic[idx]);
+    }
+    cli_dbgmsg("\n");
+
+    cli_dbgmsg("CLSID:\t\t\t{");
+    for (idx = 0; idx < 16; idx++) {
+        cli_dbgmsg("%x ", hdr->clsid[idx]);
+    }
+    cli_dbgmsg("}\n");
+
+    /* Version and geometry */
+    cli_dbgmsg("Minor version:\t\t0x%x\n", hdr->minor_version);
+    cli_dbgmsg("DLL version:\t\t0x%x\n", hdr->dll_version);
+    cli_dbgmsg("Byte Order:\t\t%d\n", hdr->byte_order);
+    cli_dbgmsg("Big Block Size:\t\t%i\n", hdr->log2_big_block_size);
+    cli_dbgmsg("Small Block Size:\t%i\n", hdr->log2_small_block_size);
+
+    /* Allocation tables */
+    cli_dbgmsg("BAT count:\t\t%d\n", hdr->bat_count);
+    cli_dbgmsg("Prop start:\t\t%d\n", hdr->prop_start);
+    cli_dbgmsg("SBAT cutoff:\t\t%d\n", hdr->sbat_cutoff);
+    cli_dbgmsg("SBat start:\t\t%d\n", hdr->sbat_start);
+    cli_dbgmsg("SBat block count:\t%d\n", hdr->sbat_block_count);
+    cli_dbgmsg("XBat start:\t\t%d\n", hdr->xbat_start);
+    cli_dbgmsg("XBat block count:\t%d\n\n", hdr->xbat_count);
+}
+
+/*
+ * Read big block number `blockno` of the OLE2 container into `buff`.
+ *
+ * The block is taken from the memory area hdr->m_area when one is set and
+ * from descriptor `fd` otherwise.  `buff` must have room for one big block
+ * (1 << hdr->log2_big_block_size bytes).
+ *
+ * Returns TRUE on success.  Returns FALSE when blockno is negative, when
+ * the block-size exponent cannot be represented, when the computed file
+ * offset is negative or lies outside the memory area, or when the seek or
+ * read fails.
+ */
+static int ole2_read_block(int fd, ole2_header_t *hdr, void *buff, int32_t blockno)
+{
+    off_t offset, offend;
+    int block_size;
+
+    if (blockno < 0) {
+        return FALSE;
+    }
+
+    /* The exponent comes from the file; 1 << n is only defined while the
+       result fits in an int. */
+    if (hdr->log2_big_block_size > 30) {
+        return FALSE;
+    }
+    block_size = 1 << hdr->log2_big_block_size;
+
+    /* other methods: (blockno+1) * 512 or (blockno * block_size) + 512; */
+    /* Widen before shifting: blockno is a 32-bit value taken from the file
+       and shifting it as an int can overflow for large block numbers. */
+    offset = ((off_t) blockno << hdr->log2_big_block_size) + 512; /* 512 is header size */
+    if (offset < 0) {
+        return FALSE;
+    }
+
+    if (hdr->m_area == NULL) {
+        if (lseek(fd, offset, SEEK_SET) != offset) {
+            return FALSE;
+        }
+        if (cli_readn(fd, buff, block_size) != block_size) {
+            return FALSE;
+        }
+    } else {
+        /* The whole block must lie inside the memory area. */
+        offend = offset + block_size;
+        if ((offend <= 0) || (offend > hdr->m_length)) {
+            return FALSE;
+        }
+        memcpy(buff, hdr->m_area+offset, block_size);
+    }
+    return TRUE;
+}
+
+/*
+ * Look up the successor of `current_block` in the block allocation table,
+ * using the BAT blocks listed in the header's bat_array.
+ *
+ * Each BAT block holds 128 entries, so entry N lives in BAT block N / 128 at
+ * position N % 128.
+ *
+ * Returns the (host-order) table entry, -1 when current_block is negative or
+ * the BAT block cannot be read, and -10 when the BAT block index exceeds
+ * hdr->bat_count.
+ */
+static int32_t ole2_get_next_bat_block(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+    uint32_t bat[128];
+    int32_t table_no, slot;
+
+    if (current_block < 0) {
+        return -1;
+    }
+
+    table_no = current_block / 128;
+    if (table_no > hdr->bat_count) {
+        cli_dbgmsg("bat_array index error\n");
+        return -10;
+    }
+
+    if (!ole2_read_block(fd, hdr, bat,
+            ole2_endian_convert_32(hdr->bat_array[table_no]))) {
+        return -1;
+    }
+
+    slot = current_block - (table_no * 128);
+    return ole2_endian_convert_32(bat[slot]);
+}
+
+/*
+ * Look up the successor of `current_block` when its BAT block is listed in
+ * the extended BAT (XBAT) chain rather than in the header.
+ *
+ * The first 109 BAT blocks are listed in the header; the rest are listed in
+ * XBAT blocks of 127 entries each, whose 128th entry links to the next XBAT
+ * block.  The chain is followed from hdr->xbat_start to the XBAT block that
+ * lists the wanted BAT block, which is then read and indexed.
+ *
+ * Returns the (host-order) table entry, or -1 when current_block is negative
+ * or any block on the way cannot be read.
+ */
+static int32_t ole2_get_next_xbat_block(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+    uint32_t xbat[128], bat[128];
+    int32_t bat_no, hops, xbat_slot, bat_slot;
+
+    if (current_block < 0) {
+        return -1;
+    }
+
+    bat_no = current_block / 128;
+
+    /* NB: The last entry in each XBAT points to the next XBAT block.
+       This reduces the number of entries in each block by 1.
+    */
+    hops = (bat_no - 109) / 127;
+    xbat_slot = (bat_no - 109) % 127;
+
+    bat_slot = current_block % 128;
+
+    if (!ole2_read_block(fd, hdr, xbat, hdr->xbat_start)) {
+        return -1;
+    }
+
+    /* Follow the chain of XBAT blocks */
+    for (; hops > 0; hops--) {
+        if (!ole2_read_block(fd, hdr, xbat,
+                ole2_endian_convert_32(xbat[127]))) {
+            return -1;
+        }
+    }
+
+    if (!ole2_read_block(fd, hdr, bat, ole2_endian_convert_32(xbat[xbat_slot]))) {
+        return -1;
+    }
+
+    return ole2_endian_convert_32(bat[bat_slot]);
+}
+
+/*
+ * Return the block that follows `current_block` in its chain, or a negative
+ * value on failure.  Blocks whose BAT block index is above 108 are resolved
+ * through the extended BAT, all others through the header's BAT array.
+ */
+static int32_t ole2_get_next_block_number(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+    if (current_block < 0) {
+        return -1;
+    }
+
+    return ((current_block / 128) > 108)
+        ? ole2_get_next_xbat_block(fd, hdr, current_block)
+        : ole2_get_next_bat_block(fd, hdr, current_block);
+}
+
+/*
+ * Look up the successor of small block `current_block` in the small-block
+ * allocation table.
+ *
+ * The table starts at hdr->sbat_start and is itself a chain of big blocks
+ * with 128 entries each; the chain is followed current_block / 128 times to
+ * reach the block holding the wanted entry.
+ *
+ * Returns the (host-order) table entry, or -1 when current_block is negative
+ * or the table block cannot be read.
+ */
+static int32_t ole2_get_next_sbat_block(int fd, ole2_header_t *hdr, int32_t current_block)
+{
+    uint32_t sbat[128];
+    int32_t hops, table_block;
+
+    if (current_block < 0) {
+        return -1;
+    }
+
+    table_block = hdr->sbat_start;
+    for (hops = current_block / 128; hops > 0; hops--) {
+        table_block = ole2_get_next_block_number(fd, hdr, table_block);
+    }
+
+    if (!ole2_read_block(fd, hdr, sbat, table_block)) {
+        return -1;
+    }
+    return ole2_endian_convert_32(sbat[current_block % 128]);
+}
+
+/*
+ * Read into `buff` the big block that contains small block `sbat_index`.
+ *
+ * Small-block data is stored in the chain of big blocks starting at
+ * hdr->sbat_root_start, eight small blocks per big block, so the chain is
+ * followed sbat_index / 8 times before reading.
+ *
+ * Returns TRUE on success and FALSE when sbat_index is negative, when no
+ * root start block is known, or when the block cannot be read.
+ */
+static int32_t ole2_get_sbat_data_block(int fd, ole2_header_t *hdr, void *buff, int32_t sbat_index)
+{
+    int32_t hops, data_block;
+
+    if (sbat_index < 0) {
+        return FALSE;
+    }
+
+    if (hdr->sbat_root_start < 0) {
+        cli_errmsg("No root start block\n");
+        return FALSE;
+    }
+
+    data_block = hdr->sbat_root_start;
+    for (hops = sbat_index / 8; hops > 0; hops--) { /* 8 small blocks per big block */
+        data_block = ole2_get_next_block_number(fd, hdr, data_block);
+    }
+
+    /* data_block is now the big block holding the requested small block */
+    return(ole2_read_block(fd, hdr, buff, data_block));
+}
+
+/* Read the property tree.
+ It is read as just an array rather than a tree */
+/*
+static void ole2_read_property_tree(int fd, ole2_header_t *hdr, const char *dir,
+ int (*handler)(int fd, ole2_header_t *hdr, property_t *prop, const char *dir))
+{
+ property_t prop_block[4];
+ int32_t index, current_block, count=0;
+
+ current_block = hdr->prop_start;
+
+ while(current_block >= 0) {
+ if (!ole2_read_block(fd, hdr, prop_block,
+ current_block)) {
+ return;
+ }
+ for (index=0 ; index < 4 ; index++) {
+ if (prop_block[index].type > 0) {
+ prop_block[index].name_size = ole2_endian_convert_16(prop_block[index].name_size);
+ prop_block[index].prev = ole2_endian_convert_32(prop_block[index].prev);
+ prop_block[index].next = ole2_endian_convert_32(prop_block[index].next);
+ prop_block[index].child = ole2_endian_convert_32(prop_block[index].child);
+ prop_block[index].user_flags = ole2_endian_convert_32(prop_block[index].user_flags);
+ prop_block[index].create_lowdate = ole2_endian_convert_32(prop_block[index].create_lowdate);
+ prop_block[index].create_highdate = ole2_endian_convert_32(prop_block[index].create_highdate);
+ prop_block[index].mod_lowdate = ole2_endian_convert_32(prop_block[index].mod_lowdate);
+ prop_block[index].mod_highdate = ole2_endian_convert_32(prop_block[index].mod_highdate);
+ prop_block[index].start_block = ole2_endian_convert_32(prop_block[index].start_block);
+ prop_block[index].size = ole2_endian_convert_32(prop_block[index].size);
+ if (prop_block[index].type > 5) {
+ cli_dbgmsg("ERROR: invalid property type: %d\n", prop_block[index].type);
+ return;
+ }
+ if (prop_block[index].type == 5) {
+ hdr->sbat_root_start = prop_block[index].start_block;
+ }
+ print_ole2_property(&prop_block[index]);
+ if (!handler(fd, hdr, &prop_block[index], dir)) {
+ cli_dbgmsg("ERROR: handler failed\n");
+ return;
+ }
+ }
+ }
+ current_block = ole2_get_next_block_number(fd, hdr, current_block);
+ if (++count > 100000) {
+ cli_dbgmsg("ERROR: loop detected\n");
+ return;
+ }
+ }
+ return;
+}
+*/
+/* Visit property entry `prop_index` and, recursively, the entries named by
+ * its prev, next and child fields.
+ *
+ * File entries (type 2) are counted in *file_count and passed to `handler`
+ * together with the current output directory `dir`.  Directory entries
+ * (type 1) get a sub-directory "<dir>/<index>" which is used for their
+ * children, while their siblings stay in `dir`.  The root entry (type 5) is
+ * only accepted as the very first entry visited and supplies
+ * hdr->sbat_root_start.
+ *
+ * The walk stops quietly at an entry when the index is out of range, when
+ * rec_level exceeds 100 or *file_count exceeds 100000, when a limit from
+ * `limits` is exceeded, when the entry cannot be read or has type 0, or when
+ * the entry was already visited (tracked in hdr->bitset).
+ */
+static void ole2_walk_property_tree(int fd, ole2_header_t *hdr, const char *dir, int32_t prop_index,
+ int (*handler)(int fd, ole2_header_t *hdr, property_t *prop, const char *dir),
+ unsigned int rec_level, unsigned int *file_count, const struct cl_limits *limits)
+{
+ property_t prop_block[4]; /* one big block's worth of entries */
+ int32_t index, current_block, i;
+ char *dirname;
+
+ current_block = hdr->prop_start;
+
+ if ((prop_index < 0) || (prop_index > hdr->max_block_no) || (rec_level > 100) || (*file_count > 100000)) {
+ return;
+ }
+
+ if (limits && limits->maxfiles && (*file_count > limits->maxfiles)) {
+ cli_dbgmsg("OLE2: File limit reached (max: %d)\n", limits->maxfiles);
+ return;
+ }
+
+ if (limits && limits->maxreclevel && (rec_level > limits->maxreclevel)) {
+ cli_dbgmsg("OLE2: Recursion limit reached (max: %d)\n", limits->maxreclevel);
+ return;
+ }
+
+ index = prop_index / 4; /* number of blocks to skip: four entries per block */
+ for (i=0 ; i < index ; i++) {
+ current_block = ole2_get_next_block_number(fd, hdr, current_block);
+ if (current_block < 0) {
+ return;
+ }
+ }
+ index = prop_index % 4; /* from here on: position of the entry inside its block */
+ if (!ole2_read_block(fd, hdr, prop_block,
+ current_block)) {
+ return;
+ }
+ if (prop_block[index].type <= 0) {
+ return;
+ }
+ prop_block[index].name_size = ole2_endian_convert_16(prop_block[index].name_size);
+ prop_block[index].prev = ole2_endian_convert_32(prop_block[index].prev);
+ prop_block[index].next = ole2_endian_convert_32(prop_block[index].next);
+ prop_block[index].child = ole2_endian_convert_32(prop_block[index].child);
+ prop_block[index].user_flags = ole2_endian_convert_32(prop_block[index].user_flags);
+ prop_block[index].create_lowdate = ole2_endian_convert_32(prop_block[index].create_lowdate);
+ prop_block[index].create_highdate = ole2_endian_convert_32(prop_block[index].create_highdate);
+ prop_block[index].mod_lowdate = ole2_endian_convert_32(prop_block[index].mod_lowdate);
+ prop_block[index].mod_highdate = ole2_endian_convert_32(prop_block[index].mod_highdate);
+ prop_block[index].start_block = ole2_endian_convert_32(prop_block[index].start_block);
+ prop_block[index].size = ole2_endian_convert_32(prop_block[index].size);
+
+ print_ole2_property(&prop_block[index]);
+
+ /* Check we aren't in a loop */
+ if (cli_bitset_test(hdr->bitset, (unsigned long) prop_index)) {
+ /* Loop in property tree detected */
+ cli_dbgmsg("OLE2: Property tree loop detected at index %d\n", prop_index);
+ return;
+ }
+ if (!cli_bitset_set(hdr->bitset, (unsigned long) prop_index)) {
+ return;
+ }
+
+ switch (prop_block[index].type) {
+ case 5: /* Root Entry */
+ if ((prop_index != 0) || (rec_level !=0) ||
+ (*file_count != 0)) {
+ /* Can only have RootEntry as the top */
+ cli_dbgmsg("ERROR: illegal Root Entry\n");
+ return;
+ }
+ hdr->sbat_root_start = prop_block[index].start_block;
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].prev, handler, rec_level+1, file_count, limits);
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].next, handler, rec_level+1, file_count, limits);
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].child, handler, rec_level+1, file_count, limits);
+ break;
+ case 2: /* File */
+ (*file_count)++;
+ if (!handler(fd, hdr, &prop_block[index], dir)) {
+ cli_dbgmsg("ERROR: handler failed\n");
+ /* If we don't return on this error then
+ we can sometimes pull VBA code
+ from corrupted files.
+ */
+
+ }
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].prev, handler, rec_level, file_count, limits);
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].next, handler, rec_level, file_count, limits);
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].child, handler, rec_level, file_count, limits);
+ break;
+ case 1: /* Directory */
+ dirname = (char *) cli_malloc(strlen(dir)+8); /* room for '/', six digits and the terminator */
+ if (!dirname) {
+ return;
+ }
+ snprintf(dirname, strlen(dir)+8, "%s/%.6d", dir, prop_index);
+ if (mkdir(dirname, 0700) != 0) {
+ free(dirname);
+ return;
+ }
+ cli_dbgmsg("OLE2 dir entry: %s\n",dirname);
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].prev, handler, rec_level+1, file_count, limits);
+ ole2_walk_property_tree(fd, hdr, dir,
+ prop_block[index].next, handler, rec_level+1, file_count, limits);
+ ole2_walk_property_tree(fd, hdr, dirname,
+ prop_block[index].child, handler, rec_level+1, file_count, limits);
+ free(dirname);
+ break;
+ default:
+ cli_dbgmsg("ERROR: unknown OLE2 entry type: %d\n", prop_block[index].type);
+ break;
+ }
+ return;
+}
+/*
+ * Write file handler - write the contents of a property entry to a file.
+ *
+ * fd   - descriptor of the OLE2 container being read
+ * hdr  - parsed container header (block sizes, sbat cutoff, max_block_no)
+ * prop - property entry to dump; anything whose type is not 2 is ignored
+ * dir  - directory in which the output file is created
+ *
+ * The output file is named after the sanitised property name, or after a
+ * generated number when the property has no name.
+ *
+ * Returns TRUE when the entry is not a file or was written completely,
+ * FALSE on any allocation, I/O or block-chain error.  All resources
+ * acquired here are released on every exit path.
+ */
+static int handler_writefile(int fd, ole2_header_t *hdr, property_t *prop, const char *dir)
+{
+    unsigned char *buff = NULL;
+    int32_t current_block, ofd, len, offset, chunk;
+    char *name, *newname;
+    size_t newname_len;
+    bitset_t *blk_bitset = NULL;
+    int big_block_size;
+    int ret = FALSE;
+
+    if (prop->type != 2) {
+        /* Not a file */
+        return TRUE;
+    }
+
+    if (prop->name_size > 64) {
+        cli_dbgmsg("\nERROR: property name too long: %d\n", prop->name_size);
+        return FALSE;
+    }
+
+    if (! (name = get_property_name(prop->name, prop->name_size))) {
+        /* File without a name - create a name for it */
+        off_t i;
+
+        i = lseek(fd, 0, SEEK_CUR);
+        name = (char *) cli_malloc(11);
+        if (!name) {
+            return FALSE;
+        }
+        /* cast the sum: off_t may be wider than long, %ld needs a long */
+        snprintf(name, 11, "%.10ld", (long int) (i + (long int) prop));
+    } else {
+        /* Sanitize the file name */
+        sanitiseName(name);
+    }
+
+    newname_len = strlen(name) + strlen(dir) + 2;
+    newname = (char *) cli_malloc(newname_len);
+    if (!newname) {
+        free(name);
+        return FALSE;
+    }
+
+    snprintf(newname, newname_len, "%s/%s", dir, name);
+    free(name);
+
+    ofd = open(newname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU);
+    if (ofd < 0) {
+        cli_errmsg("ERROR: failed to create file: %s\n", newname);
+        free(newname);
+        return FALSE;
+    }
+    free(newname);
+    current_block = prop->start_block;
+    len = prop->size;
+    big_block_size = 1 << hdr->log2_big_block_size;
+
+    /* from here on every failure leaves through 'done' so that ofd,
+     * buff and blk_bitset are always released */
+    buff = (unsigned char *) cli_malloc(big_block_size);
+    if (!buff) {
+        goto done;
+    }
+
+    blk_bitset = cli_bitset_init();
+    if (!blk_bitset) {
+        cli_errmsg("ERROR [handler_writefile]: init bitset failed\n");
+        goto done;
+    }
+
+    while ((current_block >= 0) && (len > 0)) {
+        if (current_block > hdr->max_block_no) {
+            cli_dbgmsg("OLE2: Max block number for file size exceeded: %d\n", current_block);
+            goto done;
+        }
+        /* Check we aren't in a loop */
+        if (cli_bitset_test(blk_bitset, (unsigned long) current_block)) {
+            /* Loop in block list */
+            cli_dbgmsg("OLE2: Block list loop detected\n");
+            goto done;
+        }
+        if (!cli_bitset_set(blk_bitset, (unsigned long) current_block)) {
+            goto done;
+        }
+        if (prop->size < (int64_t)hdr->sbat_cutoff) {
+            /* Small block file */
+            if (!ole2_get_sbat_data_block(fd, hdr, buff, current_block)) {
+                cli_dbgmsg("ole2_get_sbat_data_block failed\n");
+                goto done;
+            }
+            /* buff now contains the block with 8 small blocks in it */
+            offset = 64 * (current_block % 8);
+            chunk = MIN(len,64);
+            if (cli_writen(ofd, &buff[offset], chunk) != chunk) {
+                goto done;
+            }
+
+            len -= chunk;
+            current_block = ole2_get_next_sbat_block(fd, hdr, current_block);
+        } else {
+            /* Big block file */
+            if (!ole2_read_block(fd, hdr, buff, current_block)) {
+                goto done;
+            }
+            chunk = MIN(len,big_block_size);
+            if (cli_writen(ofd, buff, chunk) != chunk) {
+                goto done;
+            }
+
+            current_block = ole2_get_next_block_number(fd, hdr, current_block);
+            len -= chunk;
+        }
+    }
+    ret = TRUE;
+
+done:
+    close(ofd);
+    free(buff);
+    cli_bitset_free(blk_bitset);
+    return ret;
+}
+
+#if !defined(HAVE_ATTRIB_PACKED) && !defined(HAVE_PRAGMA_PACK) && !defined(HAVE_PRAGMA_PACK_HPPA)
+/*
+ * Read the OLE2 header from fd one field at a time into *hdr.
+ * This variant is compiled only when none of the structure-packing
+ * facilities tested by the surrounding #if is available.
+ * Returns TRUE when every field was read in full, FALSE at the first
+ * short or failed read.
+ */
+static int ole2_read_header(int fd, ole2_header_t *hdr)
+{
+    int slot = 0;
+
+    /* the || chain stops at the first field that cannot be read */
+    if (cli_readn(fd, &hdr->magic, 8) != 8
+        || cli_readn(fd, &hdr->clsid, 16) != 16
+        || cli_readn(fd, &hdr->minor_version, 2) != 2
+        || cli_readn(fd, &hdr->dll_version, 2) != 2
+        || cli_readn(fd, &hdr->byte_order, 2) != 2
+        || cli_readn(fd, &hdr->log2_big_block_size, 2) != 2
+        || cli_readn(fd, &hdr->log2_small_block_size, 4) != 4
+        || cli_readn(fd, &hdr->reserved, 8) != 8
+        || cli_readn(fd, &hdr->bat_count, 4) != 4
+        || cli_readn(fd, &hdr->prop_start, 4) != 4
+        || cli_readn(fd, &hdr->signature, 4) != 4
+        || cli_readn(fd, &hdr->sbat_cutoff, 4) != 4
+        || cli_readn(fd, &hdr->sbat_start, 4) != 4
+        || cli_readn(fd, &hdr->sbat_block_count, 4) != 4
+        || cli_readn(fd, &hdr->xbat_start, 4) != 4
+        || cli_readn(fd, &hdr->xbat_count, 4) != 4) {
+        return FALSE;
+    }
+
+    /* 109 four-byte entries of the block allocation table array follow */
+    while (slot < 109) {
+        if (cli_readn(fd, &hdr->bat_array[slot], 4) != 4) {
+            return FALSE;
+        }
+        slot++;
+    }
+    return TRUE;
+}
+#endif
+
+/*
+ * Extract the streams of the OLE2 container open on fd into dirname.
+ *
+ * fd      - descriptor of the file to examine
+ * dirname - directory that receives the extracted streams
+ * limits  - scan limits, passed through to the property tree walker
+ *
+ * The header is taken from an mmap of the file when available, otherwise
+ * it is read from fd.  Only containers with a big block size of 2^9, a
+ * small block size of 2^6 and an sbat cutoff of 4096 are processed.
+ *
+ * Returns CL_EOLE2 when the magic number does not match or the bitset
+ * cannot be allocated, 0 in every other case (including files that
+ * cannot be stat'ed, are too short, or use untested block sizes).
+ */
+int cli_ole2_extract(int fd, const char *dirname, const struct cl_limits *limits)
+{
+    ole2_header_t hdr;
+    int hdr_size;
+    int ret = 0;
+    struct stat statbuf;
+    unsigned int file_count=0;
+
+    cli_dbgmsg("in cli_ole2_extract()\n");
+
+    /* size of header - size of other values in struct */
+    hdr_size = sizeof(struct ole2_header_tag) - sizeof(int32_t) -
+        sizeof(unsigned char *) - sizeof(off_t) - sizeof(bitset_t *) -
+        sizeof(uint32_t);
+
+    hdr.m_area = NULL;
+    hdr.bitset = NULL;
+
+    /* st_size is used further down to bound block numbers, so a failed
+     * fstat() must not be ignored (statbuf would be indeterminate) */
+    if (fstat(fd, &statbuf) != 0) {
+        cli_dbgmsg("OLE2: fstat failed\n");
+        return 0;
+    }
+    if (statbuf.st_size < hdr_size) {
+        return 0;
+    }
+#ifdef HAVE_MMAP
+    hdr.m_length = statbuf.st_size;
+    hdr.m_area = (unsigned char *) mmap(NULL, hdr.m_length, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (hdr.m_area == MAP_FAILED) {
+        hdr.m_area = NULL;
+    } else {
+        cli_dbgmsg("mmap'ed file\n");
+        memcpy(&hdr, hdr.m_area, hdr_size);
+    }
+#endif
+
+    if (hdr.m_area == NULL) {
+        /* nothing is mapped here, so returning directly leaks nothing */
+#if defined(HAVE_ATTRIB_PACKED) || defined(HAVE_PRAGMA_PACK) || defined(HAVE_PRAGMA_PACK_HPPA)
+        if (cli_readn(fd, &hdr, hdr_size) != hdr_size) {
+            return 0;
+        }
+#else
+        if (!ole2_read_header(fd, &hdr)) {
+            return 0;
+        }
+#endif
+    }
+
+    hdr.minor_version = ole2_endian_convert_16(hdr.minor_version);
+    hdr.dll_version = ole2_endian_convert_16(hdr.dll_version);
+    hdr.byte_order = ole2_endian_convert_16(hdr.byte_order);
+    hdr.log2_big_block_size = ole2_endian_convert_16(hdr.log2_big_block_size);
+    hdr.log2_small_block_size = ole2_endian_convert_32(hdr.log2_small_block_size);
+    hdr.bat_count = ole2_endian_convert_32(hdr.bat_count);
+    hdr.prop_start = ole2_endian_convert_32(hdr.prop_start);
+    hdr.sbat_cutoff = ole2_endian_convert_32(hdr.sbat_cutoff);
+    hdr.sbat_start = ole2_endian_convert_32(hdr.sbat_start);
+    hdr.sbat_block_count = ole2_endian_convert_32(hdr.sbat_block_count);
+    hdr.xbat_start = ole2_endian_convert_32(hdr.xbat_start);
+    hdr.xbat_count = ole2_endian_convert_32(hdr.xbat_count);
+
+    hdr.sbat_root_start = -1;
+
+    hdr.bitset = cli_bitset_init();
+    if (!hdr.bitset) {
+        /* leave through 'done' so that a mapped file is unmapped */
+        ret = CL_EOLE2;
+        goto done;
+    }
+
+    if (memcmp(hdr.magic, magic_id, 8) != 0) {
+        cli_dbgmsg("OLE2 magic failed!\n");
+        ret = CL_EOLE2;
+        goto done;
+    }
+
+    if (hdr.log2_big_block_size != 9) {
+        cli_errmsg("WARNING: not scanned; untested big block size - please report\n");
+        goto done;
+    }
+    if (hdr.log2_small_block_size != 6) {
+        cli_errmsg("WARNING: not scanned; untested small block size - please report\n");
+        goto done;
+    }
+    if (hdr.sbat_cutoff != 4096) {
+        cli_errmsg("WARNING: not scanned; untested sbat cutoff - please report\n");
+        goto done;
+    }
+
+    /* 8 SBAT blocks per file block */
+    /* NOTE(review): this divides by the log2 of the block size (9), not by
+     * the block size itself (512), which makes the limit very generous -
+     * confirm whether that is intended before tightening it */
+    hdr.max_block_no = ((statbuf.st_size / hdr.log2_big_block_size) + 1) * 8;
+
+    print_ole2_header(&hdr);
+    cli_dbgmsg("Max block number: %lu\n", hdr.max_block_no);
+
+    /* NOTE: Select only ONE of the following two methods */
+
+    /* ole2_read_property_tree(fd, &hdr, dirname, handler_writefile); */
+
+    /* OR */
+
+    ole2_walk_property_tree(fd, &hdr, dirname, 0, handler_writefile, 0, &file_count, limits);
+
+done:
+#ifdef HAVE_MMAP
+    if (hdr.m_area != NULL) {
+        munmap(hdr.m_area, hdr.m_length);
+    }
+#endif
+    cli_bitset_free(hdr.bitset);
+    return ret;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_others.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,846 @@
+/*
+ * Copyright (C) 1999 - 2005 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef C_WINDOWS
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <dirent.h>
+#endif
+#include <time.h>
+#include <fcntl.h>
+#ifndef C_WINDOWS
+#include <pwd.h>
+#endif
+#include <errno.h>
+#include "target.h"
+#ifndef C_WINDOWS
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+#if defined(_MSC_VER) && defined(_DEBUG)
+#include <crtdbg.h>
+#endif
+
+#ifdef CL_THREAD_SAFE
+# include <pthread.h>
+static pthread_mutex_t cli_gentemp_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+#include <limits.h>
+#include <stddef.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "md5.h"
+#include "cltypes.h"
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#ifdef C_WINDOWS
+#undef P_tmpdir
+#define P_tmpdir "C:\\WINDOWS\\TEMP"
+#endif
+
+#define CL_FLEVEL 25 /* don't touch it */
+
+uint8_t cli_debug_flag = 0, cli_leavetemps_flag = 0;
+
+static unsigned char name_salt[16] = { 16, 38, 97, 12, 8, 4, 72, 196, 217, 144, 33, 124, 18, 11, 17, 253 };
+
+
+/*
+ * Body shared by the variadic message functions below.  x must be a string
+ * literal (its length is taken with sizeof); it is copied to the start of a
+ * BUFSIZ stack buffer, the message formatted from `str` and the caller's
+ * variable arguments is appended with vsnprintf, and the result is written
+ * to stderr.  The macro declares locals and names `str` directly, so it has
+ * to be expanded at the top of a function whose last named parameter is
+ * `const char *str`.
+ */
+#define MSGCODE(x) \
+ va_list args; \
+ int len = sizeof(x) - 1; \
+ char buff[BUFSIZ]; \
+ strncpy(buff, x, len); \
+ va_start(args, str); \
+ vsnprintf(buff + len, sizeof(buff) - len, str, args); \
+ buff[sizeof(buff) - 1] = '\0'; \
+ fputs(buff, stderr); \
+ va_end(args)
+
+
+/* Print a printf-style warning to stderr, prefixed with "LibClamAV Warning: ". */
+void cli_warnmsg(const char *str, ...)
+{
+ MSGCODE("LibClamAV Warning: ");
+}
+
+/* Print a printf-style error message to stderr, prefixed with "LibClamAV Error: ". */
+void cli_errmsg(const char *str, ...)
+{
+ MSGCODE("LibClamAV Error: ");
+}
+
+/*
+ * Debug message hook; does nothing unless cli_debug_flag is set.
+ * When enabled, the format string itself is printed with puts(): it goes
+ * to stdout with a newline appended, the variable arguments are never
+ * read and conversion specifiers such as %s appear verbatim.
+ * NOTE(review): this differs from cli_warnmsg/cli_errmsg, which format
+ * their arguments and write to stderr - confirm that it is intentional.
+ */
+void cli_dbgmsg(const char *str, ...)
+{
+ if(cli_debug_flag) {
+ puts(str);
+ }
+}
+
+/* Turn on debug output by setting the global cli_debug_flag. */
+void cl_debug(void)
+{
+ cli_debug_flag = 1;
+}
+
+/* Return the compile-time constant CL_FLEVEL. */
+unsigned int cl_retflevel(void)
+{
+ return CL_FLEVEL;
+}
+
+/* Return the VERSION string this library was built with. */
+const char *cl_retver(void)
+{
+ return VERSION;
+}
+
+/*
+ * Map a libclamav status code to a constant, human-readable description.
+ * Codes that are not listed yield "Unknown error code".  The returned
+ * string is a literal and must not be freed or modified.
+ */
+const char *cl_strerror(int clerror)
+{
+    const char *text;
+
+    switch(clerror) {
+        case CL_CLEAN:     text = "No viruses detected"; break;
+        case CL_VIRUS:     text = "Virus(es) detected"; break;
+        case CL_EMAXREC:   text = "Recursion limit exceeded"; break;
+        case CL_EMAXSIZE:  text = "File size limit exceeded"; break;
+        case CL_EMAXFILES: text = "Files number limit exceeded"; break;
+        case CL_ERAR:      text = "RAR module failure"; break;
+        case CL_EZIP:      text = "Zip module failure"; break;
+        case CL_EGZIP:     text = "GZip module failure"; break;
+        case CL_EMSCOMP:   text = "MS Expand module failure"; break;
+        case CL_EMSCAB:    text = "MS CAB module failure"; break;
+        case CL_EOLE2:     text = "OLE2 module failure"; break;
+        case CL_ETMPFILE:  text = "Unable to create temporary file"; break;
+        case CL_ETMPDIR:   text = "Unable to create temporary directory"; break;
+        case CL_EFSYNC:    text = "Unable to synchronize file <-> disk"; break;
+        case CL_EMEM:      text = "Unable to allocate memory"; break;
+        case CL_EOPEN:     text = "Unable to open file or directory"; break;
+        case CL_EMALFDB:   text = "Malformed database"; break;
+        case CL_EPATSHORT: text = "Too short pattern detected"; break;
+        case CL_ECVD:      text = "Broken or not a CVD file"; break;
+        case CL_ECVDEXTR:  text = "CVD extraction failure"; break;
+        case CL_EMD5:      text = "MD5 verification error"; break;
+        case CL_EDSIG:     text = "Digital signature verification error"; break;
+        case CL_ENULLARG:  text = "Null argument passed while initialized is required"; break;
+        case CL_EIO:       text = "Input/Output error"; break;
+        case CL_EFORMAT:   text = "Bad format or broken data"; break;
+        case CL_ESUPPORT:  text = "Not supported data format"; break;
+        case CL_ELOCKDB:   text = "Unable to lock database directory"; break;
+        case CL_EARJ:      text = "ARJ module failure"; break;
+        default:           text = "Unknown error code"; break;
+    }
+
+    return text;
+}
+
+/*
+ * Compute the MD5 digest of everything that can still be read from the
+ * descriptor desc.
+ * Returns a newly allocated 16-byte digest that the caller must free, or
+ * NULL when the allocation fails or a read error occurs.
+ */
+unsigned char *cli_md5digest(int desc)
+{
+    unsigned char *digest;
+    char buff[FILEBUFF];
+    cli_md5_ctx ctx;
+    int bytes;
+
+
+    if(!(digest = cli_malloc(16)))
+        return NULL;
+
+    cli_md5_init(&ctx);
+
+    /* cli_readn() returns -1 on error; only positive counts may be hashed */
+    while((bytes = cli_readn(desc, buff, FILEBUFF)) > 0)
+        cli_md5_update(&ctx, buff, bytes);
+
+    if(bytes < 0) {
+        free(digest);
+        return NULL;
+    }
+
+    cli_md5_final(digest, &ctx);
+
+    return digest;
+}
+
+/*
+ * Hash the remainder of the stdio stream fs with MD5.
+ * Returns a newly allocated string of 32 lowercase hex digits (caller
+ * frees), or NULL if that string cannot be allocated.  When digcpy is not
+ * NULL and the string was allocated, the 16 raw digest bytes are copied
+ * to it as well.
+ */
+char *cli_md5stream(FILE *fs, unsigned char *digcpy)
+{
+    static const char hexdigits[] = "0123456789abcdef";
+    unsigned char raw[16];
+    char chunk[FILEBUFF];
+    cli_md5_ctx ctx;
+    char *hex;
+    int k, got;
+
+    cli_md5_init(&ctx);
+    for(;;) {
+        got = fread(chunk, 1, FILEBUFF, fs);
+        if(!got)
+            break;
+        cli_md5_update(&ctx, chunk, got);
+    }
+    cli_md5_final(raw, &ctx);
+
+    hex = (char *) cli_calloc(32 + 1, sizeof(char));
+    if(hex == NULL)
+        return NULL;
+
+    /* two hex digits per digest byte, high nibble first */
+    for(k = 0; k < 16; k++) {
+        hex[2 * k] = hexdigits[raw[k] >> 4];
+        hex[2 * k + 1] = hexdigits[raw[k] & 0x0f];
+    }
+    hex[32] = '\0';
+
+    if(digcpy != NULL)
+        memcpy(digcpy, raw, 16);
+
+    return hex;
+}
+
+/*
+ * Return the MD5 of the named file as a newly allocated hex string
+ * (caller frees), or NULL when the file cannot be opened or the string
+ * cannot be allocated.
+ */
+char *cli_md5file(const char *filename)
+{
+    char *hex;
+    FILE *in = fopen(filename, "rb");
+
+    if(in == NULL) {
+        cli_errmsg("cli_md5file(): Can't read file %s\n", filename);
+        return NULL;
+    }
+
+    hex = cli_md5stream(in, NULL);
+    fclose(in);
+
+    return hex;
+}
+
+/*
+ * Hash len bytes at buffer with MD5.
+ * When dig is not NULL the 16 raw digest bytes are stored there first
+ * (even if the string allocation afterwards fails).  Returns a newly
+ * allocated string of 32 lowercase hex digits, or NULL on allocation
+ * failure.
+ */
+static char *cli_md5buff(const unsigned char *buffer, unsigned int len, unsigned char *dig)
+{
+    static const char hexdigits[] = "0123456789abcdef";
+    unsigned char raw[16];
+    cli_md5_ctx ctx;
+    char *hex;
+    int k;
+
+    cli_md5_init(&ctx);
+    cli_md5_update(&ctx, buffer, len);
+    cli_md5_final(raw, &ctx);
+
+    if(dig != NULL)
+        memcpy(dig, raw, 16);
+
+    hex = (char *) cli_calloc(32 + 1, sizeof(char));
+    if(hex == NULL)
+        return NULL;
+
+    /* two hex digits per digest byte, high nibble first */
+    for(k = 0; k < 16; k++) {
+        hex[2 * k] = hexdigits[raw[k] >> 4];
+        hex[2 * k + 1] = hexdigits[raw[k] & 0x0f];
+    }
+    hex[32] = '\0';
+
+    return hex;
+}
+
+/*
+ * malloc() wrapper that refuses zero-sized requests and requests larger
+ * than CLI_MAX_ALLOCATION.  Every failure is reported through
+ * cli_errmsg().  Returns the new block or NULL.
+ */
+void *cli_malloc(size_t size)
+{
+    void *alloc;
+
+
+    if(!size || size > CLI_MAX_ALLOCATION) {
+        /* size_t is printed via an explicit unsigned long cast; %u would
+         * not match its width on LP64 targets */
+        cli_errmsg("cli_malloc(): Attempt to allocate %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) size);
+        return NULL;
+    }
+
+#if defined(_MSC_VER) && defined(_DEBUG)
+    alloc = _malloc_dbg(size, _NORMAL_BLOCK, __FILE__, __LINE__);
+#else
+    alloc = malloc(size);
+#endif
+
+    if(!alloc) {
+        cli_errmsg("cli_malloc(): Can't allocate memory (%lu bytes).\n", (unsigned long) size);
+        perror("malloc_problem");
+        return NULL;
+    }
+
+    return alloc;
+}
+
+/*
+ * calloc() wrapper.  Rejects a zero element count or element size, and
+ * any request whose total (nmemb * size) exceeds CLI_MAX_ALLOCATION.
+ * Every failure is reported through cli_errmsg().  Returns the zeroed
+ * block or NULL.
+ */
+void *cli_calloc(size_t nmemb, size_t size)
+{
+    void *alloc;
+
+
+    /* the division bounds the product without letting nmemb * size wrap */
+    if(!nmemb || !size || size > CLI_MAX_ALLOCATION || nmemb > CLI_MAX_ALLOCATION / size) {
+        cli_errmsg("cli_calloc(): Attempt to allocate %lu x %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) nmemb, (unsigned long) size);
+        return NULL;
+    }
+
+#if defined(_MSC_VER) && defined(_DEBUG)
+    alloc = _calloc_dbg(nmemb, size, _NORMAL_BLOCK, __FILE__, __LINE__);
+#else
+    alloc = calloc(nmemb, size);
+#endif
+
+    if(!alloc) {
+        cli_errmsg("cli_calloc(): Can't allocate memory (%lu bytes).\n", (unsigned long) (nmemb * size));
+        perror("calloc_problem");
+        return NULL;
+    }
+
+    return alloc;
+}
+
+/*
+ * realloc() wrapper that refuses zero-sized requests and requests larger
+ * than CLI_MAX_ALLOCATION.  On any failure NULL is returned and ptr is
+ * left untouched, so the caller still owns the original block.
+ */
+void *cli_realloc(void *ptr, size_t size)
+{
+    void *alloc;
+
+
+    if(!size || size > CLI_MAX_ALLOCATION) {
+        /* size_t is printed via an explicit unsigned long cast */
+        cli_errmsg("cli_realloc(): Attempt to allocate %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) size);
+        return NULL;
+    }
+
+    alloc = realloc(ptr, size);
+
+    if(!alloc) {
+        cli_errmsg("cli_realloc(): Can't re-allocate memory to %lu bytes.\n", (unsigned long) size);
+        perror("realloc_problem");
+        return NULL;
+    }
+
+    return alloc;
+}
+
+/*
+ * Like cli_realloc(), except that ptr is freed when realloc() itself
+ * fails.  A request that is rejected up front (zero or larger than
+ * CLI_MAX_ALLOCATION) returns NULL without freeing ptr.
+ */
+void *cli_realloc2(void *ptr, size_t size)
+{
+    void *alloc;
+
+
+    if(!size || size > CLI_MAX_ALLOCATION) {
+        /* size_t is printed via an explicit unsigned long cast */
+        cli_errmsg("cli_realloc2(): Attempt to allocate %lu bytes. Please report to http://bugs.clamav.net\n", (unsigned long) size);
+        return NULL;
+    }
+
+    alloc = realloc(ptr, size);
+
+    if(!alloc) {
+        cli_errmsg("cli_realloc2(): Can't re-allocate memory to %lu bytes.\n", (unsigned long) size);
+        perror("realloc_problem");
+        free(ptr); /* free(NULL) is a no-op, no guard needed */
+        return NULL;
+    }
+
+    return alloc;
+}
+
+/*
+ * strdup() wrapper.  Returns a newly allocated copy of s, or NULL (after
+ * logging through cli_errmsg()) when s is NULL or memory is exhausted.
+ */
+char *cli_strdup(const char *s)
+{
+    char *alloc;
+
+
+    if(s == NULL) {
+        cli_errmsg("cli_strdup(): s == NULL. Please report to http://bugs.clamav.net\n");
+        return NULL;
+    }
+
+#if defined(_MSC_VER) && defined(_DEBUG)
+    alloc = _strdup_dbg(s, _NORMAL_BLOCK, __FILE__, __LINE__);
+#else
+    alloc = strdup(s);
+#endif
+
+    if(!alloc) {
+        /* strlen() yields size_t; cast it to match the %lu conversion */
+        cli_errmsg("cli_strdup(): Can't allocate memory (%lu bytes).\n", (unsigned long) strlen(s));
+        perror("strdup_problem");
+        return NULL;
+    }
+
+    return alloc;
+}
+
+/*
+ * Return a pseudo-random number in the range [0, max).
+ * The generator is re-seeded on every call from the current microsecond
+ * count plus clock().  A max of 0 yields 0 rather than dividing by zero.
+ */
+unsigned int cli_rndnum(unsigned int max)
+{
+    struct timeval tv;
+
+    if(max == 0)
+        return 0;
+
+    gettimeofday(&tv, (struct timezone *) 0);
+    srand(tv.tv_usec+clock());
+
+    return rand() % max;
+}
+
+/*
+ * Configure temporary-file handling.
+ * dir        - when not NULL, exported as TMPDIR through putenv()
+ * leavetemps - stored in the global cli_leavetemps_flag
+ * A failure to set TMPDIR is only reported as a warning; the flag is
+ * stored in every case.
+ */
+void cl_settempdir(const char *dir, short leavetemps)
+{
+    char *var;
+
+    if(dir) {
+        /* "TMPDIR=" is 7 characters, plus the terminating NUL */
+        var = (char *) cli_malloc(8 + strlen(dir));
+        if(!var) {
+            cli_warnmsg("Can't set TMPDIR variable - out of memory.\n");
+        } else {
+            sprintf(var, "TMPDIR=%s", dir);
+            if(!putenv(var))
+                cli_dbgmsg("Setting %s as global temporary directory\n", dir);
+            else
+                cli_warnmsg("Can't set TMPDIR variable - insufficient space in the environment.\n");
+
+            /* WARNING: var must not be released - see putenv(3) */
+        }
+    }
+
+    cli_leavetemps_flag = leavetemps;
+}
+
+/*
+ * Build a temporary file name of the form <dir>/clamav-<32 hex digits>
+ * (a backslash separator is used under C_WINDOWS).
+ * When dir is NULL the directory comes from $TMPDIR, then P_tmpdir if it
+ * is defined, otherwise "/tmp".  The hex part is the MD5 of name_salt
+ * followed by 32 random bytes; the raw digest is written back into
+ * name_salt, which is why that step is guarded by a mutex in thread-safe
+ * builds.  Nothing is created on disk.
+ * Returns a newly allocated string (caller frees) or NULL when out of
+ * memory.
+ */
+char *cli_gentemp(const char *dir)
+{
+    const char *base = dir;
+    unsigned char seed[16 + 32];
+    char *path, *hex;
+    int pos;
+
+    if(base == NULL) {
+        base = getenv("TMPDIR");
+        if(base == NULL) {
+#ifdef P_tmpdir
+            base = P_tmpdir;
+#else
+            base = "/tmp";
+#endif
+        }
+    }
+
+    path = (char *) cli_calloc(strlen(base) + 1 + 32 + 1 + 7, sizeof(char));
+    if(path == NULL) {
+        cli_dbgmsg("cli_gentemp('%s'): out of memory\n", base);
+        return NULL;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&cli_gentemp_mutex);
+#endif
+
+    memcpy(seed, name_salt, 16);
+
+    pos = 16;
+    while(pos < 48) {
+        seed[pos] = cli_rndnum(256);
+        pos++;
+    }
+
+    hex = cli_md5buff(seed, 48, name_salt);
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_unlock(&cli_gentemp_mutex);
+#endif
+
+    if(hex == NULL) {
+        free(path);
+        cli_dbgmsg("cli_gentemp('%s'): out of memory\n", base);
+        return NULL;
+    }
+
+#ifdef C_WINDOWS
+    sprintf(path, "%s\\clamav-", base);
+#else
+    sprintf(path, "%s/clamav-", base);
+#endif
+    strncat(path, hex, 32);
+    free(hex);
+
+    return path;
+}
+
+/*
+ * Generate a temporary file name with cli_gentemp() and create the file.
+ * dir  - directory for the file (NULL selects the default, see
+ *        cli_gentemp())
+ * name - receives the allocated file name on success; set to NULL on
+ *        failure
+ * fd   - receives the open read/write descriptor
+ * Returns CL_SUCCESS, CL_EMEM when no name could be generated, or CL_EIO
+ * when the file could not be created.
+ */
+int cli_gentempfd(const char *dir, char **name, int *fd)
+{
+
+    *name = cli_gentemp(dir);
+    if(!*name)
+        return CL_EMEM;
+
+    /* O_EXCL: never open (or follow a link to) something that already
+     * exists under the generated name */
+    *fd = open(*name, O_RDWR|O_CREAT|O_TRUNC|O_BINARY|O_EXCL, S_IRWXU);
+    if(*fd == -1) {
+        cli_errmsg("cli_gentempfd: Can't create temporary file %s: %s\n", *name, strerror(errno));
+        free(*name);
+        *name = NULL; /* do not hand a dangling pointer back to the caller */
+        return CL_EIO;
+    }
+
+    return CL_SUCCESS;
+}
+
+#ifdef C_WINDOWS
+/*
+ * Recursively remove name (C_WINDOWS build).
+ * A non-directory is simply unlinked.  For a directory every entry except
+ * "." and ".." is removed by a recursive call, stopping at the first
+ * failure, and then the directory itself is removed.
+ * Windows doesn't allow you to delete a directory while it is still open,
+ * so the directory handle is closed before rmdir() is attempted.
+ * Returns 0 on success; -1 or the non-zero result of the failed recursive
+ * call otherwise.
+ */
+int
+cli_rmdirs(const char *name)
+{
+ int rc;
+ struct stat statb;
+ DIR *dd;
+ struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+ union {
+ struct dirent d;
+ char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+ } result;
+#endif
+
+
+ if(stat(name, &statb) < 0) {
+ cli_warnmsg("cli_rmdirs: Can't locate %s: %s\n", name, strerror(errno));
+ return -1;
+ }
+
+ /* plain files are removed directly */
+ if(!S_ISDIR(statb.st_mode)) {
+ if(unlink(name) < 0) {
+ cli_warnmsg("cli_rmdirs: Can't remove %s: %s\n", name, strerror(errno));
+ return -1;
+ }
+ return 0;
+ }
+
+ if((dd = opendir(name)) == NULL)
+ return -1;
+
+ rc = 0;
+
+ /* the loop header depends on which readdir flavour was detected */
+#ifdef HAVE_READDIR_R_3
+ while((readdir_r(dd, &result.d, &dent) == 0) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+ while((dent = (struct dirent *)readdir_r(dd, &result.d)) != NULL) {
+#else
+ while((dent = readdir(dd)) != NULL) {
+#endif
+ char *path;
+
+ if(strcmp(dent->d_name, ".") == 0)
+ continue;
+ if(strcmp(dent->d_name, "..") == 0)
+ continue;
+
+ /* room for name + '\\' + entry + NUL */
+ path = cli_malloc(strlen(name) + strlen(dent->d_name) + 2);
+
+ if(path == NULL) {
+ closedir(dd);
+ return -1;
+ }
+
+ sprintf(path, "%s\\%s", name, dent->d_name);
+ rc = cli_rmdirs(path);
+ free(path);
+ if(rc != 0)
+ break;
+ }
+
+ closedir(dd);
+
+ if(rmdir(name) < 0) {
+ cli_errmsg("cli_rmdirs: Can't remove temporary directory %s: %s\n", name, strerror(errno));
+ return -1;
+ }
+
+ return rc;
+}
+#else
+/*
+ * Recursively remove the directory dirname (build without C_WINDOWS).
+ * The directory is first made accessible with chmod 0700.  While it still
+ * exists, rmdir() is attempted; if that fails because the directory is
+ * not empty, its entries are removed (sub-directories through a recursive
+ * call when a plain rmdir() is not enough, everything else with unlink())
+ * and the attempt is repeated after rewinding the directory stream.
+ * Returns 0 on success and -1 when the directory cannot be opened or an
+ * entry cannot be removed.
+ */
+int cli_rmdirs(const char *dirname)
+{
+ DIR *dd;
+ struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+ union {
+ struct dirent d;
+ char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+ } result;
+#endif
+ struct stat maind, statbuf;
+ char *path;
+
+
+ chmod(dirname, 0700);
+ if((dd = opendir(dirname)) != NULL) {
+ /* keep trying for as long as the directory still exists */
+ while(stat(dirname, &maind) != -1) {
+ if(!rmdir(dirname)) break;
+ /* anything other than "not empty" style errors is fatal */
+ if(errno != ENOTEMPTY && errno != EEXIST && errno != EBADF) {
+ cli_errmsg("cli_rmdirs: Can't remove temporary directory %s: %s\n", dirname, strerror(errno));
+ closedir(dd);
+ return -1;
+ }
+
+ /* the loop header depends on which readdir flavour was detected */
+#ifdef HAVE_READDIR_R_3
+ while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+ while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+ while((dent = readdir(dd))) {
+#endif
+#if (!defined(C_CYGWIN)) && (!defined(C_INTERIX)) && (!defined(C_WINDOWS))
+ if(dent->d_ino)
+#endif
+ {
+ if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..")) {
+ /* room for dirname + separator + entry + NUL */
+ path = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 2);
+ if(!path) {
+ closedir(dd);
+ return -1;
+ }
+
+#ifdef C_WINDOWS
+ sprintf(path, "%s\\%s", dirname, dent->d_name);
+#else
+ sprintf(path, "%s/%s", dirname, dent->d_name);
+#endif
+
+ /* stat the file */
+ if(lstat(path, &statbuf) != -1) {
+ if(S_ISDIR(statbuf.st_mode) && !S_ISLNK(statbuf.st_mode)) {
+ if(rmdir(path) == -1) { /* can't be deleted */
+ if(errno == EACCES) {
+ cli_errmsg("cli_rmdirs: Can't remove some temporary directories due to access problem.\n");
+ closedir(dd);
+ free(path);
+ return -1;
+ }
+ /* presumably not empty: descend into it */
+ if(cli_rmdirs(path)) {
+ cli_warnmsg("cli_rmdirs: Can't remove nested directory %s\n", path);
+ free(path);
+ closedir(dd);
+ return -1;
+ }
+ }
+ } else
+ if(unlink(path) < 0) {
+ cli_warnmsg("cli_rmdirs: Couldn't remove %s: %s\n", path, strerror(errno));
+ free(path);
+ closedir(dd);
+ return -1;
+ }
+ }
+ free(path);
+ }
+ }
+ }
+ /* start over from the first entry for the next rmdir() attempt */
+ rewinddir(dd);
+ }
+
+ } else {
+ return -1;
+ }
+
+ closedir(dd);
+ return 0;
+}
+#endif
+
+/*
+ * Function: readn
+ * Keep calling read() until count bytes have been stored in buff, end of
+ * file is reached or an error other than EINTR occurs.
+ * Returns count when everything was read, the number of bytes obtained so
+ * far when end of file came first, and -1 on a read error.
+ */
+int cli_readn(int fd, void *buff, unsigned int count)
+{
+    unsigned char *dst = (unsigned char *) buff;
+    unsigned int remaining = count;
+    int got;
+
+    for(;;) {
+        got = read(fd, dst, remaining);
+        if (got == 0) {
+            /* end of file: report what has been read so far */
+            return (count - remaining);
+        }
+        if (got < 0) {
+            if (errno != EINTR) {
+                cli_errmsg("cli_readn: read error: %s\n", strerror(errno));
+                return -1;
+            }
+            /* interrupted: fall through and retry */
+        } else {
+            remaining -= got;
+            dst += got;
+        }
+        if (remaining == 0) {
+            break;
+        }
+    }
+
+    return count;
+}
+
+/*
+ * Function: writen
+ * Keep calling write() until count bytes from buff have been written or
+ * an error other than EINTR occurs.
+ * Returns count on success and -1 on a write error.
+ */
+int cli_writen(int fd, const void *buff, unsigned int count)
+{
+    const unsigned char *src = (const unsigned char *) buff;
+    unsigned int remaining = count;
+    int put;
+
+    for(;;) {
+        put = write(fd, src, remaining);
+        if (put < 0) {
+            if (errno != EINTR) {
+                cli_errmsg("cli_writen: write error: %s\n", strerror(errno));
+                return -1;
+            }
+            /* interrupted: fall through and retry */
+        } else {
+            remaining -= put;
+            src += put;
+        }
+        if (remaining == 0) {
+            break;
+        }
+    }
+
+    return count;
+}
+
+/*
+ * Copy the file src to dest.  dest is created or truncated with owner-only
+ * permissions.
+ * Returns 0 on success and -1 when either file cannot be opened, the copy
+ * buffer cannot be allocated, a read or write fails, or closing dest
+ * fails.  Both descriptors and the buffer are released on every path.
+ */
+int cli_filecopy(const char *src, const char *dest)
+{
+    char *buffer;
+    int s, d, bytes;
+    int ret = 0;
+
+
+    if((s = open(src, O_RDONLY|O_BINARY)) == -1)
+        return -1;
+
+    if((d = open(dest, O_CREAT|O_WRONLY|O_TRUNC|O_BINARY, S_IRWXU)) == -1) {
+        close(s);
+        return -1;
+    }
+
+    if(!(buffer = cli_malloc(FILEBUFF))) {
+        close(s);
+        close(d);
+        return -1;
+    }
+
+    while((bytes = cli_readn(s, buffer, FILEBUFF)) > 0) {
+        if(cli_writen(d, buffer, bytes) != bytes) {
+            ret = -1;
+            break;
+        }
+    }
+    /* a negative count from cli_readn() means the source could not be read */
+    if(bytes < 0)
+        ret = -1;
+
+    free(buffer);
+    close(s);
+
+    if(close(d) == -1)
+        ret = -1;
+
+    return ret;
+}
+
+/* Implement a generic bitset, trog at clamav.net */
+
+/* Number of bits kept in each byte of the bitset storage */
+#define BITS_PER_CHAR (8)
+/* Size in bytes of the storage of a freshly created bitset */
+#define BITSET_DEFAULT_SIZE (1024)
+/* Boolean results used by the bitset functions */
+#define FALSE (0)
+#define TRUE (1)
+
+/*
+ * Starting from BITSET_DEFAULT_SIZE, double until the value is no smaller
+ * than num and return it.  If the doubling wraps around to zero, num
+ * itself is returned.
+ */
+static unsigned long nearest_power(unsigned long num)
+{
+    unsigned long candidate;
+
+    for (candidate = BITSET_DEFAULT_SIZE; candidate < num; ) {
+        candidate <<= 1;
+        if (!candidate) {
+            /* shifted out of range */
+            return num;
+        }
+    }
+    return candidate;
+}
+
+/*
+ * Create an empty bitset with BITSET_DEFAULT_SIZE bytes of zeroed storage.
+ * Returns the new bitset (release it with cli_bitset_free()) or NULL when
+ * either allocation fails.
+ */
+bitset_t *cli_bitset_init(void)
+{
+    bitset_t *bs;
+
+    bs = cli_malloc(sizeof(bitset_t));
+    if (!bs) {
+        return NULL;
+    }
+    bs->length = BITSET_DEFAULT_SIZE;
+    bs->bitset = cli_calloc(BITSET_DEFAULT_SIZE, 1);
+    if (!bs->bitset) {
+        /* never hand out a bitset without storage: cli_bitset_set() and
+         * cli_bitset_test() index it unconditionally */
+        free(bs);
+        return NULL;
+    }
+    return bs;
+}
+
+/* Release a bitset and its storage; a NULL bs is accepted and ignored. */
+void cli_bitset_free(bitset_t *bs)
+{
+    if (bs != NULL) {
+        /* free() accepts NULL, so the storage needs no separate check */
+        free(bs->bitset);
+        free(bs);
+    }
+}
+
+/*
+ * Grow the storage of bs to nearest_power(min_size) bytes and zero the
+ * newly added tail.
+ * Returns bs on success; on allocation failure returns NULL and leaves
+ * bs unchanged.
+ */
+static bitset_t *bitset_realloc(bitset_t *bs, unsigned long min_size)
+{
+    const unsigned long grown = nearest_power(min_size);
+    unsigned char *storage = (unsigned char *) cli_realloc(bs->bitset, grown);
+
+    if (storage == NULL) {
+        return NULL;
+    }
+
+    /* only the bytes beyond the old length are new and need clearing */
+    memset(storage + bs->length, 0, grown - bs->length);
+    bs->bitset = storage;
+    bs->length = grown;
+    return bs;
+}
+
+/*
+ * Set bit number bit_offset, growing the storage first when the bit lies
+ * beyond the current length.
+ * Returns TRUE on success and FALSE when the storage could not be grown.
+ */
+int cli_bitset_set(bitset_t *bs, unsigned long bit_offset)
+{
+    const unsigned long byte_idx = bit_offset / BITS_PER_CHAR;
+    const unsigned long bit_idx = bit_offset % BITS_PER_CHAR;
+
+    if (byte_idx >= bs->length && bitset_realloc(bs, byte_idx + 1) == NULL) {
+        return FALSE;
+    }
+
+    bs->bitset[byte_idx] |= ((unsigned char)1 << bit_idx);
+    return TRUE;
+}
+
+/*
+ * Test bit number bit_offset.
+ * Returns FALSE when the bit is clear or lies beyond the current storage;
+ * otherwise returns the non-zero mask value of the bit within its byte
+ * (not necessarily 1).
+ */
+int cli_bitset_test(bitset_t *bs, unsigned long bit_offset)
+{
+    const unsigned long byte_idx = bit_offset / BITS_PER_CHAR;
+    const unsigned long bit_idx = bit_offset % BITS_PER_CHAR;
+
+    if (byte_idx < bs->length) {
+        return (bs->bitset[byte_idx] & ((unsigned char)1 << bit_idx));
+    }
+    return FALSE;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_packlibs.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,309 @@
+/*
+ * Copyright (C) 2006 aCaB <acab at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include "others.h"
+#include "execs.h"
+#include "pe.h"
+#include "packlibs.h"
+
+/*
+ * Fetch the next control bit of the packed stream.
+ * *mydlptr is an 8-bit shift register.  Its top bit is returned and the
+ * register is shifted left by one.  When its low seven bits are all zero
+ * the register is refilled first: the byte at *scur becomes the new
+ * register (top bit returned, a marker bit shifted in at the bottom) and
+ * *scur advances by one.
+ * Returns the bit (0 or 1), or -1 when a refill would read outside
+ * [buffer, buffer + buffersize - 1); in that case neither *scur nor
+ * *mydlptr is modified.
+ */
+static int doubledl(char **scur, uint8_t *mydlptr, char *buffer, uint32_t buffersize)
+{
+    uint8_t current = *mydlptr;
+    uint8_t shifted = (uint8_t)(current << 1);
+
+    if ((current & 0x7f) == 0) {
+        char *cursor = *scur;
+
+        if (cursor < buffer || cursor >= buffer+buffersize-1)
+            return -1;
+        current = (uint8_t)*cursor;
+        /* the low bit is free after the shift, so OR-ing equals +1 */
+        shifted = (uint8_t)((current << 1) | 1);
+        *scur = cursor + 1;
+    }
+
+    *mydlptr = shifted;
+    return current >> 7;
+}
+
+
+/*
+ * Decompress the packed stream in source (ssize bytes) into dest (dsize
+ * bytes); presumably the FSG packer format, going by the function name.
+ *
+ * The first source byte is copied verbatim.  After that, control bits
+ * fetched with doubledl() select between a literal byte copy and a
+ * back-reference that copies backsize bytes from backbytes positions
+ * behind the write cursor.  Decoding ends when the "18f" branch reads a
+ * zero distance.
+ *
+ * endsrc / enddst, when not NULL, receive the positions just past the
+ * last byte consumed / produced.
+ * Returns 0 on success and -1 when either size is not positive or when a
+ * read or write would fall outside its buffer.
+ *
+ * The numeric tags in the comments below (164, 16a, ...) presumably refer
+ * to offsets in the original unpacker stub - TODO confirm.
+ */
+int cli_unfsg(char *source, char *dest, int ssize, int dsize, char **endsrc, char **enddst) {
+ uint8_t mydl=0x80;
+ uint32_t backbytes, backsize, oldback = 0;
+ char *csrc = source, *cdst = dest;
+ int oob, lostbit = 1;
+
+ if (ssize<=0 || dsize<=0) return -1;
+ /* the first byte is always a literal */
+ *cdst++=*csrc++;
+
+ while ( 1 ) {
+ if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+ if (oob == -1)
+ return -1;
+ /* 164 */
+ backsize = 0;
+ if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+ if (oob == -1)
+ return -1;
+ /* 16a */
+ backbytes = 0;
+ if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+ if (oob == -1)
+ return -1;
+ /* 170 */
+ lostbit = 1;
+ backsize++;
+ /* shift in bits until the 0x10 marker reaches bit 8: a 4-bit distance */
+ backbytes = 0x10;
+ while ( backbytes < 0x100 ) {
+ if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+ return -1;
+ backbytes = backbytes*2+oob;
+ }
+ backbytes &= 0xff;
+ /* a zero distance emits a single zero byte */
+ if ( ! backbytes ) {
+ if (cdst >= dest+dsize)
+ return -1;
+ *cdst++=0x00;
+ continue;
+ }
+ } else {
+ /* 18f */
+ if (csrc >= source+ssize)
+ return -1;
+ /* one source byte: low bit adds to the length, upper 7 bits are the distance */
+ backbytes = *(unsigned char*)csrc;
+ backsize = backsize * 2 + (backbytes & 1);
+ backbytes = (backbytes & 0xff)>>1;
+ csrc++;
+ /* a zero distance ends the stream */
+ if (! backbytes)
+ break;
+ backsize+=2;
+ oldback = backbytes;
+ lostbit = 0;
+ }
+ } else {
+ /* 180 */
+ /* variable-length number: a data bit followed by a continuation bit */
+ backsize = 1;
+ do {
+ if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+ return -1;
+ backsize = backsize*2+oob;
+ if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+ return -1;
+ } while (oob);
+
+ backsize = backsize - 1 - lostbit;
+ if (! backsize) {
+ /* 18a */
+ /* reuse the previous distance; only a new length is read */
+ backsize = 1;
+ do {
+ if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+ return -1;
+ backsize = backsize*2+oob;
+ if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+ return -1;
+ } while (oob);
+
+ backbytes = oldback;
+ } else {
+ /* 198 */
+ if (csrc >= source+ssize)
+ return -1;
+ /* distance: low byte from the stream, high part from the number just read */
+ backbytes = *(unsigned char*)csrc;
+ backbytes += (backsize-1)<<8;
+ backsize = 1;
+ csrc++;
+ do {
+ if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+ return -1;
+ backsize = backsize*2+oob;
+ if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+ return -1;
+ } while (oob);
+
+ /* length adjustments that depend on the distance */
+ if (backbytes >= 0x7d00)
+ backsize++;
+ if (backbytes >= 0x500)
+ backsize++;
+ if (backbytes <= 0x7f)
+ backsize += 2;
+
+ oldback = backbytes;
+ }
+ lostbit = 0;
+ }
+ /* both the destination range and the referenced range must lie inside dest */
+ if (!CLI_ISCONTAINED(dest, dsize, cdst, backsize) || !CLI_ISCONTAINED(dest, dsize, cdst-backbytes, backsize))
+ return -1;
+ /* byte-by-byte on purpose: the two ranges may overlap */
+ while(backsize--) {
+ *cdst=*(cdst-backbytes);
+ cdst++;
+ }
+
+ } else {
+ /* 15d */
+ /* literal: copy one byte from source to dest */
+ if (cdst < dest || cdst >= dest+dsize || csrc < source || csrc >= source+ssize)
+ return -1;
+ *cdst++=*csrc++;
+ lostbit=1;
+ }
+ }
+
+ if (endsrc) *endsrc = csrc;
+ if (enddst) *enddst = cdst;
+ return 0;
+}
+
+/*
+ * Decompress the packed stream in source (ssize bytes) into dest (dsize
+ * bytes); presumably the MEW packer format, going by the function name
+ * and its debug messages.  The decoding steps are the same as in
+ * cli_unfsg().
+ *
+ * The first source byte is copied verbatim.  After that, control bits
+ * fetched with doubledl() select between a literal byte copy and a
+ * back-reference that copies myecx_backsize bytes from myeax_backbytes
+ * positions behind the write cursor.  Decoding ends when the "18f" branch
+ * reads a zero distance.
+ *
+ * endsrc / enddst, when not NULL, receive the positions just past the
+ * last byte consumed / produced.
+ * Returns 0 on success and -1 when either size is not positive or when a
+ * read or write would fall outside its buffer.
+ */
+int unmew(char *source, char *dest, int ssize, int dsize, char **endsrc, char **enddst) {
+    uint8_t mydl=0x80;
+    uint32_t myeax_backbytes, myecx_backsize, oldback = 0;
+    char *csrc = source, *cdst = dest;
+    int oob, lostbit = 1;
+
+    /* the unconditional first-byte copy needs one byte on each side,
+     * the same guard cli_unfsg() applies */
+    if (ssize<=0 || dsize<=0) return -1;
+    *cdst++=*csrc++;
+
+    while ( 1 ) {
+        if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+            if (oob == -1)
+                return -1;
+            /* 164 */
+            myecx_backsize = 0;
+            if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+                if (oob == -1)
+                    return -1;
+                /* 16a */
+                myeax_backbytes = 0;
+                if ((oob=doubledl(&csrc, &mydl, source, ssize))) {
+                    if (oob == -1)
+                        return -1;
+                    /* 170 */
+                    lostbit = 1;
+                    myecx_backsize++;
+                    /* shift in bits until the 0x10 marker reaches bit 8 */
+                    myeax_backbytes = 0x10;
+                    while ( myeax_backbytes < 0x100 ) {
+                        if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+                            return -1;
+                        myeax_backbytes = myeax_backbytes*2+oob;
+                    }
+                    myeax_backbytes &= 0xff;
+                    /* a zero distance emits a single zero byte */
+                    if ( ! myeax_backbytes ) {
+                        if (cdst >= dest+dsize)
+                            return -1;
+                        *cdst++=0x00;
+                        /*cli_dbgmsg("X%02x ", *(cdst-1)&0xff);*/
+                        continue;
+                    }
+                } else {
+                    /* 18f */
+                    if (csrc >= source+ssize)
+                        return -1;
+                    /* low bit adds to the length, upper 7 bits are the distance */
+                    myeax_backbytes = *(unsigned char*)csrc;
+                    myecx_backsize = myecx_backsize * 2 + (myeax_backbytes & 1);
+                    myeax_backbytes = (myeax_backbytes & 0xff)>>1;
+                    csrc++;
+                    /* a zero distance ends the stream */
+                    if (! myeax_backbytes)
+                    {
+                        /* cli_dbgmsg("\nBREAK \n"); */
+                        break;
+                    }
+                    myecx_backsize+=2;
+                    oldback = myeax_backbytes;
+                    lostbit = 0;
+                }
+            } else {
+                /* 180 */
+                /* variable-length number: a data bit followed by a continuation bit */
+                myecx_backsize = 1;
+                do {
+                    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+                        return -1;
+                    myecx_backsize = myecx_backsize*2+oob;
+                    if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+                        return -1;
+                } while (oob);
+
+                myecx_backsize = myecx_backsize - 1 - lostbit;
+                if (! myecx_backsize) {
+                    /* 18a */
+                    /* reuse the previous distance; only a new length is read */
+                    myecx_backsize = 1;
+                    do {
+                        if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+                            return -1;
+                        myecx_backsize = myecx_backsize*2+oob;
+                        if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+                            return -1;
+                    } while (oob);
+
+                    myeax_backbytes = oldback;
+                } else {
+                    /* 198 */
+                    if (csrc >= source+ssize)
+                        return -1;
+                    /* distance: low byte from the stream, high part from the number just read */
+                    myeax_backbytes = *(unsigned char*)csrc;
+                    myeax_backbytes += (myecx_backsize-1)<<8;
+                    myecx_backsize = 1;
+                    csrc++;
+                    do {
+                        if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+                            return -1;
+                        myecx_backsize = myecx_backsize*2+oob;
+                        if ((oob=doubledl(&csrc, &mydl, source, ssize)) == -1)
+                            return -1;
+                    } while (oob);
+
+                    /* length adjustments that depend on the distance */
+                    if (myeax_backbytes >= 0x7d00)
+                        myecx_backsize++;
+                    if (myeax_backbytes >= 0x500)
+                        myecx_backsize++;
+                    if (myeax_backbytes <= 0x7f)
+                        myecx_backsize += 2;
+
+                    oldback = myeax_backbytes;
+                }
+                lostbit = 0;
+            }
+            /* both the destination range and the referenced range must lie inside dest */
+            if (!CLI_ISCONTAINED(dest, dsize, cdst, myecx_backsize) || !CLI_ISCONTAINED(dest, dsize, cdst-myeax_backbytes, myecx_backsize))
+            {
+                cli_dbgmsg("MEW: rete: %d %d %d %d %d || %d %d %d %d %d\n", dest, dsize, cdst, myecx_backsize,
+                           CLI_ISCONTAINED(dest, dsize, cdst, myecx_backsize),
+                           dest, dsize, cdst-myeax_backbytes, myecx_backsize,
+                           CLI_ISCONTAINED(dest, dsize, cdst-myeax_backbytes, myecx_backsize) );
+                return -1;
+            }
+            /* byte-by-byte on purpose: the two ranges may overlap */
+            while(myecx_backsize--) {
+                *cdst=*(cdst-myeax_backbytes);
+                cdst++;
+            }
+
+        } else {
+            /* 15d */
+            /* literal: copy one byte from source to dest */
+            if (cdst < dest || cdst >= dest+dsize || csrc < source || csrc >= source+ssize)
+            {
+                cli_dbgmsg("MEW: retf %08x %08x+%08x=%08x, %08x %08x+%08x=%08x\n",
+                           cdst, dest, dsize, dest+dsize, csrc, source, ssize, source+ssize);
+                return -1;
+            }
+            *cdst++=*csrc++;
+            /* cli_dbgmsg("Z%02x ", *(cdst-1)&0xff); */
+            lostbit=1;
+        }
+    }
+
+    /* both output pointers are optional, as in cli_unfsg() */
+    if (endsrc) *endsrc = csrc;
+    if (enddst) *enddst = cdst;
+    return 0;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pdf.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,891 @@
+/*
+ * Copyright (C) 2005-2007 Nigel Horne <njh at bandsman.co.uk>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * TODO: Embedded fonts
+ * TODO: Predictor image handling
+ */
+static char const rcsid[] = "$Id: pdf.c,v 1.61 2007/02/12 20:46:09 njh Exp $";
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifdef HAVE_MMAP
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <string.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef HAVE_ZLIB_H
+#include <zlib.h>
+#endif
+
+#ifdef C_WINDOWS
+#include <io.h>
+#endif
+
+#include "clamav.h"
+#include "others.h"
+#include "mbox.h"
+#include "pdf.h"
+
+#ifdef CL_DEBUG
+/*#define SAVE_TMP /* Save the file being worked on in tmp */
+#endif
+
+static int try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx);
+static int flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx);
+static int ascii85decode(const char *buf, off_t len, unsigned char *output);
+static const char *pdf_nextlinestart(const char *ptr, size_t len);
+static const char *pdf_nextobject(const char *ptr, size_t len);
+static const char *cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns);
+
+/*
+ * Scan the PDF file open on descriptor desc. Each stream object that can be
+ * decoded is written to its own temporary file "pdfXXXXXX" created under
+ * dir; duplicate streams (same MD5) and empty streams are removed again.
+ *
+ * Returns CL_CLEAN when the walk over the body finishes without trouble,
+ * otherwise one of the error codes assigned below (CL_EOPEN, CL_EMEM,
+ * CL_EFORMAT, CL_ETMPFILE, CL_EMAXFILES) or whatever try_flatedecode()
+ * reported for a flate stream.
+ *
+ * TODO: handle embedded URLs if (options&CL_SCAN_MAILURL)
+ */
+int
+cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
+{
+ off_t size; /* total number of bytes in the file */
+ off_t bytesleft, trailerlength;
+ char *buf, *alloced; /* start of memory mapped area */
+ const char *p, *q, *trailerstart;
+ const char *xrefstart; /* cross reference table */
+ const struct cl_limits *limits;
+ /*size_t xreflength;*/
+ table_t *md5table;
+ int printed_predictor_message, printed_embedded_font_message, rc;
+ unsigned int files;
+ struct stat statb;
+
+ cli_dbgmsg("in cli_pdf(%s)\n", dir);
+
+ if(fstat(desc, &statb) < 0)
+ return CL_EOPEN;
+
+ size = statb.st_size;
+
+ if(size == 0)
+ return CL_CLEAN;
+
+ if(size <= 7) /* doesn't even include the file header */
+ return CL_EFORMAT;
+
+ p = buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, desc, 0);
+ if(buf == MAP_FAILED)
+ return CL_EMEM;
+
+ /*
+ * Prefer a private heap copy of the file: when the allocation succeeds
+ * the mapping is dropped and everything below works on alloced; when
+ * it fails the mapping in buf stays in use. Every exit path therefore
+ * releases exactly one of the two.
+ */
+ alloced = cli_malloc(size);
+ if(alloced) {
+ /*
+ * FIXME: now I have this, there's no need for the lack of
+ * support on systems without mmap, e.g. cygwin
+ */
+ memcpy(alloced, buf, size);
+ munmap(buf, size);
+ p = alloced;
+ }
+
+ cli_dbgmsg("cli_pdf: scanning %lu bytes\n", (unsigned long)size);
+
+ /* Lines are terminated by \r, \n or both */
+
+ /* File Header */
+ if(memcmp(p, "%PDF-1.", 7) != 0) {
+ if(alloced)
+ free(alloced);
+ else
+ munmap(buf, size);
+ return CL_EFORMAT;
+ }
+
+#if 0
+ q = pdf_nextlinestart(&p[6], size - 6);
+ if(q == NULL) {
+ if(alloced)
+ free(alloced);
+ else
+ munmap(buf, size);
+ return CL_EFORMAT;
+ }
+ bytesleft = size - (long)(q - p);
+ p = q;
+#else
+ /* Skip the first six header bytes; p now points at the '.' of "%PDF-1." */
+ p = &p[6];
+ bytesleft = size - 6;
+#endif
+
+ /* Find the file trailer */
+ /* Search backwards from the end of the data for the "%%EOF" marker */
+ for(q = &p[bytesleft - 6]; q > p; --q)
+ if(memcmp(q, "%%EOF", 5) == 0)
+ break;
+
+ if(q <= p) {
+ if(alloced)
+ free(alloced);
+ else
+ munmap(buf, size);
+ return CL_EFORMAT;
+ }
+
+ /*
+ * Search backwards from "%%EOF" for the "trailer" keyword.
+ * NOTE(review): there is no explicit failure check; if the keyword is
+ * absent the loop just stops and trailerstart is used as it is.
+ */
+ for(trailerstart = &q[-7]; trailerstart > p; --trailerstart)
+ if(memcmp(trailerstart, "trailer", 7) == 0)
+ break;
+
+ /*
+ * q points to the end of the trailer section
+ */
+ trailerlength = (long)(q - trailerstart);
+ if(cli_pmemstr(trailerstart, trailerlength, "Encrypt", 7)) {
+ /*
+ * This tends to mean that the file is, in effect, read-only
+ */
+ if(alloced)
+ free(alloced);
+ else
+ munmap(buf, size);
+ cli_warnmsg("Encrypted PDF files not yet supported\n");
+ return CL_EFORMAT;
+ }
+
+ /*
+ * not true, since edits may put data after the trailer
+ bytesleft -= trailerlength;
+ */
+
+ /*
+ * FIXME: Handle more than one xref section in the xref table
+ */
+ for(xrefstart = trailerstart; xrefstart > p; --xrefstart)
+ if(memcmp(xrefstart, "xref", 4) == 0)
+ /*
+ * Make sure it's the start of the line, not a startxref
+ * token
+ */
+ if((xrefstart[-1] == '\n') || (xrefstart[-1] == '\r'))
+ break;
+
+ /* Reaching p means no line-initial "xref" was found */
+ if(xrefstart == p) {
+ if(alloced)
+ free(alloced);
+ else
+ munmap(buf, size);
+ return CL_EFORMAT;
+ }
+
+ printed_predictor_message = printed_embedded_font_message = 0;
+
+ /*
+ * Table of MD5 digests of the files extracted so far, used to drop
+ * duplicates. NOTE(review): the result of tableCreate() is not
+ * checked before it is passed to tableFind()/tableInsert().
+ */
+ md5table = tableCreate();
+ /*
+ * not true, since edits may put data after the trailer
+ xreflength = (size_t)(trailerstart - xrefstart);
+ bytesleft -= xreflength;
+ */
+
+ rc = CL_CLEAN;
+ files = 0;
+ limits = ctx->limits;
+
+ /*
+ * The body section consists of a sequence of indirect objects
+ */
+ /*
+ * Each pass expects "<number> <number> obj ... endobj". p and bytesleft
+ * are advanced together as tokens are consumed. The loop ends at the
+ * xref table, on the first non-CL_CLEAN rc, or when no further token
+ * is found.
+ */
+ while((p < xrefstart) && (rc == CL_CLEAN) &&
+ ((q = pdf_nextobject(p, bytesleft)) != NULL)) {
+ int is_ascii85decode, is_flatedecode, fout, len, has_cr;
+ /*int object_number, generation_number;*/
+ const char *objstart, *objend, *streamstart, *streamend;
+ char *md5digest;
+ unsigned long length, objlen, real_streamlen, calculated_streamlen;
+ int is_embedded_font, predictor;
+ char fullname[NAME_MAX + 1];
+
+ if(q == xrefstart)
+ break;
+ if(memcmp(q, "xref", 4) == 0)
+ break;
+
+ /*object_number = atoi(q);*/
+ bytesleft -= (off_t)(q - p);
+ p = q;
+
+ if(memcmp(q, "endobj", 6) == 0)
+ continue;
+ if(!isdigit(*q)) {
+ cli_warnmsg("cli_pdf: Object number missing\n");
+ rc = CL_EFORMAT;
+ break;
+ }
+ q = pdf_nextobject(p, bytesleft);
+ if((q == NULL) || !isdigit(*q)) {
+ cli_warnmsg("cli_pdf: Generation number missing\n");
+ rc = CL_EFORMAT;
+ break;
+ }
+ /*generation_number = atoi(q);*/
+ bytesleft -= (off_t)(q - p);
+ p = q;
+
+ q = pdf_nextobject(p, bytesleft);
+ if((q == NULL) || (memcmp(q, "obj", 3) != 0)) {
+ cli_warnmsg("Indirect object missing \"obj\"\n");
+ rc = CL_EFORMAT;
+ break;
+ }
+
+ /* The object body runs from just after "obj" to the next "endobj" */
+ bytesleft -= (off_t)((q - p) + 3);
+ objstart = p = &q[3];
+ objend = cli_pmemstr(p, bytesleft, "endobj", 6);
+ if(objend == NULL) {
+ cli_dbgmsg("No matching endobj\n");
+ break;
+ }
+ bytesleft -= (off_t)((objend - p) + 6);
+ p = &objend[6];
+ objlen = (unsigned long)(objend - objstart);
+
+ /* Is this object a stream? */
+ streamstart = cli_pmemstr(objstart, objlen, "stream", 6);
+ if(streamstart == NULL)
+ continue;
+
+ is_embedded_font = length = is_ascii85decode =
+ is_flatedecode = 0;
+ predictor = 1;
+
+ /*
+ * TODO: handle F and FFilter?
+ */
+ /*
+ * Walk the tokens between "obj" and "stream" and pick out the
+ * name objects this parser understands.
+ */
+ q = objstart;
+ while(q < streamstart) {
+ if(*q == '/') { /* name object */
+ /*cli_dbgmsg("Name object %8.8s\n", q+1, q+1);*/
+ if(strncmp(++q, "Length ", 7) == 0) {
+ q += 7;
+ length = atoi(q);
+ while(isdigit(*q))
+ q++;
+ /*
+ * Note: incremental updates are not
+ * supported
+ */
+ /*
+ * "N 0 R": the length lives in indirect
+ * object N, so look for "\nN 0 obj" (or
+ * "\rN 0 obj") anywhere in the file and
+ * read the number that follows it.
+ */
+ if((bytesleft > 11) && strncmp(q, " 0 R", 4) == 0) {
+ const char *r;
+ char b[14];
+
+ q += 4;
+ cli_dbgmsg("Length is in indirect obj %ld\n",
+ length);
+ snprintf(b, sizeof(b),
+ "\n%ld 0 obj", length);
+ /* length is reused here as the length of the search string */
+ length = (unsigned long)strlen(b);
+ r = cli_pmemstr(alloced ? alloced : buf,
+ size, b, length);
+ if(r == NULL) {
+ b[0] = '\r';
+ r = cli_pmemstr(alloced ? alloced : buf,
+ size, b, length);
+ }
+ if(r) {
+ r += length - 1;
+ /* NOTE(review): the byte count passed here mixes r and q, which need not point into the same object - confirm it is a valid bound */
+ r = pdf_nextobject(r, bytesleft - (r - q));
+ if(r) {
+ length = atoi(r);
+ while(isdigit(*r))
+ r++;
+ cli_dbgmsg("length in '%s' %ld\n",
+ &b[1],
+ length);
+ }
+ } else
+ cli_warnmsg("Couldn't find '%s'\n",
+ &b[1]);
+ }
+ q--;
+ } else if(strncmp(q, "Length2 ", 8) == 0)
+ is_embedded_font = 1;
+ else if(strncmp(q, "Predictor ", 10) == 0) {
+ q += 10;
+ predictor = atoi(q);
+ while(isdigit(*q))
+ q++;
+ q--;
+ } else if(strncmp(q, "FlateDecode", 11) == 0) {
+ is_flatedecode = 1;
+ q += 11;
+ } else if(strncmp(q, "ASCII85Decode", 13) == 0) {
+ is_ascii85decode = 1;
+ q += 13;
+ }
+ }
+ q = pdf_nextobject(q, (size_t)(streamstart - q));
+ if(q == NULL)
+ break;
+ }
+
+ if(is_embedded_font) {
+ /*
+ * Need some documentation, the only I can find a
+ * reference to is not free, if some kind soul wishes
+ * to donate a copy, please contact me!
+ * (http://safari.adobepress.com/0321304748)
+ */
+ if(!printed_embedded_font_message) {
+ cli_dbgmsg("Embedded fonts not yet supported\n");
+ printed_embedded_font_message = 1;
+ }
+ continue;
+ }
+ if(predictor > 1) {
+ /*
+ * Needs some thought
+ */
+ if(!printed_predictor_message) {
+ cli_dbgmsg("Predictor %d not honoured for embedded image\n",
+ predictor);
+ printed_predictor_message = 1;
+ }
+ continue;
+ }
+
+ /* objend points to the end of the object (start of "endobj") */
+ streamstart += 6; /* go past the word "stream" */
+ len = (int)(objend - streamstart);
+ /* NOTE(review): len can be 0 here when "stream" is the last thing before "endobj" - confirm pdf_nextlinestart() copes with that */
+ q = pdf_nextlinestart(streamstart, len);
+ if(q == NULL)
+ break;
+ len -= (int)(q - streamstart);
+ streamstart = q;
+ /* The stream data ends at "endstream" followed by \n or \r */
+ streamend = cli_pmemstr(streamstart, len, "endstream\n", 10);
+ if(streamend == NULL) {
+ streamend = cli_pmemstr(streamstart, len, "endstream\r", 10);
+ if(streamend == NULL) {
+ cli_dbgmsg("No endstream\n");
+ break;
+ }
+ has_cr = 1;
+ } else
+ has_cr = 0;
+ /* Create the output file for this stream under dir */
+ snprintf(fullname, sizeof(fullname), "%s/pdfXXXXXX", dir);
+#if defined(C_LINUX) || defined(C_BSD) || defined(HAVE_MKSTEMP) || defined(C_SOLARIS) || defined(C_CYGWIN)
+ fout = mkstemp(fullname);
+#elif defined(C_WINDOWS)
+ if(_mktemp(fullname) == NULL) {
+ /* mktemp only allows 26 files */
+ char *name = cli_gentemp(dir);
+ if(name == NULL)
+ fout = -1;
+ else {
+ strcpy(fullname, name);
+ free(name);
+ fout = open(fullname,
+ O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+ }
+ } else
+ fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#else
+ mktemp(fullname);
+ fout = open(fullname, O_WRONLY|O_CREAT|O_EXCL|O_TRUNC|O_BINARY, 0600);
+#endif
+
+ if(fout < 0) {
+ cli_errmsg("cli_pdf: can't create temporary file %s: %s\n", fullname, strerror(errno));
+ rc = CL_ETMPFILE;
+ break;
+ }
+
+ /*
+ * Calculate the length ourself, the Length parameter is often
+ * wrong
+ */
+ /* Step back over the line terminator that precedes "endstream" */
+ if((*--streamend != '\n') && (*streamend != '\r'))
+ streamend++;
+ else if(has_cr && (*--streamend != '\r'))
+ streamend++;
+
+ if(streamend <= streamstart) {
+ close(fout);
+ cli_dbgmsg("Empty stream\n");
+ unlink(fullname);
+ continue;
+ }
+ calculated_streamlen = (int)(streamend - streamstart);
+ real_streamlen = length;
+
+ if(calculated_streamlen != real_streamlen)
+ cli_dbgmsg("cli_pdf: Incorrect Length field in file attempting to recover\n");
+
+ cli_dbgmsg("length %ld, calculated_streamlen %ld isFlate %d isASCII85 %d\n",
+ length, calculated_streamlen,
+ is_flatedecode, is_ascii85decode);
+
+#if 0
+ /* FIXME: this isn't right... */
+ if(length)
+ /*streamlen = (is_flatedecode) ? length : MIN(length, streamlen);*/
+ streamlen = MIN(length, streamlen);
+#endif
+
+ /*
+ * Decode according to the filters seen in the dictionary:
+ * ASCII85 (optionally followed by flate), flate alone, or a raw
+ * copy of the stream bytes.
+ */
+ if(is_ascii85decode) {
+ unsigned char *tmpbuf = cli_malloc(calculated_streamlen * 5);
+ int ret;
+
+ if(tmpbuf == NULL) {
+ close(fout);
+ unlink(fullname);
+ rc = CL_EMEM;
+ continue;
+ }
+
+ ret = ascii85decode(streamstart, calculated_streamlen, tmpbuf);
+
+ if(ret == -1) {
+ free(tmpbuf);
+ close(fout);
+ unlink(fullname);
+ rc = CL_EFORMAT;
+ continue;
+ }
+ if(ret) {
+ unsigned char *t;
+
+ real_streamlen = ret;
+ /* free unused trailing bytes */
+ /* NOTE(review): the buffer is shrunk to calculated_streamlen while ret bytes were decoded; confirm ret can never exceed calculated_streamlen (a 'z' input byte yields four output bytes) */
+ t = (unsigned char *)cli_realloc(tmpbuf,
+ calculated_streamlen);
+ if(t == NULL) {
+ free(tmpbuf);
+ close(fout);
+ unlink(fullname);
+ rc = CL_EMEM;
+ continue;
+ }
+ tmpbuf = t;
+ /*
+ * Note that it will probably be both
+ * ascii85encoded and flateencoded
+ */
+ if(is_flatedecode)
+ rc = try_flatedecode((unsigned char *)tmpbuf, real_streamlen, real_streamlen, fout, ctx);
+ else
+ /* NOTE(review): this writes from streamstart (the still-encoded data), not from the decoded tmpbuf - confirm intent */
+ cli_writen(fout, (const char *)streamstart, real_streamlen);
+ }
+ free(tmpbuf);
+ } else if(is_flatedecode)
+ rc = try_flatedecode((unsigned char *)streamstart, real_streamlen, calculated_streamlen, fout, ctx);
+
+ else {
+ cli_dbgmsg("cli_pdf: writing %lu bytes from the stream\n",
+ (unsigned long)real_streamlen);
+ cli_writen(fout, (const char *)streamstart, real_streamlen);
+ }
+
+ close(fout);
+ /*
+ * Remove the file again if an identical one (same MD5) has
+ * already been extracted. NOTE(review): the result of
+ * cli_md5file() is used without a NULL check.
+ */
+ md5digest = cli_md5file(fullname);
+ if(tableFind(md5table, md5digest) >= 0) {
+ cli_dbgmsg("cli_pdf: not scanning duplicate embedded file '%s'\n", fullname);
+ unlink(fullname);
+ } else
+ tableInsert(md5table, md5digest, 1);
+ free(md5digest);
+ /* files is incremented for duplicates as well */
+ cli_dbgmsg("cli_pdf: extracted file %d to %s\n", ++files,
+ fullname);
+ if(limits && limits->maxfiles && (files >= limits->maxfiles)) {
+ /* Bug 698 */
+ cli_dbgmsg("cli_pdf: number of files exceeded %u\n", limits->maxfiles);
+ rc = CL_EMAXFILES;
+ }
+ }
+
+ if(alloced)
+ free(alloced);
+ else
+ munmap(buf, size);
+
+ tableDestroy(md5table);
+
+ cli_dbgmsg("cli_pdf: returning %d\n", rc);
+ return rc;
+}
+
+/*
+ * Inflate a flate-encoded stream into fout, trying two candidate lengths.
+ *
+ * The first attempt uses real_len; if that fails and calculated_len is a
+ * different value, a second attempt is made with calculated_len.
+ * Returns CL_SUCCESS as soon as one attempt succeeds, otherwise the status
+ * of the last flatedecode() call.
+ */
+static int
+try_flatedecode(unsigned char *buf, off_t real_len, off_t calculated_len, int fout, const cli_ctx *ctx)
+{
+    int status = flatedecode(buf, real_len, fout, ctx);
+
+    if(status != CL_SUCCESS) {
+        if(real_len == calculated_len) {
+            /* Both candidate lengths agree, so a retry cannot help */
+            cli_warnmsg("Bad compression in flate stream\n");
+            return status;
+        }
+
+        status = flatedecode(buf, calculated_len, fout, ctx);
+        if(status != CL_SUCCESS)
+            /* i.e. the PDF file is broken :-( */
+            cli_warnmsg("cli_pdf: Bad compressed block length in flate stream\n");
+    }
+
+    return status;
+}
+
+/*
+ * Inflate len bytes of zlib data starting at buf and write the result to the
+ * descriptor fout, BUFSIZ bytes at a time.
+ *
+ * Returns:
+ *   CL_CLEAN    len is 0, nothing was done
+ *   CL_SUCCESS  the data was inflated and inflateEnd() reported Z_OK
+ *   CL_EIO      writing to fout failed
+ *   CL_EZIP     zlib reported an error, or a size/ratio limit from
+ *               ctx->limits was exceeded and BLOCKMAX is not set
+ *   CL_VIRUS    a limit was exceeded and BLOCKMAX is set; *ctx->virname
+ *               is set to the name of the limit
+ *
+ * Every path that returns after a successful inflateInit() calls
+ * inflateEnd(), so the zlib state is never leaked.
+ */
+static int
+flatedecode(unsigned char *buf, off_t len, int fout, const cli_ctx *ctx)
+{
+    int zstat;
+    off_t nbytes;
+    unsigned long ratio;
+    z_stream stream;
+    unsigned char output[BUFSIZ];
+#ifdef SAVE_TMP
+    char tmpfilename[16];
+    int tmpfd;
+#endif
+
+    cli_dbgmsg("cli_pdf: flatedecode %lu bytes\n", (unsigned long)len);
+
+    if(len == 0) {
+        cli_warnmsg("cli_pdf: flatedecode len == 0\n");
+        return CL_CLEAN;
+    }
+
+#ifdef SAVE_TMP
+    /*
+     * Copy the embedded area for debugging, so that if it falls over
+     * we have a copy of the offending data. This is debugging code
+     * that you shouldn't of course install in a live environment. I am
+     * not interested in hearing about security issues with this section
+     * of the parser.
+     */
+    strcpy(tmpfilename, "/tmp/pdfXXXXXX");
+    tmpfd = mkstemp(tmpfilename);
+    if(tmpfd < 0) {
+        perror(tmpfilename);
+        cli_errmsg("Can't make debugging file\n");
+    } else {
+        FILE *tmpfp = fdopen(tmpfd, "w");
+
+        if(tmpfp) {
+            fwrite(buf, sizeof(char), len, tmpfp);
+            fclose(tmpfp);
+            cli_dbgmsg("cli_pdf: flatedecode: debugging file is %s\n",
+                tmpfilename);
+        } else
+            cli_errmsg("cli_pdf: can't fdopen debugging file\n");
+    }
+#endif
+    stream.zalloc = (alloc_func)Z_NULL;
+    stream.zfree = (free_func)Z_NULL;
+    stream.opaque = (void *)NULL;
+    stream.next_in = (Bytef *)buf;
+    stream.avail_in = len;
+    stream.next_out = output;
+    stream.avail_out = sizeof(output);
+
+    zstat = inflateInit(&stream);
+    if(zstat != Z_OK) {
+        cli_warnmsg("cli_pdf: inflateInit failed\n");
+        return CL_EZIP;
+    }
+
+    nbytes = 0;
+
+    while(stream.avail_in) {
+        zstat = inflate(&stream, Z_NO_FLUSH); /* zlib */
+        switch(zstat) {
+            case Z_OK:
+                if(stream.avail_out == 0) {
+                    /* Output buffer is full: flush it and start again */
+                    int written = cli_writen(fout, output, sizeof(output));
+
+                    if(written < 0) {
+                        /* Don't fold the error value into the byte count */
+                        inflateEnd(&stream);
+                        return CL_EIO;
+                    }
+                    nbytes += written;
+
+                    if(ctx->limits &&
+                       ctx->limits->maxfilesize &&
+                       (nbytes > (off_t) ctx->limits->maxfilesize)) {
+                        cli_dbgmsg("cli_pdf: flatedecode size exceeded (%lu)\n",
+                            (unsigned long)nbytes);
+                        inflateEnd(&stream);
+                        if(BLOCKMAX) {
+                            *ctx->virname = "PDF.ExceededFileSize";
+                            return CL_VIRUS;
+                        }
+                        return CL_EZIP;
+                    }
+                    stream.next_out = output;
+                    stream.avail_out = sizeof(output);
+                }
+                continue;
+            case Z_STREAM_END:
+                break;
+            default:
+                if(stream.msg)
+                    cli_dbgmsg("pdf: after writing %lu bytes, got error \"%s\" inflating PDF attachment\n",
+                        (unsigned long)nbytes,
+                        stream.msg);
+                else
+                    cli_dbgmsg("pdf: after writing %lu bytes, got error %d inflating PDF attachment\n",
+                        (unsigned long)nbytes, zstat);
+                inflateEnd(&stream);
+                /* zstat is neither Z_OK nor Z_STREAM_END here */
+                return CL_EZIP;
+        }
+        break;
+    }
+
+    /* Flush whatever is left in the output buffer */
+    if(stream.avail_out != sizeof(output))
+        if(cli_writen(fout, output, sizeof(output) - stream.avail_out) < 0) {
+            inflateEnd(&stream);
+            return CL_EIO;
+        }
+
+    /* Guard the division in case no input was consumed at all */
+    ratio = stream.total_in ? (unsigned long)(stream.total_out / stream.total_in) : 0;
+
+    /*
+     * On BSD systems total_in and total_out are "long long", so these
+     * numbers could (in theory) get truncated in the debug statement
+     */
+    cli_dbgmsg("cli_pdf: flatedecode in=%lu out=%lu ratio %lu (max %u)\n",
+        (unsigned long)stream.total_in, (unsigned long)stream.total_out,
+        ratio,
+        ctx->limits ? ctx->limits->maxratio : 0);
+
+    if(ctx->limits &&
+       ctx->limits->maxratio &&
+       stream.total_in &&
+       ((stream.total_out / stream.total_in) > ctx->limits->maxratio)) {
+        cli_dbgmsg("cli_pdf: flatedecode Max ratio reached\n");
+        inflateEnd(&stream);
+        if(BLOCKMAX) {
+            *ctx->virname = "Oversized.PDF";
+            return CL_VIRUS;
+        }
+        return CL_EZIP;
+    }
+
+#ifdef SAVE_TMP
+    unlink(tmpfilename);
+#endif
+    return inflateEnd(&stream) == Z_OK ? CL_SUCCESS : CL_EZIP;
+}
+
+/*
+ * ascii85 inflation, returns number of bytes in output, -1 for error
+ *
+ * Decodes at most len bytes from buf into output. Decoding stops at the
+ * "~>" end marker or when len bytes have been consumed, whichever comes
+ * first; white space is skipped and any other character outside the
+ * ASCII85 alphabet is an error. 'z' stands for four zero bytes and is only
+ * legal between groups. A trailing partial group of n characters (n >= 2)
+ * yields n - 1 bytes; a partial group that is not closed by "~>" is
+ * dropped.
+ *
+ * output must be large enough for the decoded data; no bounds are checked
+ * here (each 'z' input byte expands to four output bytes).
+ *
+ * See http://www.piclist.com/techref/method/encode.htm (look for base85)
+ */
+static int
+ascii85decode(const char *buf, off_t len, unsigned char *output)
+{
+    const char *ptr;
+    uint32_t sum = 0;
+    int quintet = 0;
+    int ret = 0;
+
+    if(cli_pmemstr(buf, len, "~>", 2) == NULL)
+        cli_warnmsg("ascii85decode: no EOF marker found\n");
+
+    ptr = buf;
+
+    cli_dbgmsg("cli_pdf: ascii85decode %lu bytes\n", (unsigned long)len);
+
+    while(len > 0) {
+        /*
+         * Read the byte as unsigned: with a signed plain char a 0xFF
+         * input byte would compare equal to EOF and negative values
+         * must not be passed to isspace().
+         */
+        int byte = (int)(unsigned char)*ptr++;
+
+        len--;
+
+        /* Only look at the next byte if there is one */
+        if((byte == '~') && (len > 0) && (*ptr == '>'))
+            byte = EOF;
+
+        if(byte >= '!' && byte <= 'u') {
+            sum = (sum * 85) + ((uint32_t)byte - '!');
+            if(++quintet == 5) {
+                *output++ = (unsigned char)(sum >> 24);
+                *output++ = (unsigned char)((sum >> 16) & 0xFF);
+                *output++ = (unsigned char)((sum >> 8) & 0xFF);
+                *output++ = (unsigned char)(sum & 0xFF);
+                ret += 4;
+                quintet = 0;
+                sum = 0;
+            }
+        } else if(byte == 'z') {
+            if(quintet) {
+                cli_warnmsg("ascii85decode: unexpected 'z'\n");
+                return -1;
+            }
+            *output++ = '\0';
+            *output++ = '\0';
+            *output++ = '\0';
+            *output++ = '\0';
+            ret += 4;
+        } else if(byte == EOF) {
+            cli_dbgmsg("ascii85decode: quintet %d\n", quintet);
+            if(quintet) {
+                int i;
+
+                if(quintet == 1) {
+                    cli_warnmsg("ascii85Decode: only 1 byte in last quintet\n");
+                    return -1;
+                }
+                /* Pad the group out to five digits */
+                for(i = quintet; i < 5; i++)
+                    sum *= 85;
+
+                /* Round up so that truncating the low bytes is exact */
+                sum += (0xFFFFFF >> ((quintet - 2) * 8));
+
+                /* n characters encode n - 1 bytes: count what is written */
+                ret += quintet - 1;
+                for(i = 0; i < quintet - 1; i++)
+                    *output++ = (unsigned char)((sum >> (24 - 8 * i)) & 0xFF);
+                quintet = 0;
+            }
+            len = 0;
+            break;
+        } else if(!isspace(byte)) {
+            cli_warnmsg("ascii85Decode: invalid character 0x%x, len %lu\n",
+                byte & 0xFF, (unsigned long)len);
+            return -1;
+        }
+    }
+    return ret;
+}
+
+/*
+ * Find the start of the next line
+ *
+ * Scans at most len bytes from ptr: first to the end of the current line,
+ * then over the run of line terminators that follows. Returns a pointer to
+ * the first byte after that run, or NULL if the len bytes are used up
+ * before such a byte is reached (including len == 0).
+ *
+ * Because strchr() also matches the terminating NUL of "\r\n", a NUL byte
+ * in the data is treated like a line terminator.
+ */
+static const char *
+pdf_nextlinestart(const char *ptr, size_t len)
+{
+    /* Nothing may be read; without this check --len would wrap around */
+    if(len == 0)
+        return NULL;
+
+    /* Skip the rest of the current line */
+    while(strchr("\r\n", *ptr) == NULL) {
+        if(--len == 0L)
+            return NULL;
+        ptr++;
+    }
+    /* Skip the line terminator(s) */
+    while(strchr("\r\n", *ptr) != NULL) {
+        if(--len == 0L)
+            return NULL;
+        ptr++;
+    }
+    return ptr;
+}
+
+/*
+ * Return the start of the next PDF object.
+ * This assumes that we're not in a stream.
+ *
+ * Looks at no more than len bytes from ptr. The token ptr starts in is
+ * skipped; once a separator (white space, '[', '<', a line end or a '%'
+ * comment) has been passed, the first following non-separator byte is
+ * returned. A '/' (name object) is returned wherever it is met. Returns
+ * NULL when the bytes run out first.
+ */
+static const char *
+pdf_nextobject(const char *ptr, size_t len)
+{
+    int in_token = 1; /* still inside the token we started in */
+
+    while(len != 0) {
+        const char c = *ptr;
+
+        if((c == '\n') || (c == '\r') || (c == '%')) {
+            /* Line end or comment: resume at the next line */
+            const char *next = pdf_nextlinestart(ptr, len);
+
+            if(next == NULL)
+                return NULL;
+            len -= (size_t)(next - ptr);
+            ptr = next;
+            in_token = 0;
+        } else if((c == ' ') || (c == '\t') || (c == '\v') || (c == '\f') ||
+                  (c == '[') || (c == '<')) {
+            /* White space, or start of an array / dictionary object */
+            in_token = 0;
+            ptr++;
+            len--;
+        } else if((c == '/') || !in_token) {
+            /* Start of a name object, or first byte of a new token */
+            /* TODO: parse and return object type */
+            return ptr;
+        } else {
+            ptr++;
+            len--;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * like cli_memstr - but returns the location of the match
+ *
+ * Searches the hs bytes at haystack for the first occurrence of the ns
+ * bytes at needle and returns a pointer to it, or NULL if there is none.
+ * If haystack and needle are the same pointer, haystack is returned
+ * without looking at the lengths.
+ * FIXME: need a case insensitive version
+ */
+static const char *
+cli_pmemstr(const char *haystack, size_t hs, const char *needle, size_t ns)
+{
+    const char *cursor;
+    size_t remaining;
+
+    if(haystack == needle)
+        return haystack;
+
+    if(hs < ns)
+        return NULL;
+
+    if(memcmp(haystack, needle, ns) == 0)
+        return haystack;
+
+    cursor = haystack;
+    remaining = hs;
+
+    for(;;) {
+        /* Jump to the next candidate: a byte equal to needle[0] */
+        const char *hit = memchr(cursor, needle[0], remaining);
+
+        if(hit == NULL)
+            return NULL;
+
+        remaining -= (size_t)(hit - cursor);
+        if(remaining < ns)
+            return NULL;
+
+        if(memcmp(hit, needle, ns) == 0)
+            return hit;
+
+        /* Mismatch: continue one byte after this candidate */
+        cursor = hit + 1;
+        remaining--;
+    }
+}
+#else /*!HAVE_MMAP*/
+
+#include "clamav.h"
+#include "others.h"
+#include "pdf.h"
+
+/*
+ * Fallback used when HAVE_MMAP is not defined: nothing is decoded, a
+ * warning is logged and the file is reported as CL_CLEAN. None of the
+ * arguments are used.
+ */
+int
+cli_pdf(const char *dir, int desc, const cli_ctx *ctx)
+{
+ cli_warnmsg("File not decoded - PDF decoding needs mmap() (for now)\n");
+ return CL_CLEAN;
+}
+#endif
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pe.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,2232 @@
+/*
+ * Copyright (C) 2004 - 2006 Tomasz Kojm <tkojm at clamav.net>
+ * aCaB <acab at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <time.h>
+#include <stdarg.h>
+
+#include "cltypes.h"
+#include "clamav.h"
+#include "others.h"
+#include "pe.h"
+#include "petite.h"
+#include "fsg.h"
+#include "spin.h"
+#include "upx.h"
+#include "yc.h"
+#include "aspack.h"
+#include "wwunpack.h"
+#include "unsp.h"
+#include "scanners.h"
+#include "str.h"
+#include "execs.h"
+#include "md5.h"
+#include "mew.h"
+#include "upack.h"
+#include "matcher.h"
+#include "matcher-bm.h"
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+
+#define DCONF ctx->dconf->pe
+
+#define IMAGE_DOS_SIGNATURE 0x5a4d /* MZ */
+#define IMAGE_DOS_SIGNATURE_OLD 0x4d5a /* ZM */
+#define IMAGE_NT_SIGNATURE 0x00004550
+#define PE32_SIGNATURE 0x010b
+#define PE32P_SIGNATURE 0x020b
+
+#define optional_hdr64 pe_opt.opt64
+#define optional_hdr32 pe_opt.opt32
+
+#define UPX_NRV2B "\x11\xdb\x11\xc9\x01\xdb\x75\x07\x8b\x1e\x83\xee\xfc\x11\xdb\x11\xc9\x11\xc9\x75\x20\x41\x01\xdb"
+#define UPX_NRV2D "\x83\xf0\xff\x74\x78\xd1\xf8\x89\xc5\xeb\x0b\x01\xdb\x75\x07\x8b\x1e\x83\xee\xfc\x11\xdb\x11\xc9"
+#define UPX_NRV2E "\xeb\x52\x31\xc9\x83\xe8\x03\x72\x11\xc1\xe0\x08\x8a\x06\x46\x83\xf0\xff\x74\x75\xd1\xf8\x89\xc5"
+
+#define EC32(x) le32_to_host(x) /* Convert little endian to host */
+#define EC16(x) le16_to_host(x)
+/*
+ * Lower and upper boundary alignment (size vs offset):
+ * PEALIGN rounds o down and PESALIGN rounds o up to a multiple of a.
+ * Both return o unchanged when a is 0. Arguments are evaluated more than
+ * once.
+ */
+#define PEALIGN(o,a) (((a))?(((o)/(a))*(a)):(o))
+#define PESALIGN(o,a) (((a))?(((o)/(a)+((o)%(a)!=0))*(a)):(o))
+
+/*
+ * CLI_UNPSIZELIMITS(NAME, CHK): size guard used before unpacking.
+ * If ctx->limits->maxfilesize is set and CHK exceeds it, logs the fact,
+ * frees exe_sections and RETURNS from the enclosing function: CL_VIRUS
+ * (with *ctx->virname set to "PE.<NAME>.ExceededFileSize") when BLOCKMAX
+ * is true, CL_CLEAN otherwise. NAME must be a string literal; ctx and
+ * exe_sections are taken from the scope the macro is expanded in.
+ * The expansion is a bare if statement, not wrapped in do { } while(0).
+ */
+#define CLI_UNPSIZELIMITS(NAME,CHK) \
+if(ctx->limits && ctx->limits->maxfilesize && (CHK) > ctx->limits->maxfilesize) { \
+ cli_dbgmsg(NAME": Sizes exceeded (%lu > %lu)\n", (CHK), ctx->limits->maxfilesize); \
+ free(exe_sections); \
+ if(BLOCKMAX) { \
+ *ctx->virname = "PE."NAME".ExceededFileSize"; \
+ return CL_VIRUS; \
+ } else { \
+ return CL_CLEAN; \
+ } \
+}
+
+/*
+ * CLI_UNPTEMP(NAME, FREEME): create the temporary output file for an
+ * unpacker. Stores a generated name in tempfile and the open descriptor in
+ * ndesc (both from the enclosing scope). FREEME is a parenthesised argument
+ * list handed to cli_multifree() on failure, after which the enclosing
+ * function RETURNS CL_EMEM (no name) or CL_EIO (open failed).
+ */
+#define CLI_UNPTEMP(NAME,FREEME) \
+if(!(tempfile = cli_gentemp(NULL))) { \
+ cli_multifree FREEME; \
+ return CL_EMEM; \
+} \
+if((ndesc = open(tempfile, O_RDWR|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU)) < 0) { \
+ cli_dbgmsg(NAME": Can't create file %s\n", tempfile); \
+ free(tempfile); \
+ cli_multifree FREEME; \
+ return CL_EIO; \
+}
+
+/* Remove tempfile unless cli_leavetemps_flag asks for it to be kept */
+#define CLI_TMPUNLK() if(!cli_leavetemps_flag) unlink(tempfile)
+
+/*
+ * FSGCASE(NAME, FREESEC): extra "case 0" label for the switch generated by
+ * CLI_UNPRESULTS_. Handles "unpacked but not rebuilt": closes ndesc,
+ * removes and frees tempfile, runs FREESEC, clears found and sets
+ * upx_success, then breaks out of the switch.
+ */
+#define FSGCASE(NAME,FREESEC) \
+ case 0: /* Unpacked and NOT rebuilt */ \
+ cli_dbgmsg(NAME": Successfully decompressed\n"); \
+ close(ndesc); \
+ unlink(tempfile); \
+ free(tempfile); \
+ FREESEC; \
+ found = 0; \
+ upx_success = 1; \
+ break; /* FSG ONLY! - scan raw data after upx block */
+
+/*
+ * SPINCASE(): extra "case 2" label (PESpin size exceeded). Frees spinned,
+ * closes ndesc and removes tempfile; when BLOCKMAX is true it also frees
+ * exe_sections and RETURNS CL_VIRUS from the enclosing function, otherwise
+ * it breaks out of the switch. Note that the definition ends in a line
+ * continuation, so the (empty) line after it is part of the macro.
+ */
+#define SPINCASE() \
+ case 2: \
+ free(spinned); \
+ close(ndesc); \
+ unlink(tempfile); \
+ cli_dbgmsg("PESpin: Size exceeded\n"); \
+ if(BLOCKMAX) { \
+ free(tempfile); \
+ free(exe_sections); \
+ *ctx->virname = "PE.Pespin.ExceededFileSize"; \
+ return CL_VIRUS; \
+ } \
+ free(tempfile); \
+ break; \
+
+/*
+ * CLI_UNPRESULTS_(NAME, FSGSTUFF, EXPR, GOOD, FREEME): run the unpacker
+ * expression EXPR and act on its result.
+ *   GOOD     the rebuilt file in ndesc is rescanned with
+ *            cli_magic_scandesc() and the enclosing function RETURNS
+ *            CL_VIRUS or CL_CLEAN; FREEME and exe_sections are freed first.
+ *   FSGSTUFF optional additional case labels (FSGCASE / SPINCASE).
+ *   default  unpacking failed: the temporary file is closed and removed,
+ *            FREEME is freed and execution continues after the switch.
+ * FREEME is a parenthesised argument list for cli_multifree().
+ */
+#define CLI_UNPRESULTS_(NAME,FSGSTUFF,EXPR,GOOD,FREEME) \
+ switch(EXPR) { \
+ case GOOD: /* Unpacked and rebuilt */ \
+ if(cli_leavetemps_flag) \
+ cli_dbgmsg(NAME": Unpacked and rebuilt executable saved in %s\n", tempfile); \
+ else \
+ cli_dbgmsg(NAME": Unpacked and rebuilt executable\n"); \
+ cli_multifree FREEME; \
+ free(exe_sections); \
+ fsync(ndesc); \
+ lseek(ndesc, 0, SEEK_SET); \
+ cli_dbgmsg("***** Scanning rebuilt PE file *****\n"); \
+ if(cli_magic_scandesc(ndesc, ctx) == CL_VIRUS) { \
+ close(ndesc); \
+ CLI_TMPUNLK(); \
+ free(tempfile); \
+ return CL_VIRUS; \
+ } \
+ close(ndesc); \
+ CLI_TMPUNLK(); \
+ free(tempfile); \
+ return CL_CLEAN; \
+\
+FSGSTUFF; \
+\
+ default: \
+ cli_dbgmsg(NAME": Unpacking failed\n"); \
+ close(ndesc); \
+ unlink(tempfile); \
+ cli_multifree FREEME; \
+ free(tempfile); \
+ }
+
+
+/* Convenience forms: no extra case, FSG case freeing sections, FSG case freeing nothing */
+#define CLI_UNPRESULTS(NAME,EXPR,GOOD,FREEME) CLI_UNPRESULTS_(NAME,NULL,EXPR,GOOD,FREEME)
+#define CLI_UNPRESULTSFSG1(NAME,EXPR,GOOD,FREEME) CLI_UNPRESULTS_(NAME,FSGCASE(NAME,free(sections)),EXPR,GOOD,FREEME)
+#define CLI_UNPRESULTSFSG2(NAME,EXPR,GOOD,FREEME) CLI_UNPRESULTS_(NAME,FSGCASE(NAME,NULL),EXPR,GOOD,FREEME)
+
+/* Node of a singly linked list of 32-bit offsets */
+struct offset_list {
+ uint32_t offset;
+ struct offset_list *next; /* following node */
+};
+
+/*
+ * Free f, then every further pointer argument up to (not including) the
+ * first NULL one. The variable argument list must therefore end with a
+ * NULL pointer.
+ */
+static void cli_multifree(void *f, ...) {
+    va_list args;
+    void *next;
+
+    free(f);
+
+    va_start(args, f);
+    for(next = va_arg(args, void *); next != NULL; next = va_arg(args, void *))
+        free(next);
+    va_end(args);
+}
+
+/*
+ * Translate the relative virtual address rva into a raw file offset.
+ *
+ * An rva below hdr_size lies in the headers and maps to itself, provided it
+ * is inside the file (rva < fsize). Otherwise the nos sections in shp are
+ * searched from the last to the first for one with a non-zero raw size
+ * that contains rva.
+ *
+ * On success *err is set to 0 and the offset is returned; on failure *err
+ * is set to 1 and 0 is returned.
+ */
+static uint32_t cli_rawaddr(uint32_t rva, struct cli_exe_section *shp, uint16_t nos, unsigned int *err, size_t fsize, uint32_t hdr_size)
+{
+    int idx;
+
+    if(rva < hdr_size) { /* Out of section EP - mapped to imagebase+rva */
+        if(rva >= fsize) {
+            *err = 1;
+            return 0;
+        }
+        *err = 0;
+        return rva;
+    }
+
+    for(idx = nos - 1; idx >= 0; idx--) {
+        if(shp[idx].rsz && shp[idx].rva <= rva && shp[idx].rsz > rva - shp[idx].rva) {
+            *err = 0;
+            return rva - shp[idx].rva + shp[idx].raw;
+        }
+    }
+
+    /* No section holds this address */
+    *err = 1;
+    return 0;
+}
+
+
+/*
+static int cli_ddump(int desc, int offset, int size, const char *file) {
+ int pos, ndesc, bread, sum = 0;
+ char buff[FILEBUFF];
+
+
+ cli_dbgmsg("in ddump()\n");
+
+ if((pos = lseek(desc, 0, SEEK_CUR)) == -1) {
+ cli_dbgmsg("Invalid descriptor\n");
+ return -1;
+ }
+
+ if(lseek(desc, offset, SEEK_SET) == -1) {
+ cli_dbgmsg("lseek() failed\n");
+ lseek(desc, pos, SEEK_SET);
+ return -1;
+ }
+
+ if((ndesc = open(file, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, S_IRWXU)) < 0) {
+ cli_dbgmsg("Can't create file %s\n", file);
+ lseek(desc, pos, SEEK_SET);
+ return -1;
+ }
+
+ while((bread = cli_readn(desc, buff, FILEBUFF)) > 0) {
+ if(sum + bread >= size) {
+ if(write(ndesc, buff, size - sum) == -1) {
+ cli_dbgmsg("Can't write to file\n");
+ lseek(desc, pos, SEEK_SET);
+ close(ndesc);
+ unlink(file);
+ return -1;
+ }
+ break;
+ } else {
+ if(write(ndesc, buff, bread) == -1) {
+ cli_dbgmsg("Can't write to file\n");
+ lseek(desc, pos, SEEK_SET);
+ close(ndesc);
+ unlink(file);
+ return -1;
+ }
+ }
+ sum += bread;
+ }
+
+ close(ndesc);
+ lseek(desc, pos, SEEK_SET);
+ return 0;
+}
+*/
+
+/*
+ * Position fd at the raw data of section s.
+ *
+ * Returns 0 if the section has no raw data or lseek() failed (a failed
+ * lseek() yields -1, and -1 + 1 is 0); otherwise returns the new file
+ * offset plus one, which is always non-zero.
+ */
+static off_t cli_seeksect(int fd, struct cli_exe_section *s) {
+    off_t pos;
+
+    if(s->rsz == 0)
+        return 0;
+
+    pos = lseek(fd, s->raw, SEEK_SET);
+    if(pos == -1)
+        cli_dbgmsg("cli_seeksect: lseek() failed\n");
+
+    return pos + 1;
+}
+
+/*
+ * Compute the MD5 digest of the raw data of section s, read from fd.
+ *
+ * The digest is stored in digest. Returns 1 on success and 0 if the
+ * section is larger than CLI_MAX_ALLOCATION, has no raw data, cannot be
+ * seeked to or read, or if memory runs out; digest is left untouched in
+ * those cases.
+ */
+static unsigned int cli_md5sect(int fd, struct cli_exe_section *s, unsigned char *digest) {
+    void *hashme;
+    cli_md5_ctx md5;
+
+    if (s->rsz > CLI_MAX_ALLOCATION) {
+        cli_dbgmsg("cli_md5sect: skipping md5 calculation for too big section\n");
+        return 0;
+    }
+
+    if(!cli_seeksect(fd, s)) return 0;
+
+    if(!(hashme=cli_malloc(s->rsz))) {
+        cli_dbgmsg("cli_md5sect: out of memory\n");
+        return 0;
+    }
+
+    if(cli_readn(fd, hashme, s->rsz)!=s->rsz) {
+        cli_dbgmsg("cli_md5sect: unable to read section data\n");
+        /* Release the buffer on a short read too */
+        free(hashme);
+        return 0;
+    }
+
+    cli_md5_init(&md5);
+    cli_md5_update(&md5, hashme, s->rsz);
+    free(hashme);
+    cli_md5_final(digest, &md5);
+    return 1;
+}
+
+int cli_scanpe(int desc, cli_ctx *ctx)
+{
+ uint16_t e_magic; /* DOS signature ("MZ") */
+ uint16_t nsections;
+ uint32_t e_lfanew; /* address of new exe header */
+ uint32_t ep, vep; /* entry point (raw, virtual) */
+ uint8_t polipos = 0;
+ time_t timestamp;
+ struct pe_image_file_hdr file_hdr;
+ union {
+ struct pe_image_optional_hdr64 opt64;
+ struct pe_image_optional_hdr32 opt32;
+ } pe_opt;
+ struct pe_image_section_hdr *section_hdr;
+ struct stat sb;
+ char sname[9], buff[4096], epbuff[4096], *tempfile;
+ uint32_t epsize;
+ ssize_t bytes;
+ unsigned int i, found, upx_success = 0, min = 0, max = 0, err;
+ unsigned int ssize = 0, dsize = 0, dll = 0, pe_plus = 0;
+ int (*upxfn)(char *, uint32_t, char *, uint32_t *, uint32_t, uint32_t, uint32_t) = NULL;
+ char *src = NULL, *dest = NULL;
+ int ndesc, ret = CL_CLEAN, upack = 0, native=0;
+ size_t fsize;
+ uint32_t valign, falign, hdr_size, j;
+ struct cli_exe_section *exe_sections;
+ struct cli_matcher *md5_sect;
+
+
+ if(!ctx) {
+ cli_errmsg("cli_scanpe: ctx == NULL\n");
+ return CL_ENULLARG;
+ }
+
+ if(cli_readn(desc, &e_magic, sizeof(e_magic)) != sizeof(e_magic)) {
+ cli_dbgmsg("Can't read DOS signature\n");
+ return CL_CLEAN;
+ }
+
+ if(EC16(e_magic) != IMAGE_DOS_SIGNATURE && EC16(e_magic) != IMAGE_DOS_SIGNATURE_OLD) {
+ cli_dbgmsg("Invalid DOS signature\n");
+ return CL_CLEAN;
+ }
+
+ lseek(desc, 58, SEEK_CUR); /* skip to the end of the DOS header */
+
+ if(cli_readn(desc, &e_lfanew, sizeof(e_lfanew)) != sizeof(e_lfanew)) {
+ cli_dbgmsg("Can't read new header address\n");
+ /* truncated header? */
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN;
+ }
+
+ e_lfanew = EC32(e_lfanew);
+ cli_dbgmsg("e_lfanew == %d\n", e_lfanew);
+ if(!e_lfanew) {
+ cli_dbgmsg("Not a PE file\n");
+ return CL_CLEAN;
+ }
+
+ if(lseek(desc, e_lfanew, SEEK_SET) < 0) {
+ /* probably not a PE file */
+ cli_dbgmsg("Can't lseek to e_lfanew\n");
+ return CL_CLEAN;
+ }
+
+ if(cli_readn(desc, &file_hdr, sizeof(struct pe_image_file_hdr)) != sizeof(struct pe_image_file_hdr)) {
+ /* bad information in e_lfanew - probably not a PE file */
+ cli_dbgmsg("Can't read file header\n");
+ return CL_CLEAN;
+ }
+
+ if(EC32(file_hdr.Magic) != IMAGE_NT_SIGNATURE) {
+ cli_dbgmsg("Invalid PE signature (probably NE file)\n");
+ return CL_CLEAN;
+ }
+
+ if(EC16(file_hdr.Characteristics) & 0x2000) {
+ cli_dbgmsg("File type: DLL\n");
+ dll = 1;
+ } else if(EC16(file_hdr.Characteristics) & 0x01) {
+ cli_dbgmsg("File type: Executable\n");
+ }
+
+ switch(EC16(file_hdr.Machine)) {
+ case 0x0:
+ cli_dbgmsg("Machine type: Unknown\n");
+ break;
+ case 0x14c:
+ cli_dbgmsg("Machine type: 80386\n");
+ break;
+ case 0x14d:
+ cli_dbgmsg("Machine type: 80486\n");
+ break;
+ case 0x14e:
+ cli_dbgmsg("Machine type: 80586\n");
+ break;
+ case 0x160:
+ cli_dbgmsg("Machine type: R30000 (big-endian)\n");
+ break;
+ case 0x162:
+ cli_dbgmsg("Machine type: R3000\n");
+ break;
+ case 0x166:
+ cli_dbgmsg("Machine type: R4000\n");
+ break;
+ case 0x168:
+ cli_dbgmsg("Machine type: R10000\n");
+ break;
+ case 0x184:
+ cli_dbgmsg("Machine type: DEC Alpha AXP\n");
+ break;
+ case 0x284:
+ cli_dbgmsg("Machine type: DEC Alpha AXP 64bit\n");
+ break;
+ case 0x1f0:
+ cli_dbgmsg("Machine type: PowerPC\n");
+ break;
+ case 0x200:
+ cli_dbgmsg("Machine type: IA64\n");
+ break;
+ case 0x268:
+ cli_dbgmsg("Machine type: M68k\n");
+ break;
+ case 0x266:
+ cli_dbgmsg("Machine type: MIPS16\n");
+ break;
+ case 0x366:
+ cli_dbgmsg("Machine type: MIPS+FPU\n");
+ break;
+ case 0x466:
+ cli_dbgmsg("Machine type: MIPS16+FPU\n");
+ break;
+ case 0x1a2:
+ cli_dbgmsg("Machine type: Hitachi SH3\n");
+ break;
+ case 0x1a3:
+ cli_dbgmsg("Machine type: Hitachi SH3-DSP\n");
+ break;
+ case 0x1a4:
+ cli_dbgmsg("Machine type: Hitachi SH3-E\n");
+ break;
+ case 0x1a6:
+ cli_dbgmsg("Machine type: Hitachi SH4\n");
+ break;
+ case 0x1a8:
+ cli_dbgmsg("Machine type: Hitachi SH5\n");
+ break;
+ case 0x1c0:
+ cli_dbgmsg("Machine type: ARM\n");
+ break;
+ case 0x1c2:
+ cli_dbgmsg("Machine type: THUMB\n");
+ break;
+ case 0x1d3:
+ cli_dbgmsg("Machine type: AM33\n");
+ break;
+ case 0x520:
+ cli_dbgmsg("Machine type: Infineon TriCore\n");
+ break;
+ case 0xcef:
+ cli_dbgmsg("Machine type: CEF\n");
+ break;
+ case 0xebc:
+ cli_dbgmsg("Machine type: EFI Byte Code\n");
+ break;
+ case 0x9041:
+ cli_dbgmsg("Machine type: M32R\n");
+ break;
+ case 0xc0ee:
+ cli_dbgmsg("Machine type: CEE\n");
+ break;
+ case 0x8664:
+ cli_dbgmsg("Machine type: AMD64\n");
+ break;
+ default:
+ cli_warnmsg("Unknown machine type in PE header (0x%x)\n", EC16(file_hdr.Machine));
+ }
+
+ nsections = EC16(file_hdr.NumberOfSections);
+ if(nsections < 1 || nsections > 96) {
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ if(nsections)
+ cli_warnmsg("PE file contains %d sections\n", nsections);
+ else
+ cli_warnmsg("PE file contains no sections\n");
+ return CL_CLEAN;
+ }
+ cli_dbgmsg("NumberOfSections: %d\n", nsections);
+
+ timestamp = (time_t) EC32(file_hdr.TimeDateStamp);
+ cli_dbgmsg("TimeDateStamp: %s", ctime(&timestamp));
+
+ cli_dbgmsg("SizeOfOptionalHeader: %x\n", EC16(file_hdr.SizeOfOptionalHeader));
+
+ if (EC16(file_hdr.SizeOfOptionalHeader) < sizeof(struct pe_image_optional_hdr32)) {
+ cli_dbgmsg("SizeOfOptionalHeader too small\n");
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN;
+ }
+
+ if(cli_readn(desc, &optional_hdr32, sizeof(struct pe_image_optional_hdr32)) != sizeof(struct pe_image_optional_hdr32)) {
+ cli_dbgmsg("Can't read optional file header\n");
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN;
+ }
+
+ /* This will be a chicken and egg problem until we drop 9x */
+ if(EC32(optional_hdr64.Magic)==PE32P_SIGNATURE) {
+ if(EC16(file_hdr.SizeOfOptionalHeader)!=sizeof(struct pe_image_optional_hdr64)) {
+ /* FIXME: need to play around a bit more with xp64 */
+ cli_dbgmsg("Incorrect SizeOfOptionalHeader for PE32+\n");
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN;
+ }
+ pe_plus = 1;
+ } else {
+ /*
+ either it's got a PE32_SIGNATURE or
+ we enable win9x compatibility in that we don't honor magic (see bb#119)
+ either way it's a 32bit thingy
+ */
+ if(EC16(optional_hdr32.Magic) != PE32_SIGNATURE) {
+ cli_warnmsg("Incorrect magic number in optional header\n");
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ cli_dbgmsg("9x compatibility mode\n");
+ }
+ }
+
+ if(!pe_plus) { /* PE */
+ if (EC16(file_hdr.SizeOfOptionalHeader)!=sizeof(struct pe_image_optional_hdr32)) {
+ /* Seek to the end of the long header */
+ lseek(desc, (EC16(file_hdr.SizeOfOptionalHeader)-sizeof(struct pe_image_optional_hdr32)), SEEK_CUR);
+ }
+
+ if(DCONF & PE_CONF_UPACK)
+ upack = (EC16(file_hdr.SizeOfOptionalHeader)==0x148);
+
+ vep = EC32(optional_hdr32.AddressOfEntryPoint);
+ hdr_size = EC32(optional_hdr32.SizeOfHeaders);
+ cli_dbgmsg("File format: PE\n");
+
+ cli_dbgmsg("MajorLinkerVersion: %d\n", optional_hdr32.MajorLinkerVersion);
+ cli_dbgmsg("MinorLinkerVersion: %d\n", optional_hdr32.MinorLinkerVersion);
+ cli_dbgmsg("SizeOfCode: 0x%x\n", EC32(optional_hdr32.SizeOfCode));
+ cli_dbgmsg("SizeOfInitializedData: 0x%x\n", EC32(optional_hdr32.SizeOfInitializedData));
+ cli_dbgmsg("SizeOfUninitializedData: 0x%x\n", EC32(optional_hdr32.SizeOfUninitializedData));
+ cli_dbgmsg("AddressOfEntryPoint: 0x%x\n", vep);
+ cli_dbgmsg("BaseOfCode: 0x%x\n", EC32(optional_hdr32.BaseOfCode));
+ cli_dbgmsg("SectionAlignment: 0x%x\n", EC32(optional_hdr32.SectionAlignment));
+ cli_dbgmsg("FileAlignment: 0x%x\n", EC32(optional_hdr32.FileAlignment));
+ cli_dbgmsg("MajorSubsystemVersion: %d\n", EC16(optional_hdr32.MajorSubsystemVersion));
+ cli_dbgmsg("MinorSubsystemVersion: %d\n", EC16(optional_hdr32.MinorSubsystemVersion));
+ cli_dbgmsg("SizeOfImage: 0x%x\n", EC32(optional_hdr32.SizeOfImage));
+ cli_dbgmsg("SizeOfHeaders: 0x%x\n", hdr_size);
+ cli_dbgmsg("NumberOfRvaAndSizes: %d\n", EC32(optional_hdr32.NumberOfRvaAndSizes));
+
+ } else { /* PE+ */
+ /* read the remaining part of the header */
+ if(cli_readn(desc, &optional_hdr32 + 1, sizeof(struct pe_image_optional_hdr64) - sizeof(struct pe_image_optional_hdr32)) != sizeof(struct pe_image_optional_hdr64) - sizeof(struct pe_image_optional_hdr32)) {
+ cli_dbgmsg("Can't read optional file header\n");
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN;
+ }
+
+ vep = EC32(optional_hdr64.AddressOfEntryPoint);
+ hdr_size = EC32(optional_hdr64.SizeOfHeaders);
+ cli_dbgmsg("File format: PE32+\n");
+
+ cli_dbgmsg("MajorLinkerVersion: %d\n", optional_hdr64.MajorLinkerVersion);
+ cli_dbgmsg("MinorLinkerVersion: %d\n", optional_hdr64.MinorLinkerVersion);
+ cli_dbgmsg("SizeOfCode: 0x%x\n", EC32(optional_hdr64.SizeOfCode));
+ cli_dbgmsg("SizeOfInitializedData: 0x%x\n", EC32(optional_hdr64.SizeOfInitializedData));
+ cli_dbgmsg("SizeOfUninitializedData: 0x%x\n", EC32(optional_hdr64.SizeOfUninitializedData));
+ cli_dbgmsg("AddressOfEntryPoint: 0x%x\n", vep);
+ cli_dbgmsg("BaseOfCode: 0x%x\n", EC32(optional_hdr64.BaseOfCode));
+ cli_dbgmsg("SectionAlignment: 0x%x\n", EC32(optional_hdr64.SectionAlignment));
+ cli_dbgmsg("FileAlignment: 0x%x\n", EC32(optional_hdr64.FileAlignment));
+ cli_dbgmsg("MajorSubsystemVersion: %d\n", EC16(optional_hdr64.MajorSubsystemVersion));
+ cli_dbgmsg("MinorSubsystemVersion: %d\n", EC16(optional_hdr64.MinorSubsystemVersion));
+ cli_dbgmsg("SizeOfImage: 0x%x\n", EC32(optional_hdr64.SizeOfImage));
+ cli_dbgmsg("SizeOfHeaders: 0x%x\n", hdr_size);
+ cli_dbgmsg("NumberOfRvaAndSizes: %d\n", EC32(optional_hdr64.NumberOfRvaAndSizes));
+ }
+
+
+ switch(pe_plus ? EC16(optional_hdr64.Subsystem) : EC16(optional_hdr32.Subsystem)) {
+ case 0:
+ cli_dbgmsg("Subsystem: Unknown\n");
+ break;
+ case 1:
+ cli_dbgmsg("Subsystem: Native (svc)\n");
+ native = 1;
+ break;
+ case 2:
+ cli_dbgmsg("Subsystem: Win32 GUI\n");
+ break;
+ case 3:
+ cli_dbgmsg("Subsystem: Win32 console\n");
+ break;
+ case 5:
+ cli_dbgmsg("Subsystem: OS/2 console\n");
+ break;
+ case 7:
+ cli_dbgmsg("Subsystem: POSIX console\n");
+ break;
+ case 8:
+ cli_dbgmsg("Subsystem: Native Win9x driver\n");
+ break;
+ case 9:
+ cli_dbgmsg("Subsystem: WinCE GUI\n");
+ break;
+ case 10:
+ cli_dbgmsg("Subsystem: EFI application\n");
+ break;
+ case 11:
+ cli_dbgmsg("Subsystem: EFI driver\n");
+ break;
+ case 12:
+ cli_dbgmsg("Subsystem: EFI runtime driver\n");
+ break;
+ default:
+ cli_warnmsg("Unknown subsystem in PE header (0x%x)\n", pe_plus ? EC16(optional_hdr64.Subsystem) : EC16(optional_hdr32.Subsystem));
+ }
+
+ cli_dbgmsg("------------------------------------\n");
+
+ if (DETECT_BROKEN && !native && (!(pe_plus?EC32(optional_hdr64.SectionAlignment):EC32(optional_hdr32.SectionAlignment)) || (pe_plus?EC32(optional_hdr64.SectionAlignment):EC32(optional_hdr32.SectionAlignment))%0x1000)) {
+ cli_dbgmsg("Bad virtual alignemnt\n");
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+
+ if (DETECT_BROKEN && !native && (!(pe_plus?EC32(optional_hdr64.FileAlignment):EC32(optional_hdr32.FileAlignment)) || (pe_plus?EC32(optional_hdr64.FileAlignment):EC32(optional_hdr32.FileAlignment))%0x200)) {
+ cli_dbgmsg("Bad file alignemnt\n");
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+
+ if(fstat(desc, &sb) == -1) {
+ cli_dbgmsg("fstat failed\n");
+ return CL_EIO;
+ }
+
+ fsize = sb.st_size;
+
+ section_hdr = (struct pe_image_section_hdr *) cli_calloc(nsections, sizeof(struct pe_image_section_hdr));
+
+ if(!section_hdr) {
+ cli_dbgmsg("Can't allocate memory for section headers\n");
+ return CL_EMEM;
+ }
+
+ exe_sections = (struct cli_exe_section *) cli_calloc(nsections, sizeof(struct cli_exe_section));
+
+ if(!exe_sections) {
+ cli_dbgmsg("Can't allocate memory for section headers\n");
+ free(section_hdr);
+ return CL_EMEM;
+ }
+
+ valign = (pe_plus)?EC32(optional_hdr64.SectionAlignment):EC32(optional_hdr32.SectionAlignment);
+ falign = (pe_plus)?EC32(optional_hdr64.FileAlignment):EC32(optional_hdr32.FileAlignment);
+
+ if(cli_readn(desc, section_hdr, sizeof(struct pe_image_section_hdr)*nsections) != (int)(nsections*sizeof(struct pe_image_section_hdr))) {
+ cli_dbgmsg("Can't read section header\n");
+ cli_dbgmsg("Possibly broken PE file\n");
+ free(section_hdr);
+ free(exe_sections);
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN;
+ }
+
+ for(i = 0; falign!=0x200 && i<nsections; i++) {
+ /* file alignment fallback mode - blah */
+ if (falign && section_hdr[i].SizeOfRawData && EC32(section_hdr[i].PointerToRawData)%falign && !(EC32(section_hdr[i].PointerToRawData)%0x200)) {
+ cli_dbgmsg("Found misaligned section, using 0x200\n");
+ falign = 0x200;
+ }
+ }
+
+ hdr_size = PESALIGN(hdr_size, valign); /* Aligned headers virtual size */
+
+ for(i = 0; i < nsections; i++) {
+ strncpy(sname, (char *) section_hdr[i].Name, 8);
+ sname[8] = 0;
+ exe_sections[i].rva = PEALIGN(EC32(section_hdr[i].VirtualAddress), valign);
+ exe_sections[i].vsz = PESALIGN(EC32(section_hdr[i].VirtualSize), valign);
+ exe_sections[i].raw = PEALIGN(EC32(section_hdr[i].PointerToRawData), falign);
+ exe_sections[i].rsz = PESALIGN(EC32(section_hdr[i].SizeOfRawData), falign);
+ exe_sections[i].chr = EC32(section_hdr[i].Characteristics);
+ exe_sections[i].urva = EC32(section_hdr[i].VirtualAddress); /* Just in case */
+ exe_sections[i].uvsz = EC32(section_hdr[i].VirtualSize);
+ exe_sections[i].uraw = EC32(section_hdr[i].PointerToRawData);
+ exe_sections[i].ursz = EC32(section_hdr[i].SizeOfRawData);
+
+ if (!exe_sections[i].vsz && exe_sections[i].rsz)
+ exe_sections[i].vsz=PESALIGN(exe_sections[i].ursz, valign);
+
+ if (exe_sections[i].rsz && fsize>exe_sections[i].raw && !CLI_ISCONTAINED(0, (uint32_t) fsize, exe_sections[i].raw, exe_sections[i].rsz))
+ exe_sections[i].rsz = fsize - exe_sections[i].raw;
+
+ cli_dbgmsg("Section %d\n", i);
+ cli_dbgmsg("Section name: %s\n", sname);
+ cli_dbgmsg("Section data (from headers - in memory)\n");
+ cli_dbgmsg("VirtualSize: 0x%x 0x%x\n", exe_sections[i].uvsz, exe_sections[i].vsz);
+ cli_dbgmsg("VirtualAddress: 0x%x 0x%x\n", exe_sections[i].urva, exe_sections[i].rva);
+ cli_dbgmsg("SizeOfRawData: 0x%x 0x%x\n", exe_sections[i].ursz, exe_sections[i].rsz);
+ cli_dbgmsg("PointerToRawData: 0x%x 0x%x\n", exe_sections[i].uraw, exe_sections[i].raw);
+
+ if(exe_sections[i].chr & 0x20) {
+ cli_dbgmsg("Section contains executable code\n");
+
+ if(exe_sections[i].vsz < exe_sections[i].rsz) {
+ cli_dbgmsg("Section contains free space\n");
+ /*
+ cli_dbgmsg("Dumping %d bytes\n", section_hdr.SizeOfRawData - section_hdr.VirtualSize);
+ ddump(desc, section_hdr.PointerToRawData + section_hdr.VirtualSize, section_hdr.SizeOfRawData - section_hdr.VirtualSize, cli_gentemp(NULL));
+ */
+
+ }
+ }
+
+ if(exe_sections[i].chr & 0x20000000)
+ cli_dbgmsg("Section's memory is executable\n");
+
+ if(exe_sections[i].chr & 0x80000000)
+ cli_dbgmsg("Section's memory is writeable\n");
+
+ cli_dbgmsg("------------------------------------\n");
+
+ if (DETECT_BROKEN && (exe_sections[i].urva % valign)) { /* Bad virtual alignment */
+ cli_dbgmsg("VirtualAddress is misaligned\n");
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ free(section_hdr);
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+
+ if (exe_sections[i].rsz) { /* Don't bother with virtual only sections */
+ if (exe_sections[i].raw >= fsize) { /* really broken */
+ cli_dbgmsg("Broken PE file - Section %d starts beyond the end of file (Offset@ %d, Total filesize %d)\n", i, exe_sections[i].raw, fsize);
+ free(section_hdr);
+ free(exe_sections);
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN; /* no ninjas to see here! move along! */
+ }
+
+ if(SCAN_ALGO && (DCONF & PE_CONF_POLIPOS) && !*sname && exe_sections[i].vsz > 40000 && exe_sections[i].vsz < 70000 && exe_sections[i].chr == 0xe0000060) polipos = i;
+
+ /* check MD5 section sigs */
+ md5_sect = ctx->engine->md5_sect;
+ if((DCONF & PE_CONF_MD5SECT) && md5_sect) {
+ found = 0;
+ for(j = 0; j < md5_sect->soff_len && md5_sect->soff[j] <= exe_sections[i].rsz; j++) {
+ if(md5_sect->soff[j] == exe_sections[i].rsz) {
+ unsigned char md5_dig[16];
+ if(cli_md5sect(desc, &exe_sections[i], md5_dig) && cli_bm_scanbuff(md5_dig, 16, ctx->virname, ctx->engine->md5_sect, 0, 0, -1) == CL_VIRUS) {
+ free(section_hdr);
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ if(!i) {
+ if (DETECT_BROKEN && exe_sections[i].urva!=hdr_size) { /* Bad first section RVA */
+ cli_dbgmsg("First section is in the wrong place\n");
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ free(section_hdr);
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ min = exe_sections[i].rva;
+ max = exe_sections[i].rva + exe_sections[i].rsz;
+ } else {
+ if (DETECT_BROKEN && exe_sections[i].urva - exe_sections[i-1].urva != exe_sections[i-1].vsz) { /* No holes, no overlapping, no virtual disorder */
+ cli_dbgmsg("Virtually misplaced section (wrong order, overlapping, non contiguous)\n");
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ free(section_hdr);
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ if(exe_sections[i].rva < min)
+ min = exe_sections[i].rva;
+
+ if(exe_sections[i].rva + exe_sections[i].rsz > max)
+ max = exe_sections[i].rva + exe_sections[i].rsz;
+ }
+ }
+
+ free(section_hdr);
+
+ if(!(ep = cli_rawaddr(vep, exe_sections, nsections, &err, fsize, hdr_size)) && err) {
+ cli_dbgmsg("EntryPoint out of file\n");
+ free(exe_sections);
+ if(DETECT_BROKEN) {
+ if(ctx->virname)
+ *ctx->virname = "Broken.Executable";
+ return CL_VIRUS;
+ }
+ return CL_CLEAN;
+ }
+
+ cli_dbgmsg("EntryPoint offset: 0x%x (%d)\n", ep, ep);
+
+ if(pe_plus) { /* Do not continue for PE32+ files */
+ free(exe_sections);
+ return CL_CLEAN;
+ }
+
+ lseek(desc, ep, SEEK_SET);
+ epsize = cli_readn(desc, epbuff, 4096);
+
+ /* Attempt to detect some popular polymorphic viruses */
+
+ /* W32.Parite.B */
+ if(SCAN_ALGO && (DCONF & PE_CONF_PARITE) && !dll && epsize == 4096 && ep == exe_sections[nsections - 1].raw) {
+ const char *pt = cli_memstr(epbuff, 4040, "\x47\x65\x74\x50\x72\x6f\x63\x41\x64\x64\x72\x65\x73\x73\x00", 15);
+ if(pt) {
+ pt += 15;
+ if((((uint32_t)cli_readint32(pt) ^ (uint32_t)cli_readint32(pt + 4)) == 0x505a4f) && (((uint32_t)cli_readint32(pt + 8) ^ (uint32_t)cli_readint32(pt + 12)) == 0xffffb) && (((uint32_t)cli_readint32(pt + 16) ^ (uint32_t)cli_readint32(pt + 20)) == 0xb8)) {
+ *ctx->virname = "W32.Parite.B";
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ }
+ }
+
+ /* Kriz */
+ if(SCAN_ALGO && (DCONF & PE_CONF_KRIZ) && epsize >= 200 && CLI_ISCONTAINED(exe_sections[nsections - 1].raw, exe_sections[nsections - 1].rsz, ep, 0x0fd2) && epbuff[1]=='\x9c' && epbuff[2]=='\x60') {
+ enum {KZSTRASH,KZSCDELTA,KZSPDELTA,KZSGETSIZE,KZSXORPRFX,KZSXOR,KZSDDELTA,KZSLOOP,KZSTOP};
+ uint8_t kzs[] = {KZSTRASH,KZSCDELTA,KZSPDELTA,KZSGETSIZE,KZSTRASH,KZSXORPRFX,KZSXOR,KZSTRASH,KZSDDELTA,KZSTRASH,KZSLOOP,KZSTOP};
+ uint8_t *kzstate = kzs;
+ uint8_t *kzcode = (uint8_t *)epbuff + 3;
+ uint8_t kzdptr=0xff, kzdsize=0xff;
+ int kzlen = 197, kzinitlen=0xffff, kzxorlen=-1;
+ cli_dbgmsg("in kriz\n");
+
+ while(*kzstate!=KZSTOP) {
+ uint8_t op;
+ if(kzlen<=6) break;
+ op = *kzcode++;
+ kzlen--;
+ switch (*kzstate) {
+ case KZSTRASH: case KZSGETSIZE: {
+ int opsz=0;
+ switch(op) {
+ case 0x81:
+ kzcode+=5;
+ kzlen-=5;
+ break;
+ case 0xb8: case 0xb9: case 0xba: case 0xbb: case 0xbd: case 0xbe: case 0xbf:
+ if(*kzstate==KZSGETSIZE && cli_readint32(kzcode)==0x0fd2) {
+ kzinitlen = kzlen-5;
+ kzdsize=op-0xb8;
+ kzstate++;
+ op=4; /* fake the register to avoid breaking out */
+ cli_dbgmsg("kriz: using #%d as size counter\n", kzdsize);
+ }
+ opsz=4;
+ case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4d: case 0x4e: case 0x4f:
+ op&=7;
+ if(op!=kzdptr && op!=kzdsize) {
+ kzcode+=opsz;
+ kzlen-=opsz;
+ break;
+ }
+ default:
+ kzcode--;
+ kzlen++;
+ kzstate++;
+ }
+ break;
+ }
+ case KZSCDELTA:
+ if(op==0xe8 && (uint32_t)cli_readint32(kzcode) < 0xff) {
+ kzlen-=*kzcode+4;
+ kzcode+=*kzcode+4;
+ kzstate++;
+ } else *kzstate=KZSTOP;
+ break;
+ case KZSPDELTA:
+ if((op&0xf8)==0x58 && (kzdptr=op-0x58)!=4) {
+ kzstate++;
+ cli_dbgmsg("kriz: using #%d as pointer\n", kzdptr);
+ } else *kzstate=KZSTOP;
+ break;
+ case KZSXORPRFX:
+ kzstate++;
+ if(op==0x3e) break;
+ case KZSXOR:
+ if (op==0x80 && *kzcode==kzdptr+0xb0) {
+ kzxorlen=kzlen;
+ kzcode+=+6;
+ kzlen-=+6;
+ kzstate++;
+ } else *kzstate=KZSTOP;
+ break;
+ case KZSDDELTA:
+ if (op==kzdptr+0x48) kzstate++;
+ else *kzstate=KZSTOP;
+ break;
+ case KZSLOOP:
+ if (op==kzdsize+0x48 && *kzcode==0x75 && kzlen-(int8_t)kzcode[1]-3<=kzinitlen && kzlen-(int8_t)kzcode[1]>=kzxorlen) {
+ *ctx->virname = "W32.Kriz";
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ cli_dbgmsg("kriz: loop out of bounds, corrupted sample?\n");
+ kzstate++;
+ }
+ }
+ }
+
+ /* W32.Magistr.A/B */
+ if(SCAN_ALGO && (DCONF & PE_CONF_MAGISTR) && !dll && (nsections>1) && (exe_sections[nsections - 1].chr & 0x80000000)) {
+ uint32_t rsize, vsize, dam = 0;
+
+ vsize = exe_sections[nsections - 1].uvsz;
+ rsize = exe_sections[nsections - 1].rsz;
+ if(rsize < exe_sections[nsections - 1].ursz) {
+ rsize = exe_sections[nsections - 1].ursz;
+ dam = 1;
+ }
+
+ if(vsize >= 0x612c && rsize >= 0x612c && ((vsize & 0xff) == 0xec)) {
+ int bw = rsize < 0x7000 ? rsize : 0x7000;
+
+ lseek(desc, exe_sections[nsections - 1].raw + rsize - bw, SEEK_SET);
+ if(cli_readn(desc, buff, 4096) == 4096) {
+ if(cli_memstr(buff, 4091, "\xe8\x2c\x61\x00\x00", 5)) {
+ *ctx->virname = dam ? "W32.Magistr.A.dam" : "W32.Magistr.A";
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ }
+
+ } else if(rsize >= 0x7000 && vsize >= 0x7000 && ((vsize & 0xff) == 0xed)) {
+ int bw = rsize < 0x8000 ? rsize : 0x8000;
+
+ lseek(desc, exe_sections[nsections - 1].raw + rsize - bw, SEEK_SET);
+ if(cli_readn(desc, buff, 4096) == 4096) {
+ if(cli_memstr(buff, 4091, "\xe8\x04\x72\x00\x00", 5)) {
+ *ctx->virname = dam ? "W32.Magistr.B.dam" : "W32.Magistr.B";
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ }
+ }
+ }
+
+ /* W32.Polipos.A */
+ while(polipos && !dll && nsections > 2 && nsections < 13 && e_lfanew <= 0x800 && (EC16(optional_hdr32.Subsystem) == 2 || EC16(optional_hdr32.Subsystem) == 3) && EC16(file_hdr.Machine) == 0x14c && optional_hdr32.SizeOfStackReserve >= 0x80000) {
+ uint32_t jump, jold, *jumps = NULL;
+ uint8_t *code;
+ unsigned int xsjs = 0;
+
+ if(exe_sections[0].rsz > CLI_MAX_ALLOCATION) break;
+ if(!cli_seeksect(desc, &exe_sections[0])) break;
+ if(!(code=cli_malloc(exe_sections[0].rsz))) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+ if(cli_readn(desc, code, exe_sections[0].rsz)!=exe_sections[0].rsz) {
+ free(exe_sections);
+ return CL_EIO;
+ }
+ for(i=0; i<exe_sections[0].rsz - 5; i++) {
+ if((uint8_t)(code[i]-0xe8) > 1) continue;
+ jump = cli_rawaddr(exe_sections[0].rva+i+5+cli_readint32(&code[i+1]), exe_sections, nsections, &err, fsize, hdr_size);
+ if(err || !CLI_ISCONTAINED(exe_sections[polipos].raw, exe_sections[polipos].rsz, jump, 9)) continue;
+ if(xsjs % 128 == 0) {
+ if(xsjs == 1280) break;
+ if(!(jumps=(uint32_t *)cli_realloc2(jumps, (xsjs+128)*sizeof(uint32_t)))) {
+ free(code);
+ free(exe_sections);
+ return CL_EMEM;
+ }
+ }
+ j=0;
+ for(; j<xsjs; j++) {
+ if(jumps[j]<jump) continue;
+ if(jumps[j]==jump) {
+ xsjs--;
+ break;
+ }
+ jold=jumps[j];
+ jumps[j]=jump;
+ jump=jold;
+ }
+ jumps[j]=jump;
+ xsjs++;
+ }
+ free(code);
+ if(!xsjs) break;
+ cli_dbgmsg("Polipos: Checking %d xsect jump(s)\n", xsjs);
+ for(i=0;i<xsjs;i++) {
+ lseek(desc, jumps[i], SEEK_SET);
+ if(cli_readn(desc, buff, 9) != 9) continue;
+ if((jump=cli_readint32(buff))==0x60ec8b55 || (buff[4]=='\xec' && ((jump==0x83ec8b55 && buff[6]=='\x60') || (jump==0x81ec8b55 && !buff[7] && !buff[8])))) {
+ *ctx->virname = "W32.Polipos.A";
+ free(jumps);
+ free(exe_sections);
+ return CL_VIRUS;
+ }
+ }
+ free(jumps);
+ break;
+ }
+
+
+ /* UPX, FSG, MEW support */
+
+ /* try to find the first section with physical size == 0 */
+ found = 0;
+ if(DCONF & (PE_CONF_UPX | PE_CONF_FSG | PE_CONF_MEW)) {
+ for(i = 0; i < (unsigned int) nsections - 1; i++) {
+ if(!exe_sections[i].rsz && exe_sections[i].vsz && exe_sections[i + 1].rsz && exe_sections[i + 1].vsz) {
+ found = 1;
+ cli_dbgmsg("UPX/FSG/MEW: empty section found - assuming compression\n");
+ break;
+ }
+ }
+ }
+
+ /* MEW support */
+ if (found && (DCONF & PE_CONF_MEW) && epsize>=16 && epbuff[0]=='\xe9') {
+ uint32_t fileoffset;
+
+ fileoffset = (vep + cli_readint32(epbuff + 1) + 5);
+ while (fileoffset == 0x154 || fileoffset == 0x158) {
+ uint32_t offdiff, uselzma;
+
+ cli_dbgmsg ("MEW: found MEW characteristics %08X + %08X + 5 = %08X\n",
+ cli_readint32(epbuff + 1), vep, cli_readint32(epbuff + 1) + vep + 5);
+
+ if(lseek(desc, fileoffset, SEEK_SET) == -1) {
+ cli_dbgmsg("MEW: lseek() failed\n");
+ free(exe_sections);
+ return CL_EIO;
+ }
+
+ if((bytes = read(desc, buff, 0xb0)) != 0xb0) {
+ cli_dbgmsg("MEW: Can't read 0xb0 bytes at 0x%x (%d) %d\n", fileoffset, fileoffset, bytes);
+ break;
+ }
+
+ if (fileoffset == 0x154) cli_dbgmsg("MEW: Win9x compatibility was set!\n");
+ else cli_dbgmsg("MEW: Win9x compatibility was NOT set!\n");
+
+ if((offdiff = cli_readint32(buff+1) - EC32(optional_hdr32.ImageBase)) <= exe_sections[i + 1].rva || offdiff >= exe_sections[i + 1].rva + exe_sections[i + 1].raw - 4) {
+ cli_dbgmsg("MEW: ESI is not in proper section\n");
+ break;
+ }
+ offdiff -= exe_sections[i + 1].rva;
+
+ if(!cli_seeksect(desc, &exe_sections[i + 1])) {
+ free(exe_sections);
+ return CL_EIO;
+ }
+ ssize = exe_sections[i + 1].vsz;
+ dsize = exe_sections[i].vsz;
+
+ cli_dbgmsg("MEW: ssize %08x dsize %08x offdiff: %08x\n", ssize, dsize, offdiff);
+
+ CLI_UNPSIZELIMITS("MEW", MAX(ssize, dsize));
+ CLI_UNPSIZELIMITS("MEW", MAX(ssize + dsize, exe_sections[i + 1].rsz));
+
+ /* allocate needed buffer */
+ if (!(src = cli_calloc (ssize + dsize, sizeof(char)))) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ if (exe_sections[i + 1].rsz < offdiff + 12 || exe_sections[i + 1].rsz > ssize) {
+ cli_dbgmsg("MEW: Size mismatch: %08x\n", exe_sections[i + 1].rsz);
+ free(src);
+ break;
+ }
+
+ if((bytes = read(desc, src + dsize, exe_sections[i + 1].rsz)) != exe_sections[i + 1].rsz) {
+ cli_dbgmsg("MEW: Can't read %d bytes [read: %d]\n", exe_sections[i + 1].rsz, bytes);
+ free(exe_sections);
+ free(src);
+ return CL_EIO;
+ }
+ cli_dbgmsg("MEW: %d (%08x) bytes read\n", bytes, bytes);
+
+ /* count offset to lzma proc, if lzma used, 0xe8 -> call */
+ if (buff[0x7b] == '\xe8') {
+ if (!CLI_ISCONTAINED(exe_sections[1].rva, exe_sections[1].vsz, cli_readint32(buff + 0x7c) + fileoffset + 0x80, 4)) {
+ cli_dbgmsg("MEW: lzma proc out of bounds!\n");
+ free(src);
+ break; /* to next unpacker in chain */
+ }
+ uselzma = cli_readint32(buff + 0x7c) - (exe_sections[0].rva - fileoffset - 0x80);
+ } else {
+ uselzma = 0;
+ }
+
+ CLI_UNPTEMP("MEW",(src,exe_sections,0));
+ CLI_UNPRESULTS("MEW",(unmew11(i, src, offdiff, ssize, dsize, EC32(optional_hdr32.ImageBase), exe_sections[0].rva, uselzma, NULL, NULL, ndesc)),1,(src,0));
+ break;
+ }
+ }
+
+ if(epsize<168) {
+ free(exe_sections);
+ return CL_CLEAN;
+ }
+
+ if (found || upack) {
+ /* Check EP for UPX vs. FSG vs. Upack */
+
+ /* Upack 0.39 produces 2 types of executables
+ * 3 sections: | 2 sections (one empty, I don't check found if !upack, since it's in OR above):
+ * mov esi, value | pusha
+ * lodsd | call $+0x9
+ * push eax |
+ *
+ * Upack 1.1/1.2 Beta produces [based on 2 samples (sUx) provided by aCaB]:
+ * 2 sections
+ * mov esi, value
+ * lodsd
+ * mov edi, eax
+ *
+ * Upack unknown [sample 0297729]
+ * 3 sections
+ * mov esi, value
+ * push [esi]
+ * jmp
+ *
+ */
+ /* upack 0.39-3s + sample 0151477*/
+ while(((upack && nsections == 3) && /* 3 sections */
+ ((
+ epbuff[0] == '\xbe' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) > min && /* mov esi */
+ epbuff[5] == '\xad' && epbuff[6] == '\x50' /* lodsd; push eax */
+ )
+ ||
+ /* based on 0297729 sample from aCaB */
+ (epbuff[0] == '\xbe' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) > min && /* mov esi */
+ epbuff[5] == '\xff' && epbuff[6] == '\x36' /* push [esi] */
+ )
+ ))
+ ||
+ ((!upack && nsections == 2) && /* 2 sections */
+ (( /* upack 0.39-2s */
+ epbuff[0] == '\x60' && epbuff[1] == '\xe8' && cli_readint32(epbuff+2) == 0x9 /* pusha; call+9 */
+ )
+ ||
+ ( /* upack 1.1/1.2, based on 2 samples */
+ epbuff[0] == '\xbe' && cli_readint32(epbuff+1) - EC32(optional_hdr32.ImageBase) < min && /* mov esi */
+ cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) > 0 &&
+ epbuff[5] == '\xad' && epbuff[6] == '\x8b' && epbuff[7] == '\xf8' /* loads; mov edi, eax */
+ )
+ ))
+ ) {
+ uint32_t vma, off;
+ int a,b,c;
+
+ cli_dbgmsg("Upack characteristics found.\n");
+ a = exe_sections[0].vsz;
+ b = exe_sections[1].vsz;
+ if (upack) {
+ cli_dbgmsg("Upack: var set\n");
+ c = exe_sections[2].vsz;
+ ssize = exe_sections[0].ursz + exe_sections[0].uraw;
+ off = exe_sections[0].rva;
+ vma = EC32(optional_hdr32.ImageBase) + exe_sections[0].rva;
+ } else {
+ cli_dbgmsg("Upack: var NOT set\n");
+ c = exe_sections[1].rva;
+ ssize = exe_sections[1].uraw;
+ off = 0;
+ vma = exe_sections[1].rva - exe_sections[1].uraw;
+ }
+
+ dsize = a+b+c;
+
+ CLI_UNPSIZELIMITS("Upack", MAX(MAX(dsize, ssize), exe_sections[1].ursz));
+
+ if (exe_sections[1].rva - off > dsize || exe_sections[1].rva - off > dsize - exe_sections[1].ursz || (upack && (exe_sections[2].rva - exe_sections[0].rva > dsize || exe_sections[2].rva - exe_sections[0].rva > dsize - ssize)) || ssize > dsize) {
+ cli_dbgmsg("Upack: probably malformed pe-header, skipping to next unpacker\n");
+ break;
+ }
+
+ if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ lseek(desc, 0, SEEK_SET);
+ if(read(desc, dest, ssize) != ssize) {
+ cli_dbgmsg("Upack: Can't read raw data of section 0\n");
+ free(exe_sections);
+ free(dest);
+ return CL_EIO;
+ }
+
+ if(upack) memmove(dest + exe_sections[2].rva - exe_sections[0].rva, dest, ssize);
+
+ lseek(desc, exe_sections[1].uraw, SEEK_SET);
+
+ if(read(desc, dest + exe_sections[1].rva - off, exe_sections[1].ursz) != exe_sections[1].ursz) {
+ cli_dbgmsg("Upack: Can't read raw data of section 1\n");
+ free(exe_sections);
+ free(dest);
+ return CL_EIO;
+ }
+
+ CLI_UNPTEMP("Upack",(dest,exe_sections,0));
+ CLI_UNPRESULTS("Upack",(unupack(upack, dest, dsize, epbuff, vma, ep, EC32(optional_hdr32.ImageBase), exe_sections[0].rva, ndesc)),1,(dest,0));
+ break;
+ }
+ }
+
+
+ while(found && (DCONF & PE_CONF_FSG) && epbuff[0] == '\x87' && epbuff[1] == '\x25') {
+
+ /* FSG v2.0 support - thanks to aCaB ! */
+
+ uint32_t newesi, newedi, newebx, newedx;
+
+ ssize = exe_sections[i + 1].rsz;
+ dsize = exe_sections[i].vsz;
+
+ CLI_UNPSIZELIMITS("FSG", MAX(dsize, ssize));
+
+ if(ssize <= 0x19 || dsize <= ssize) {
+ cli_dbgmsg("FSG: Size mismatch (ssize: %d, dsize: %d)\n", ssize, dsize);
+ free(exe_sections);
+ return CL_CLEAN;
+ }
+
+ newedx = cli_readint32(epbuff + 2) - EC32(optional_hdr32.ImageBase);
+ if(!CLI_ISCONTAINED(exe_sections[i + 1].rva, exe_sections[i + 1].rsz, newedx, 4)) {
+ cli_dbgmsg("FSG: xchg out of bounds (%x), giving up\n", newedx);
+ break;
+ }
+
+ if((src = (char *) cli_malloc(ssize)) == NULL) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+ cli_dbgmsg("Can't read raw data of section %d\n", i + 1);
+ free(exe_sections);
+ free(src);
+ return CL_EIO;
+ }
+
+ dest = src + newedx - exe_sections[i + 1].rva;
+ if(newedx < exe_sections[i + 1].rva || !CLI_ISCONTAINED(src, ssize, dest, 4)) {
+ cli_dbgmsg("FSG: New ESP out of bounds\n");
+ free(src);
+ break;
+ }
+
+ newedx = cli_readint32(dest) - EC32(optional_hdr32.ImageBase);
+ if(!CLI_ISCONTAINED(exe_sections[i + 1].rva, exe_sections[i + 1].rsz, newedx, 4)) {
+ cli_dbgmsg("FSG: New ESP (%x) is wrong\n", newedx);
+ free(src);
+ break;
+ }
+
+ dest = src + newedx - exe_sections[i + 1].rva;
+ if(!CLI_ISCONTAINED(src, ssize, dest, 32)) {
+ cli_dbgmsg("FSG: New stack out of bounds\n");
+ free(src);
+ break;
+ }
+
+ newedi = cli_readint32(dest) - EC32(optional_hdr32.ImageBase);
+ newesi = cli_readint32(dest + 4) - EC32(optional_hdr32.ImageBase);
+ newebx = cli_readint32(dest + 16) - EC32(optional_hdr32.ImageBase);
+ newedx = cli_readint32(dest + 20);
+
+ if(newedi != exe_sections[i].rva) {
+ cli_dbgmsg("FSG: Bad destination buffer (edi is %x should be %x)\n", newedi, exe_sections[i].rva);
+ free(src);
+ break;
+ }
+
+ if(newesi < exe_sections[i + 1].rva || newesi - exe_sections[i + 1].rva >= exe_sections[i + 1].rsz) {
+ cli_dbgmsg("FSG: Source buffer out of section bounds\n");
+ free(src);
+ break;
+ }
+
+ if(!CLI_ISCONTAINED(exe_sections[i + 1].rva, exe_sections[i + 1].rsz, newebx, 16)) {
+ cli_dbgmsg("FSG: Array of functions out of bounds\n");
+ free(src);
+ break;
+ }
+
+ newedx=cli_readint32(newebx + 12 - exe_sections[i + 1].rva + src) - EC32(optional_hdr32.ImageBase);
+ cli_dbgmsg("FSG: found old EP @%x\n",newedx);
+
+ if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+ free(exe_sections);
+ free(src);
+ return CL_EMEM;
+ }
+
+ CLI_UNPTEMP("FSG",(src,dest,exe_sections,0));
+ CLI_UNPRESULTSFSG2("FSG",(unfsg_200(newesi - exe_sections[i + 1].rva + src, dest, ssize + exe_sections[i + 1].rva - newesi, dsize, newedi, EC32(optional_hdr32.ImageBase), newedx, ndesc)),1,(src,dest,0));
+ break;
+ }
+
+
+ while(found && (DCONF & PE_CONF_FSG) && epbuff[0] == '\xbe' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) < min) {
+
+ /* FSG support - v. 1.33 (thx trog for the many samples) */
+
+ int sectcnt = 0;
+ char *support;
+ uint32_t newesi, newedi, oldep, gp, t;
+ struct cli_exe_section *sections;
+
+ ssize = exe_sections[i + 1].rsz;
+ dsize = exe_sections[i].vsz;
+
+ CLI_UNPSIZELIMITS("FSG", MAX(dsize, ssize));
+
+ if(ssize <= 0x19 || dsize <= ssize) {
+ cli_dbgmsg("FSG: Size mismatch (ssize: %d, dsize: %d)\n", ssize, dsize);
+ free(exe_sections);
+ return CL_CLEAN;
+ }
+
+ if(!(gp = cli_rawaddr(cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase), NULL, 0 , &err, fsize, hdr_size)) && err ) {
+ cli_dbgmsg("FSG: Support data out of padding area\n");
+ break;
+ }
+
+ lseek(desc, gp, SEEK_SET);
+ gp = exe_sections[i + 1].raw - gp;
+
+ CLI_UNPSIZELIMITS("FSG", gp)
+
+ if((support = (char *) cli_malloc(gp)) == NULL) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ if((int)cli_readn(desc, support, gp) != (int)gp) {
+ cli_dbgmsg("Can't read %d bytes from padding area\n", gp);
+ free(exe_sections);
+ free(support);
+ return CL_EIO;
+ }
+
+ /* newebx = cli_readint32(support) - EC32(optional_hdr32.ImageBase); Unused */
+ newedi = cli_readint32(support + 4) - EC32(optional_hdr32.ImageBase); /* 1st dest */
+ newesi = cli_readint32(support + 8) - EC32(optional_hdr32.ImageBase); /* Source */
+
+ if(newesi < exe_sections[i + 1].rva || newesi - exe_sections[i + 1].rva >= exe_sections[i + 1].rsz) {
+ cli_dbgmsg("FSG: Source buffer out of section bounds\n");
+ free(support);
+ break;
+ }
+
+ if(newedi != exe_sections[i].rva) {
+ cli_dbgmsg("FSG: Bad destination (is %x should be %x)\n", newedi, exe_sections[i].rva);
+ free(support);
+ break;
+ }
+
+ /* Counting original sections */
+ for(t = 12; t < gp - 4; t += 4) {
+ uint32_t rva = cli_readint32(support+t);
+
+ if(!rva)
+ break;
+
+ rva -= EC32(optional_hdr32.ImageBase)+1;
+ sectcnt++;
+
+ if(rva % 0x1000) cli_dbgmsg("FSG: Original section %d is misaligned\n", sectcnt);
+
+ if(rva < exe_sections[i].rva || rva - exe_sections[i].rva >= exe_sections[i].vsz) {
+ cli_dbgmsg("FSG: Original section %d is out of bounds\n", sectcnt);
+ break;
+ }
+ }
+
+ if(t >= gp - 4 || cli_readint32(support + t)) {
+ free(support);
+ break;
+ }
+
+ if((sections = (struct cli_exe_section *) cli_malloc((sectcnt + 1) * sizeof(struct cli_exe_section))) == NULL) {
+ free(exe_sections);
+ free(support);
+ return CL_EMEM;
+ }
+
+ sections[0].rva = newedi;
+ for(t = 1; t <= (uint32_t)sectcnt; t++)
+ sections[t].rva = cli_readint32(support + 8 + t * 4) - 1 - EC32(optional_hdr32.ImageBase);
+
+ free(support);
+
+ if((src = (char *) cli_malloc(ssize)) == NULL) {
+ free(exe_sections);
+ free(sections);
+ return CL_EMEM;
+ }
+
+ if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+ cli_dbgmsg("Can't read raw data of section %d\n", i);
+ free(exe_sections);
+ free(sections);
+ free(src);
+ return CL_EIO;
+ }
+
+ if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+ free(exe_sections);
+ free(src);
+ free(sections);
+ return CL_EMEM;
+ }
+
+ oldep = vep + 161 + 6 + cli_readint32(epbuff+163);
+ cli_dbgmsg("FSG: found old EP @%x\n", oldep);
+
+ CLI_UNPTEMP("FSG",(src,dest,sections,exe_sections,0));
+ CLI_UNPRESULTSFSG1("FSG",(unfsg_133(src + newesi - exe_sections[i + 1].rva, dest, ssize + exe_sections[i + 1].rva - newesi, dsize, sections, sectcnt, EC32(optional_hdr32.ImageBase), oldep, ndesc)),1,(src,dest,sections,0));
+ break; /* were done with 1.33 */
+ }
+
+
+ while(found && (DCONF & PE_CONF_FSG) && epbuff[0] == '\xbb' && cli_readint32(epbuff + 1) - EC32(optional_hdr32.ImageBase) < min && epbuff[5] == '\xbf' && epbuff[10] == '\xbe' && vep >= exe_sections[i + 1].rva && vep - exe_sections[i + 1].rva > exe_sections[i + 1].rva - 0xe0 ) {
+
+ /* FSG support - v. 1.31 */
+
+ int sectcnt = 0;
+ uint32_t t;
+ uint32_t gp = cli_rawaddr(cli_readint32(epbuff+1) - EC32(optional_hdr32.ImageBase), NULL, 0 , &err, fsize, hdr_size);
+ char *support;
+ uint32_t newesi = cli_readint32(epbuff+11) - EC32(optional_hdr32.ImageBase);
+ uint32_t newedi = cli_readint32(epbuff+6) - EC32(optional_hdr32.ImageBase);
+ uint32_t oldep = vep - exe_sections[i + 1].rva;
+ struct cli_exe_section *sections;
+
+ ssize = exe_sections[i + 1].rsz;
+ dsize = exe_sections[i].vsz;
+
+
+ if(err) {
+ cli_dbgmsg("FSG: Support data out of padding area\n");
+ break;
+ }
+
+ if(newesi < exe_sections[i + 1].rva || newesi - exe_sections[i + 1].rva >= exe_sections[i + 1].raw) {
+ cli_dbgmsg("FSG: Source buffer out of section bounds\n");
+ break;
+ }
+
+ if(newedi != exe_sections[i].rva) {
+ cli_dbgmsg("FSG: Bad destination (is %x should be %x)\n", newedi, exe_sections[i].rva);
+ break;
+ }
+
+ CLI_UNPSIZELIMITS("FSG", MAX(dsize, ssize));
+
+ if(ssize <= 0x19 || dsize <= ssize) {
+ cli_dbgmsg("FSG: Size mismatch (ssize: %d, dsize: %d)\n", ssize, dsize);
+ free(exe_sections);
+ return CL_CLEAN;
+ }
+
+ lseek(desc, gp, SEEK_SET);
+ gp = exe_sections[i + 1].raw - gp;
+
+ CLI_UNPSIZELIMITS("FSG", gp)
+
+ if((support = (char *) cli_malloc(gp)) == NULL) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ if(cli_readn(desc, support, gp) != (int)gp) {
+ cli_dbgmsg("Can't read %d bytes from padding area\n", gp);
+ free(exe_sections);
+ free(support);
+ return CL_EIO;
+ }
+
+ /* Counting original sections */
+ for(t = 0; t < gp - 2; t += 2) {
+ uint32_t rva = support[t]|(support[t+1]<<8);
+
+ if (rva == 2 || rva == 1)
+ break;
+
+ rva = ((rva-2)<<12) - EC32(optional_hdr32.ImageBase);
+ sectcnt++;
+
+ if(rva < exe_sections[i].rva || rva - exe_sections[i].rva >= exe_sections[i].vsz) {
+ cli_dbgmsg("FSG: Original section %d is out of bounds\n", sectcnt);
+ break;
+ }
+ }
+
+ if(t >= gp-10 || cli_readint32(support + t + 6) != 2) {
+ free(support);
+ break;
+ }
+
+ if((sections = (struct cli_exe_section *) cli_malloc((sectcnt + 1) * sizeof(struct cli_exe_section))) == NULL) {
+ free(exe_sections);
+ free(support);
+ return CL_EMEM;
+ }
+
+ sections[0].rva = newedi;
+ for(t = 0; t <= (uint32_t)sectcnt - 1; t++) {
+ sections[t+1].rva = (((support[t*2]|(support[t*2+1]<<8))-2)<<12)-EC32(optional_hdr32.ImageBase);
+ }
+
+ free(support);
+
+ if((src = (char *) cli_malloc(ssize)) == NULL) {
+ free(exe_sections);
+ free(sections);
+ return CL_EMEM;
+ }
+
+ if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+ cli_dbgmsg("FSG: Can't read raw data of section %d\n", i);
+ free(exe_sections);
+ free(sections);
+ free(src);
+ return CL_EIO;
+ }
+
+ if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+ free(exe_sections);
+ free(src);
+ free(sections);
+ return CL_EMEM;
+ }
+
+ gp = 0xda + 6*(epbuff[16]=='\xe8');
+ oldep = vep + gp + 6 + cli_readint32(src+gp+2+oldep);
+ cli_dbgmsg("FSG: found old EP @%x\n", oldep);
+
+ CLI_UNPTEMP("FSG",(src,dest,sections,exe_sections,0));
+ CLI_UNPRESULTSFSG1("FSG",(unfsg_133(src + newesi - exe_sections[i + 1].rva, dest, ssize + exe_sections[i + 1].rva - newesi, dsize, sections, sectcnt, EC32(optional_hdr32.ImageBase), oldep, ndesc)),1,(src,dest,sections,0));
+ break; /* were done with 1.31 */
+ }
+
+
+ if(found && (DCONF & PE_CONF_UPX)) {
+
+ /* UPX support */
+
+ /* we assume (i + 1) is UPX1 */
+ ssize = exe_sections[i + 1].rsz;
+ dsize = exe_sections[i].vsz + exe_sections[i + 1].vsz;
+
+ CLI_UNPSIZELIMITS("UPX", MAX(dsize, ssize));
+
+ if(ssize <= 0x19 || dsize <= ssize || dsize > CLI_MAX_ALLOCATION ) {
+ cli_dbgmsg("UPX: Size mismatch or dsize too big (ssize: %d, dsize: %d)\n", ssize, dsize);
+ free(exe_sections);
+ return CL_CLEAN;
+ }
+
+ if((src = (char *) cli_malloc(ssize)) == NULL) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ if((dest = (char *) cli_calloc(dsize + 8192, sizeof(char))) == NULL) {
+ free(exe_sections);
+ free(src);
+ return CL_EMEM;
+ }
+
+ if(!cli_seeksect(desc, &exe_sections[i + 1]) || (unsigned int) cli_readn(desc, src, ssize) != ssize) {
+ cli_dbgmsg("UPX: Can't read raw data of section %d\n", i+1);
+ free(exe_sections);
+ free(src);
+ free(dest);
+ return CL_EIO;
+ }
+
+ /* try to detect UPX code */
+ if(cli_memstr(UPX_NRV2B, 24, epbuff + 0x69, 13) || cli_memstr(UPX_NRV2B, 24, epbuff + 0x69 + 8, 13)) {
+ cli_dbgmsg("UPX: Looks like a NRV2B decompression routine\n");
+ upxfn = upx_inflate2b;
+ } else if(cli_memstr(UPX_NRV2D, 24, epbuff + 0x69, 13) || cli_memstr(UPX_NRV2D, 24, epbuff + 0x69 + 8, 13)) {
+ cli_dbgmsg("UPX: Looks like a NRV2D decompression routine\n");
+ upxfn = upx_inflate2d;
+ } else if(cli_memstr(UPX_NRV2E, 24, epbuff + 0x69, 13) || cli_memstr(UPX_NRV2E, 24, epbuff + 0x69 + 8, 13)) {
+ cli_dbgmsg("UPX: Looks like a NRV2E decompression routine\n");
+ upxfn = upx_inflate2e;
+ }
+
+ if(upxfn) {
+ int skew = cli_readint32(epbuff + 2) - EC32(optional_hdr32.ImageBase) - exe_sections[i + 1].rva;
+
+ if(epbuff[1] != '\xbe' || skew <= 0 || skew > 0xfff) { /* FIXME: legit skews?? */
+ skew = 0;
+ if(upxfn(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) >= 0)
+ upx_success = 1;
+
+ } else {
+ cli_dbgmsg("UPX: UPX1 seems skewed by %d bytes\n", skew);
+ if(upxfn(src + skew, ssize - skew, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep-skew) >= 0 || upxfn(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) >= 0)
+ upx_success = 1;
+ }
+
+ if(upx_success)
+ cli_dbgmsg("UPX: Successfully decompressed\n");
+ else
+ cli_dbgmsg("UPX: Preferred decompressor failed\n");
+ }
+
+ if(!upx_success && upxfn != upx_inflate2b) {
+ if(upx_inflate2b(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) == -1 && upx_inflate2b(src + 0x15, ssize - 0x15, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep - 0x15) == -1) {
+
+ cli_dbgmsg("UPX: NRV2B decompressor failed\n");
+ } else {
+ upx_success = 1;
+ cli_dbgmsg("UPX: Successfully decompressed with NRV2B\n");
+ }
+ }
+
+ if(!upx_success && upxfn != upx_inflate2d) {
+ if(upx_inflate2d(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) == -1 && upx_inflate2d(src + 0x15, ssize - 0x15, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep - 0x15) == -1) {
+
+ cli_dbgmsg("UPX: NRV2D decompressor failed\n");
+ } else {
+ upx_success = 1;
+ cli_dbgmsg("UPX: Successfully decompressed with NRV2D\n");
+ }
+ }
+
+ if(!upx_success && upxfn != upx_inflate2e) {
+ if(upx_inflate2e(src, ssize, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep) == -1 && upx_inflate2e(src + 0x15, ssize - 0x15, dest, &dsize, exe_sections[i].rva, exe_sections[i + 1].rva, vep - 0x15) == -1) {
+ cli_dbgmsg("UPX: NRV2E decompressor failed\n");
+ } else {
+ upx_success = 1;
+ cli_dbgmsg("UPX: Successfully decompressed with NRV2E\n");
+ }
+ }
+
+ if(!upx_success) {
+ cli_dbgmsg("UPX: All decompressors failed\n");
+ free(src);
+ free(dest);
+ }
+ }
+
+ if(upx_success) {
+ free(src);
+ free(exe_sections);
+
+ CLI_UNPTEMP("UPX/FSG",(dest,0));
+
+ if((unsigned int) write(ndesc, dest, dsize) != dsize) {
+ cli_dbgmsg("UPX/FSG: Can't write %d bytes\n", dsize);
+ free(tempfile);
+ free(dest);
+ close(ndesc);
+ return CL_EIO;
+ }
+
+ free(dest);
+ fsync(ndesc);
+ lseek(ndesc, 0, SEEK_SET);
+
+ if(cli_leavetemps_flag)
+ cli_dbgmsg("UPX/FSG: Decompressed data saved in %s\n", tempfile);
+
+ cli_dbgmsg("***** Scanning decompressed file *****\n");
+ if((ret = cli_magic_scandesc(ndesc, ctx)) == CL_VIRUS) {
+ close(ndesc);
+ CLI_TMPUNLK();
+ free(tempfile);
+ return CL_VIRUS;
+ }
+
+ close(ndesc);
+ CLI_TMPUNLK();
+ free(tempfile);
+ return ret;
+ }
+
+
+ /* Petite */
+
+ if(epsize<200) {
+ free(exe_sections);
+ return CL_CLEAN;
+ }
+
+ found = 2;
+
+ if(epbuff[0] != '\xb8' || (uint32_t) cli_readint32(epbuff + 1) != exe_sections[nsections - 1].rva + EC32(optional_hdr32.ImageBase)) {
+ if(nsections < 2 || epbuff[0] != '\xb8' || (uint32_t) cli_readint32(epbuff + 1) != exe_sections[nsections - 2].rva + EC32(optional_hdr32.ImageBase))
+ found = 0;
+ else
+ found = 1;
+ }
+
+ if(found && (DCONF & PE_CONF_PETITE)) {
+ cli_dbgmsg("Petite: v2.%d compression detected\n", found);
+
+ if(cli_readint32(epbuff + 0x80) == 0x163c988d) {
+ cli_dbgmsg("Petite: level zero compression is not supported yet\n");
+ } else {
+ dsize = max - min;
+
+ CLI_UNPSIZELIMITS("Petite", dsize);
+
+ if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+ cli_dbgmsg("Petite: Can't allocate %d bytes\n", dsize);
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ for(i = 0 ; i < nsections; i++) {
+ if(exe_sections[i].raw) {
+ if(!cli_seeksect(desc, &exe_sections[i]) || (unsigned int) cli_readn(desc, dest + exe_sections[i].rva - min, exe_sections[i].ursz) != exe_sections[i].ursz) {
+ free(exe_sections);
+ free(dest);
+ return CL_EIO;
+ }
+ }
+ }
+
+ CLI_UNPTEMP("Petite",(dest,exe_sections,0));
+ CLI_UNPRESULTS("Petite",(petite_inflate2x_1to9(dest, min, max - min, exe_sections, nsections - (found == 1 ? 1 : 0), EC32(optional_hdr32.ImageBase),vep, ndesc, found, EC32(optional_hdr32.DataDirectory[2].VirtualAddress),EC32(optional_hdr32.DataDirectory[2].Size))),0,(dest,0));
+ }
+ }
+
+ /* PESpin 1.1 */
+
+ if((DCONF & PE_CONF_PESPIN) && nsections > 1 &&
+ vep >= exe_sections[nsections - 1].rva &&
+ vep < exe_sections[nsections - 1].rva + exe_sections[nsections - 1].rsz - 0x3217 - 4 &&
+ memcmp(epbuff+4, "\xe8\x00\x00\x00\x00\x8b\x1c\x24\x83\xc3", 10) == 0) {
+
+ char *spinned;
+
+ CLI_UNPSIZELIMITS("PEspin", fsize);
+
+ if((spinned = (char *) cli_malloc(fsize)) == NULL) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ lseek(desc, 0, SEEK_SET);
+ if((size_t) cli_readn(desc, spinned, fsize) != fsize) {
+ cli_dbgmsg("PESpin: Can't read %d bytes\n", fsize);
+ free(spinned);
+ free(exe_sections);
+ return CL_EIO;
+ }
+
+ CLI_UNPTEMP("PESpin",(spinned,exe_sections,0));
+ CLI_UNPRESULTS_("PEspin",SPINCASE(),(unspin(spinned, fsize, exe_sections, nsections - 1, vep, ndesc, ctx)),0,(spinned,0));
+ }
+
+
+ /* yC 1.3 */
+
+ if((DCONF & PE_CONF_YC) && nsections > 1 &&
+ EC32(optional_hdr32.AddressOfEntryPoint) == exe_sections[nsections - 1].rva + 0x60 &&
+ memcmp(epbuff, "\x55\x8B\xEC\x53\x56\x57\x60\xE8\x00\x00\x00\x00\x5D\x81\xED\x6C\x28\x40\x00\xB9\x5D\x34\x40\x00\x81\xE9\xC6\x28\x40\x00\x8B\xD5\x81\xC2\xC6\x28\x40\x00\x8D\x3A\x8B\xF7\x33\xC0\xEB\x04\x90\xEB\x01\xC2\xAC", 51) == 0 && fsize >= exe_sections[nsections - 1].raw + 0xC6 + 0xb97) {
+
+ char *spinned;
+
+ if((spinned = (char *) cli_malloc(fsize)) == NULL) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ lseek(desc, 0, SEEK_SET);
+ if((size_t) cli_readn(desc, spinned, fsize) != fsize) {
+ cli_dbgmsg("yC: Can't read %d bytes\n", fsize);
+ free(spinned);
+ free(exe_sections);
+ return CL_EIO;
+ }
+
+ CLI_UNPTEMP("yC",(spinned,exe_sections,0));
+ CLI_UNPRESULTS("yC",(yc_decrypt(spinned, fsize, exe_sections, nsections-1, e_lfanew, ndesc)),0,(spinned,0));
+ }
+
+
+ /* WWPack */
+
+ if((DCONF & PE_CONF_WWPACK) && nsections > 1 &&
+ exe_sections[nsections-1].raw>0x2b1 &&
+ vep == exe_sections[nsections - 1].rva &&
+ exe_sections[nsections - 1].rva + exe_sections[nsections - 1].rsz == max &&
+ memcmp(epbuff, "\x53\x55\x8b\xe8\x33\xdb\xeb", 7) == 0 &&
+ memcmp(epbuff+0x68, "\xe8\x00\x00\x00\x00\x58\x2d\x6d\x00\x00\x00\x50\x60\x33\xc9\x50\x58\x50\x50", 19) == 0) {
+ uint32_t headsize=exe_sections[nsections - 1].raw;
+ char *dest, *wwp;
+
+ for(i = 0 ; i < (unsigned int)nsections-1; i++)
+ if (exe_sections[i].raw<headsize) headsize=exe_sections[i].raw;
+
+ dsize = max-min+headsize-exe_sections[nsections - 1].rsz;
+
+ CLI_UNPSIZELIMITS("WWPack", dsize);
+
+ if((dest = (char *) cli_calloc(dsize, sizeof(char))) == NULL) {
+ cli_dbgmsg("WWPack: Can't allocate %d bytes\n", dsize);
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ lseek(desc, 0, SEEK_SET);
+ if((size_t) cli_readn(desc, dest, headsize) != headsize) {
+ cli_dbgmsg("WWPack: Can't read %d bytes from headers\n", headsize);
+ free(dest);
+ free(exe_sections);
+ return CL_EIO;
+ }
+
+ for(i = 0 ; i < (unsigned int)nsections-1; i++) {
+ if(exe_sections[i].rsz) {
+ if(!cli_seeksect(desc, &exe_sections[i]) || (unsigned int) cli_readn(desc, dest + headsize + exe_sections[i].rva - min, exe_sections[i].rsz) != exe_sections[i].rsz) {
+ free(dest);
+ free(exe_sections);
+ return CL_EIO;
+ }
+ }
+ }
+
+ if((wwp = (char *) cli_calloc(exe_sections[nsections - 1].rsz, sizeof(char))) == NULL) {
+ cli_dbgmsg("WWPack: Can't allocate %d bytes\n", exe_sections[nsections - 1].rsz);
+ free(dest);
+ free(exe_sections);
+ return CL_EMEM;
+ }
+
+ if(!cli_seeksect(desc, &exe_sections[nsections - 1]) || (size_t) cli_readn(desc, wwp, exe_sections[nsections - 1].rsz) != exe_sections[nsections - 1].rsz) {
+ cli_dbgmsg("WWPack: Can't read %d bytes from wwpack sect\n", exe_sections[nsections - 1].rsz);
+ free(dest);
+ free(wwp);
+ free(exe_sections);
+ return CL_EIO;
+ }
+
+ if (!wwunpack(dest, dsize, headsize, min, exe_sections[nsections-1].rva, e_lfanew, wwp, exe_sections[nsections - 1].rsz, nsections-1)) {
+
+ free(wwp);
+
+ CLI_UNPTEMP("WWPack",(dest,exe_sections,0));
+
+ if((unsigned int) write(ndesc, dest, dsize) != dsize) {
+ cli_dbgmsg("WWPack: Can't write %d bytes\n", dsize);
+ close(ndesc);
+ free(tempfile);
+ free(dest);
+ free(exe_sections);
+ return CL_EIO;
+ }
+
+ free(dest);
+ if (cli_leavetemps_flag)
+ cli_dbgmsg("WWPack: Unpacked and rebuilt executable saved in %s\n", tempfile);
+ else
+ cli_dbgmsg("WWPack: Unpacked and rebuilt executable\n");
+
+ fsync(ndesc);
+ lseek(ndesc, 0, SEEK_SET);
+
+ if(cli_magic_scandesc(ndesc, ctx) == CL_VIRUS) {
+ free(exe_sections);
+ close(ndesc);
+ if(!cli_leavetemps_flag)
+ unlink(tempfile);
+ free(tempfile);
+ return CL_VIRUS;
+ }
+
+ close(ndesc);
+ if(!cli_leavetemps_flag)
+ unlink(tempfile);
+ free(tempfile);
+ } else {
+ free(wwp);
+ free(dest);
+ cli_dbgmsg("WWPpack: Decompression failed\n");
+ }
+ }
+
+
+ /* ASPACK support */
+ while((DCONF & PE_CONF_ASPACK) && ep+58+0x70e < fsize && !memcmp(epbuff,"\x60\xe8\x03\x00\x00\x00\xe9\xeb",8)) {
+
+ if(epsize<0x3bf || memcmp(epbuff+0x3b9, "\x68\x00\x00\x00\x00\xc3",6)) break;
+ ssize = 0;
+ for(i=0 ; i< nsections ; i++)
+ if(ssize<exe_sections[i].rva+exe_sections[i].vsz)
+ ssize=exe_sections[i].rva+exe_sections[i].vsz;
+ if(!ssize) break;
+
+ CLI_UNPSIZELIMITS("Aspack", ssize);
+
+ if(!(src=(char *)cli_calloc(ssize, sizeof(char)))) {
+ free(exe_sections);
+ return CL_EMEM;
+ }
+ for(i = 0 ; i < (unsigned int)nsections; i++) {
+ if(!exe_sections[i].rsz) continue;
+ if(!cli_seeksect(desc, &exe_sections[i])) break;
+ if(!CLI_ISCONTAINED(src, ssize, src+exe_sections[i].rva, exe_sections[i].rsz)) break;
+ if(cli_readn(desc, src+exe_sections[i].rva, exe_sections[i].rsz)!=exe_sections[i].rsz) break;
+ }
+ if(i!=nsections) {
+ cli_dbgmsg("Aspack: Probably hacked/damaged Aspack file.\n");
+ free(src);
+ break;
+ }
+
+ CLI_UNPTEMP("Aspack",(src,exe_sections,0));
+ CLI_UNPRESULTS("Aspack",(unaspack212((uint8_t *)src, ssize, exe_sections, nsections, vep-1, EC32(optional_hdr32.ImageBase), ndesc)),1,(src,0));
+ break;
+ }
+
+ /* NsPack */
+
+ while (DCONF & PE_CONF_NSPACK) {
+ uint32_t eprva = vep;
+ uint32_t start_of_stuff, ssize, dsize, rep = ep;
+ unsigned int nowinldr;
+ char nbuff[24];
+ char *src=epbuff, *dest;
+
+ if (*epbuff=='\xe9') { /* bitched headers */
+ eprva = cli_readint32(epbuff+1)+vep+5;
+ if (!(rep = cli_rawaddr(eprva, exe_sections, nsections, &err, fsize, hdr_size)) && err) break;
+ if (lseek(desc, rep, SEEK_SET)==-1) break;
+ if (cli_readn(desc, nbuff, 24)!=24) break;
+ src = nbuff;
+ }
+
+ if (memcmp(src, "\x9c\x60\xe8\x00\x00\x00\x00\x5d\xb8\x07\x00\x00\x00", 13)) break;
+
+ nowinldr = 0x54-cli_readint32(src+17);
+ cli_dbgmsg("NsPack: Found *start_of_stuff @delta-%x\n", nowinldr);
+
+ if (lseek(desc, rep-nowinldr, SEEK_SET)==-1) break;
+ if (cli_readn(desc, nbuff, 4)!=4) break;
+ start_of_stuff=rep+cli_readint32(nbuff);
+ if (lseek(desc, start_of_stuff, SEEK_SET)==-1) break;
+ if (cli_readn(desc, nbuff, 20)!=20) break;
+ src = nbuff;
+ if (!cli_readint32(nbuff)) {
+ start_of_stuff+=4; /* FIXME: more to do */
+ src+=4;
+ }
+
+ ssize = cli_readint32(src+5)|0xff;
+ dsize = cli_readint32(src+9);
+
+ CLI_UNPSIZELIMITS("NsPack", MAX(ssize,dsize));
+
+ if ( !ssize || !dsize || dsize != exe_sections[0].vsz) break;
+ if (lseek(desc, start_of_stuff, SEEK_SET)==-1) break;
+ if (!(dest=cli_malloc(dsize))) break;
+ /* memset(dest, 0xfc, dsize); */
+
+ if (!(src=cli_malloc(ssize))) {
+ free(dest);
+ break;
+ }
+ /* memset(src, 0x00, ssize); */
+ cli_readn(desc, src, ssize);
+
+ eprva+=0x27a;
+ if (!(rep = cli_rawaddr(eprva, exe_sections, nsections, &err, fsize, hdr_size)) && err) {
+ free(dest);
+ free(src);
+ break;
+ }
+ if (lseek(desc, rep, SEEK_SET)==-1) {
+ free(dest);
+ free(src);
+ break;
+ }
+ if (cli_readn(desc, nbuff, 5)!=5) {
+ free(dest);
+ free(src);
+ break;
+ }
+ eprva=eprva+5+cli_readint32(nbuff+1);
+ cli_dbgmsg("NsPack: OEP = %08x\n", eprva);
+
+ CLI_UNPTEMP("NsPack",(src,dest,exe_sections,0));
+ CLI_UNPRESULTS("NsPack",(unspack(src, dest, ctx, exe_sections[0].rva, EC32(optional_hdr32.ImageBase), eprva, ndesc)),0,(src,dest,0));
+ break;
+ }
+
+ /* to be continued ... */
+
+ free(exe_sections);
+ return CL_CLEAN;
+}
+
+/*
+ * cli_peheader - parse the headers of a PE image and fill in *peinfo.
+ *
+ * desc:   descriptor to read from. The DOS signature is read from the
+ *         descriptor's current position, so the caller is presumably
+ *         expected to have positioned it at peinfo->offset beforehand
+ *         (NOTE(review): confirm against the callers).
+ * peinfo: peinfo->offset is read as the position of the image inside the
+ *         file; nsections, section[] and ep are written.
+ *
+ * Returns 0 on success and -1 on any failure.
+ *
+ * On success peinfo->section points to a heap array of peinfo->nsections
+ * entries which is not freed here. On failures that happen after that array
+ * has been allocated it is freed and peinfo->section is reset to NULL; on
+ * earlier failures peinfo->section is left untouched.
+ */
+int cli_peheader(int desc, struct cli_exe_info *peinfo)
+{
+    uint16_t e_magic; /* DOS signature ("MZ") */
+    uint32_t e_lfanew; /* address of new exe header */
+    struct pe_image_file_hdr file_hdr;
+    /* Accessed below as optional_hdr32 / optional_hdr64, which are presumably
+     * macros defined earlier in this file - confirm. */
+    union {
+        struct pe_image_optional_hdr64 opt64;
+        struct pe_image_optional_hdr32 opt32;
+    } pe_opt;
+    struct pe_image_section_hdr *section_hdr;
+    struct stat sb;
+    int i;
+    unsigned int err, pe_plus = 0;
+    uint32_t valign, falign, hdr_size;
+    size_t fsize;
+
+    cli_dbgmsg("in cli_peheader\n");
+
+    if(fstat(desc, &sb) == -1) {
+        cli_dbgmsg("fstat failed\n");
+        return -1;
+    }
+
+    /* size of the image, counted from its start offset inside the file */
+    fsize = sb.st_size - peinfo->offset;
+
+    if(cli_readn(desc, &e_magic, sizeof(e_magic)) != sizeof(e_magic)) {
+        cli_dbgmsg("Can't read DOS signature\n");
+        return -1;
+    }
+
+    if(EC16(e_magic) != IMAGE_DOS_SIGNATURE && EC16(e_magic) != IMAGE_DOS_SIGNATURE_OLD) {
+        cli_dbgmsg("Invalid DOS signature\n");
+        return -1;
+    }
+
+    /* Skip to the end of the DOS header. A failed seek must be fatal:
+     * otherwise the next read would take e_lfanew from the wrong bytes. */
+    if(lseek(desc, 58, SEEK_CUR) < 0) {
+        cli_dbgmsg("Can't lseek to the end of the DOS header\n");
+        return -1;
+    }
+
+    if(cli_readn(desc, &e_lfanew, sizeof(e_lfanew)) != sizeof(e_lfanew)) {
+        cli_dbgmsg("Can't read new header address\n");
+        /* truncated header? */
+        return -1;
+    }
+
+    e_lfanew = EC32(e_lfanew);
+    if(!e_lfanew) {
+        cli_dbgmsg("Not a PE file\n");
+        return -1;
+    }
+
+    if(lseek(desc, peinfo->offset + e_lfanew, SEEK_SET) < 0) {
+        /* probably not a PE file */
+        cli_dbgmsg("Can't lseek to e_lfanew\n");
+        return -1;
+    }
+
+    if(cli_readn(desc, &file_hdr, sizeof(struct pe_image_file_hdr)) != sizeof(struct pe_image_file_hdr)) {
+        /* bad information in e_lfanew - probably not a PE file */
+        cli_dbgmsg("Can't read file header\n");
+        return -1;
+    }
+
+    if(EC32(file_hdr.Magic) != IMAGE_NT_SIGNATURE) {
+        cli_dbgmsg("Invalid PE signature (probably NE file)\n");
+        return -1;
+    }
+
+    /* only images with 1..96 sections are accepted */
+    if ( (peinfo->nsections = EC16(file_hdr.NumberOfSections)) < 1 || peinfo->nsections > 96 ) return -1;
+
+    if (EC16(file_hdr.SizeOfOptionalHeader) < sizeof(struct pe_image_optional_hdr32)) {
+        cli_dbgmsg("SizeOfOptionalHeader too small\n");
+        return -1;
+    }
+
+    /* Read the 32-bit sized part first; its Magic tells whether more follows. */
+    if(cli_readn(desc, &optional_hdr32, sizeof(struct pe_image_optional_hdr32)) != sizeof(struct pe_image_optional_hdr32)) {
+        cli_dbgmsg("Can't read optional file header\n");
+        return -1;
+    }
+
+    if(EC32(optional_hdr64.Magic)==PE32P_SIGNATURE) { /* PE+ */
+        if(EC16(file_hdr.SizeOfOptionalHeader)!=sizeof(struct pe_image_optional_hdr64)) {
+            cli_dbgmsg("Incorrect SizeOfOptionalHeader for PE32+\n");
+            return -1;
+        }
+        /* read the remainder of the 64-bit header right behind the part already read */
+        if(cli_readn(desc, &optional_hdr32 + 1, sizeof(struct pe_image_optional_hdr64) - sizeof(struct pe_image_optional_hdr32)) != sizeof(struct pe_image_optional_hdr64) - sizeof(struct pe_image_optional_hdr32)) {
+            cli_dbgmsg("Can't read optional file header\n");
+            return -1;
+        }
+        hdr_size = EC32(optional_hdr64.SizeOfHeaders);
+        pe_plus=1;
+    } else { /* PE */
+        if (EC16(file_hdr.SizeOfOptionalHeader)!=sizeof(struct pe_image_optional_hdr32)) {
+            /* Seek to the end of the long header; if that fails the section
+             * table would be read from the wrong offset, so give up. */
+            if(lseek(desc, (EC16(file_hdr.SizeOfOptionalHeader)-sizeof(struct pe_image_optional_hdr32)), SEEK_CUR) < 0) {
+                cli_dbgmsg("Can't lseek to the end of the optional header\n");
+                return -1;
+            }
+        }
+        hdr_size = EC32(optional_hdr32.SizeOfHeaders);
+    }
+
+    valign = (pe_plus)?EC32(optional_hdr64.SectionAlignment):EC32(optional_hdr32.SectionAlignment);
+    falign = (pe_plus)?EC32(optional_hdr64.FileAlignment):EC32(optional_hdr32.FileAlignment);
+
+    hdr_size = PESALIGN(hdr_size, valign);
+
+    peinfo->section = (struct cli_exe_section *) cli_calloc(peinfo->nsections, sizeof(struct cli_exe_section));
+
+    if(!peinfo->section) {
+        cli_dbgmsg("Can't allocate memory for section headers\n");
+        return -1;
+    }
+
+    /* temporary copy of the on-disk section table, released before returning */
+    section_hdr = (struct pe_image_section_hdr *) cli_calloc(peinfo->nsections, sizeof(struct pe_image_section_hdr));
+
+    if(!section_hdr) {
+        cli_dbgmsg("Can't allocate memory for section headers\n");
+        free(peinfo->section);
+        peinfo->section = NULL;
+        return -1;
+    }
+
+    if(cli_readn(desc, section_hdr, peinfo->nsections * sizeof(struct pe_image_section_hdr)) != peinfo->nsections * sizeof(struct pe_image_section_hdr)) {
+        cli_dbgmsg("Can't read section header\n");
+        cli_dbgmsg("Possibly broken PE file\n");
+        free(section_hdr);
+        free(peinfo->section);
+        peinfo->section = NULL;
+        return -1;
+    }
+
+    for(i = 0; falign!=0x200 && i<peinfo->nsections; i++) {
+        /* file alignment fallback mode: a section with raw data whose raw
+         * pointer is not a multiple of falign but is a multiple of 0x200
+         * makes 0x200 the file alignment used for every section below */
+        if (falign && section_hdr[i].SizeOfRawData && EC32(section_hdr[i].PointerToRawData)%falign && !(EC32(section_hdr[i].PointerToRawData)%0x200)) {
+            falign = 0x200;
+        }
+    }
+
+    for(i = 0; i < peinfo->nsections; i++) {
+        peinfo->section[i].rva = PEALIGN(EC32(section_hdr[i].VirtualAddress), valign);
+        peinfo->section[i].vsz = PESALIGN(EC32(section_hdr[i].VirtualSize), valign);
+        peinfo->section[i].raw = PEALIGN(EC32(section_hdr[i].PointerToRawData), falign);
+        peinfo->section[i].rsz = PESALIGN(EC32(section_hdr[i].SizeOfRawData), falign);
+
+        /* no virtual size given: derive it from the raw size instead */
+        if (!peinfo->section[i].vsz && peinfo->section[i].rsz)
+            peinfo->section[i].vsz=PESALIGN(EC32(section_hdr[i].SizeOfRawData), valign);
+
+        /* raw data reaching past the end of the file: cut rsz down to what
+         * is left after raw, or to 0 when raw itself is not below fsize */
+        if (peinfo->section[i].rsz && !CLI_ISCONTAINED(0, (uint32_t) fsize, peinfo->section[i].raw, peinfo->section[i].rsz))
+            peinfo->section[i].rsz = (fsize - peinfo->section[i].raw)*(fsize>peinfo->section[i].raw);
+    }
+
+    if(pe_plus)
+        peinfo->ep = EC32(optional_hdr64.AddressOfEntryPoint);
+    else
+        peinfo->ep = EC32(optional_hdr32.AddressOfEntryPoint);
+
+    /* replace the header's entry point with what cli_rawaddr() maps it to;
+     * a zero result together with err set is treated as a broken file */
+    if(!(peinfo->ep = cli_rawaddr(peinfo->ep, peinfo->section, peinfo->nsections, &err, fsize, hdr_size)) && err) {
+        cli_dbgmsg("Broken PE file\n");
+        free(section_hdr);
+        free(peinfo->section);
+        peinfo->section = NULL;
+        return -1;
+    }
+
+    free(section_hdr);
+    return 0;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_petite.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2004 aCaB <acab at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+/*
+** petitep.c
+**
+** 09/07/2k4 - Dumped and reversed
+** 10/07/2k4 - Very 1st approach
+** 10/07/2k4 - PE stuff and main loop
+** 11/07/2k4 - Porting finished, tracking my bugs...
+** 12/07/2k4 - ARRRRRGHHH :D
+** 14/07/2k4 - Code cleaned
+** 15/07/2k4 - Securing && ClamAV porting
+** 21/07/2k4 - Unmangled imports now supported
+** 22/07/2k4 - Unstripped .relocs now supported
+**
+*/
+
+/*
+** Unpacks a buffer containing a petite 2.2 compressed
+** file. Doesn't perform Import Table unmangling. Doesn't
+** fixup call/jumps. Tries to "guess" the original sections
+** structure and entrypoint.
+**
+** Lotta phanx to Micky for patiently bearing my screams :P
+** Greets to Ian Luck: the SEH MOVSB thingy almost got me :O
+** TODO: Cope with level 0 and older petite versions.
+*/
+
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "cltypes.h"
+#include "rebuildpe.h"
+#include "execs.h"
+#include "others.h"
+#include "petite.h"
+
+
+/*
+ * Fetches the next control bit of the packed stream.
+ *
+ * *mydlptr is a one byte shift register: its top bit is the bit handed
+ * back to the caller and the register is shifted left by one.  When the
+ * low seven bits of the register are all clear it is reloaded from the
+ * byte at *scur (a marker bit is shifted in at the bottom) and *scur is
+ * advanced by one.
+ *
+ * Returns the extracted bit (0 or 1), or -1 when a reload is needed but
+ * *scur is not inside [buffer, buffer+buffersize-1).  On -1 neither
+ * *mydlptr nor *scur is modified.
+ */
+static int doubledl(char **scur, uint8_t *mydlptr, char *buffer, uint32_t buffersize)
+{
+    unsigned char bits = *mydlptr;
+
+    if (bits & 0x7f) {
+        /* Bits are still pending: just shift the register */
+        *mydlptr = (unsigned char)(bits * 2);
+        return (bits >> 7) & 1;
+    }
+
+    /* Register exhausted: reload it from the input stream */
+    if (*scur < buffer || *scur >= buffer + buffersize - 1)
+        return -1;
+
+    bits = (unsigned char)**scur;
+    *scur = *scur + 1;
+    *mydlptr = (unsigned char)(bits * 2 + 1);
+    return (bits >> 7) & 1;
+}
+
+/*
+ * Unpacks a petite compressed image in place and rebuilds a PE from it.
+ *
+ * buf       - buffer holding the image; every access is bounds checked
+ *             against [buf, buf+bufsz)
+ * minrva    - RVA corresponding to buf[0] (adjbuf = buf - minrva, so that
+ *             adjbuf + rva addresses the byte at that RVA)
+ * bufsz     - size of buf in bytes
+ * sections  - section table of the packed file; the last entry is where
+ *             the packed records are looked for (+0x1b8 for version 2,
+ *             +0x178 for version 1)
+ * sectcount - number of entries in sections
+ * Imagebase - image base, used while decrypting the entry point and
+ *             forwarded to cli_rebuildpe()
+ * pep       - entry point of the packed file, used to decrypt the old EP
+ * desc      - forwarded to cli_rebuildpe()
+ * version   - 1 or 2; any other value leaves the packed pointer NULL and
+ *             the first bounds check fails
+ * ResRva, ResSize - forwarded to cli_rebuildpe()
+ *
+ * The packed area is a list of records introduced by a 32 bit word:
+ *   - 0                 : end of list, sort/compact the sections and rebuild
+ *   - bit 31 set        : 12 byte record, move (word & 0x7fffffff) dwords
+ *   - bit 31 clear, !=0 : 16 byte record, decompress one original section
+ *
+ * Returns 0 when cli_rebuildpe() succeeds, 1 on every failure path.
+ */
+int petite_inflate2x_1to9(char *buf, uint32_t minrva, uint32_t bufsz, struct cli_exe_section *sections, unsigned int sectcount, uint32_t Imagebase, uint32_t pep, int desc, int version, uint32_t ResRva, uint32_t ResSize)
+{
+ char *adjbuf = buf - minrva;
+ char *packed = NULL;
+ uint32_t thisrva=0, bottom = 0, enc_ep=0, irva=0, workdone=0, grown=0x355, skew=0x35;
+ int j = 0, oob, mangled = 0, check4resources=0;
+ struct cli_exe_section *usects = NULL;
+ void *tmpsct = NULL;
+
+ /*
+ -] The real thing [-
+ */
+
+ /* NOTE: (435063->4350a5) Petite kernel32!imports and error strings */
+
+ /* Here we adjust the start of packed blob, the size of petite code,
+ * the difference in size if relocs were stripped
+ * See below...
+ */
+
+ if ( version == 2 )
+ packed = adjbuf + sections[sectcount-1].rva + 0x1b8;
+ if ( version == 1 ) {
+ packed = adjbuf + sections[sectcount-1].rva + 0x178;
+ grown=0x323; /* My name is Harry potter */
+ skew=0x34;
+ }
+
+ /* One iteration per packed record; every exit is a return inside the loop */
+ while (1) {
+ char *ssrc, *ddst;
+ uint32_t size, srva;
+ int backbytes, oldback, backsize, addsize;
+
+ if ( ! CLI_ISCONTAINED(buf, bufsz, packed, 4)) {
+ if (usects)
+ free(usects);
+ return 1;
+ }
+ srva = cli_readint32(packed);
+
+ if (! srva) {
+ /* WE'RE DONE !!! :D */
+ int t, upd = 1;
+
+ if ( j <= 0 ) /* Some non petite compressed files will get here */
+ return 1;
+
+ /* Select * from sections order by rva asc; */
+ /* (bubble sort on rva; only rva, rsz and vsz are swapped) */
+ while ( upd ) {
+ upd = 0;
+ for (t = 0; t < j-1 ; t++) {
+ uint32_t trva, trsz, tvsz;
+
+ if ( usects[t].rva <= usects[t+1].rva )
+ continue;
+ trva = usects[t].rva;
+ trsz = usects[t].rsz;
+ tvsz = usects[t].vsz;
+ usects[t].rva = usects[t+1].rva;
+ usects[t].rsz = usects[t+1].rsz;
+ usects[t].vsz = usects[t+1].vsz;
+ usects[t+1].rva = trva;
+ usects[t+1].rsz = trsz;
+ usects[t+1].vsz = tvsz;
+ upd = 1;
+ }
+ }
+
+ /* Computes virtualsize... we try to guess, actually :O */
+ /* (each section but the last is stretched up to the next section's rva) */
+ for (t = 0; t < j-1 ; t++) {
+ if ( usects[t].vsz != usects[t+1].rva - usects[t].rva )
+ usects[t].vsz = usects[t+1].rva - usects[t].rva;
+ }
+
+ /*
+ * Our encryption is pathetic and our software is lame but
+ * we need to claim it's unbreakable.
+ * So why don't we just mangle the imports and encrypt the EP?!
+ */
+
+ /* Decrypts old entrypoint if we got enough clues */
+ if (enc_ep) {
+ uint32_t virtaddr = pep + 5 + Imagebase, tmpep;
+ int rndm = 0, dummy = 1;
+ char *thunk = adjbuf+irva;
+ char *imports;
+
+ if ( version == 2 ) { /* 2.2 only */
+
+ /* Walk the array of thunk pointers at irva; a zero entry ends it */
+ while ( dummy && CLI_ISCONTAINED(buf, bufsz, thunk, 4) ) {
+ uint32_t api;
+
+ if (! cli_readint32(thunk)) {
+ workdone = 1;
+ break;
+ }
+
+ imports = adjbuf + cli_readint32(thunk);
+ thunk+=4;
+ dummy = 0;
+
+ /* Each import entry updates enc_ep and rotates it right by 3 bits */
+ while ( CLI_ISCONTAINED(buf, bufsz, imports, 4)) {
+ dummy = 0;
+
+ imports+=4;
+ if ( ! (api = cli_readint32(imports-4)) ) {
+ dummy = 1;
+ break;
+ }
+ if ( (api != (api | 0x80000000)) && mangled && --rndm < 0) {
+ api = virtaddr;
+ virtaddr +=5; /* EB + 1 double */
+ rndm = virtaddr & 7;
+ } else {
+ api = 0xbff01337; /* KERNEL32!leet */
+ }
+ if (sections[sectcount-1].rva+Imagebase < api )
+ enc_ep--;
+ if ( api < virtaddr )
+ enc_ep--;
+ tmpep = (enc_ep & 0xfffffff8)>>3 & 0x1fffffff;
+ enc_ep = (enc_ep & 7)<<29 | tmpep;
+ }
+ }
+ } else
+ workdone = 1;
+ enc_ep = pep+5+enc_ep;
+ if ( workdone == 1 ) {
+ cli_dbgmsg("Petite: Old EP: %x\n", enc_ep);
+ } else {
+ enc_ep = usects[0].rva;
+ cli_dbgmsg("Petite: In troubles while attempting to decrypt old EP, using bogus %x\n", enc_ep);
+ }
+ }
+
+ /* Let's compact data */
+ /* (sections are packed back to back starting at offset 0 of buf) */
+ for (t = 0; t < j ; t++) {
+ usects[t].raw = (t>0)?(usects[t-1].raw + usects[t-1].rsz):0;
+ if (usects[t].rsz != 0 && CLI_ISCONTAINED(buf, bufsz, buf + usects[t].raw, usects[t].rsz))
+ memmove(buf + usects[t].raw, adjbuf + usects[t].rva, usects[t].rsz);
+ }
+
+ /* Showtime!!! */
+ cli_dbgmsg("Petite: Sections dump:\n");
+ for (t = 0; t < j ; t++)
+ cli_dbgmsg("Petite: .SECT%d RVA:%x VSize:%x ROffset: %x, RSize:%x\n", t, usects[t].rva, usects[t].vsz, usects[t].raw, usects[t].rsz);
+ if (! cli_rebuildpe(buf, usects, j, Imagebase, enc_ep, ResRva, ResSize, desc)) {
+ cli_dbgmsg("Petite: Rebuilding failed\n");
+ free(usects);
+ return 1;
+ }
+ free(usects);
+ return 0;
+ }
+
+
+ size = srva & 0x7fffffff;
+ if ( srva != size ) { /* Test and clear bit 31 */
+ check4resources=0;
+ /*
+ Enumerates each petite data section
+ I should get here once or twice:
+ - 1 time for the resource section (if present)
+ - 1 time for the all_the_rest section
+ */
+
+ if ( ! CLI_ISCONTAINED(buf, bufsz, packed+4, 8) ) {
+ if (usects)
+ free(usects);
+ return 1;
+ }
+ /* Save the end of current packed section for later use */
+ bottom = cli_readint32(packed+8) + 4;
+ /* The two RVAs in the record point at the LAST dword of each area */
+ ssrc = adjbuf + cli_readint32(packed+4) - (size-1)*4;
+ ddst = adjbuf + cli_readint32(packed+8) - (size-1)*4;
+
+ if ( !CLI_ISCONTAINED(buf, bufsz, ssrc, size*4) || !CLI_ISCONTAINED(buf, bufsz, ddst, size*4) ) {
+ if (usects)
+ free(usects);
+ return 1;
+ }
+
+ /* Copy packed data to the end of the current packed section */
+ memmove(ddst, ssrc, size*4);
+ packed += 0x0c;
+ } else {
+ uint32_t check1, check2;
+ uint8_t mydl = 0;
+ uint8_t goback;
+
+ /* Unpack each original section in turn */
+
+ if ( ! CLI_ISCONTAINED(buf, bufsz, packed+4, 8)) {
+ if (usects)
+ free(usects);
+ return 1;
+ }
+
+ size = cli_readint32(packed+4); /* How many bytes to unpack */
+ thisrva=cli_readint32(packed+8); /* RVA of the original section */
+ packed += 0x10;
+
+ /* Hard cap on the number of unpacked sections */
+ if ( j >= 96 ) {
+ cli_dbgmsg("Petite: maximum number of sections exceeded, giving up.\n");
+ free(usects);
+ return 1;
+ }
+ /* Alloc 1 more struct */
+ if ( ! (tmpsct = cli_realloc(usects, sizeof(struct cli_exe_section) * (j+1))) ) {
+ if (usects)
+ free(usects);
+ return 1;
+ }
+
+ usects = (struct cli_exe_section *) tmpsct;
+ /* Save section spex for later rebuilding */
+ usects[j].rva = thisrva;
+ usects[j].rsz = size;
+ if ( (int)(bottom - thisrva) >0 )
+ usects[j].vsz = bottom - thisrva;
+ else
+ usects[j].vsz = size;
+ usects[j].raw = 0; /* Cheaper than memset */
+
+ if (!size) { /* That's a ghost section! reloc any1? :P */
+ j++;
+ continue;
+ }
+
+ ssrc = adjbuf + srva;
+ ddst = adjbuf + thisrva;
+
+ /* Last petite section (unpacked 1st) could contain unpacked data
+ * (eg the icon): let's fix the rva
+ */
+
+ if (!check4resources) {
+ unsigned int q;
+ for ( q = 0 ; q < sectcount ; q++ ) {
+ if ( thisrva <= sections[q].rva || thisrva >= sections[q].rva + sections[q].vsz)
+ continue;
+ usects[j].rva = sections[q].rva;
+ usects[j].rsz = thisrva - sections[q].rva + size;
+ break;
+ }
+ }
+
+ /* Increase count of unpacked sections */
+ j++;
+
+
+ /* Setup some crap for later checks */
+ /* (thresholds and number of extra offset bits depend on the section size) */
+ if ( size < 0x10000 ) {
+ check1 = 0x0FFFFC060;
+ check2 = 0x0FFFFFC60;
+ goback = 5;
+ } else if ( size < 0x40000 ) {
+ check1 = 0x0FFFF8180;
+ check2 = 0x0FFFFF980;
+ goback = 7;
+ } else {
+ check1 = 0x0FFFF8300;
+ check2 = 0x0FFFFFB00;
+ goback = 8;
+ }
+
+ /*
+ * NOTE: on last loop we get esi=edi=ImageBase (which is not writeable)
+ * The movsb on the next line causes the iat_rebuild_and_decrypt_oldEP()
+ * func to get called instead... ehehe very smart ;)
+ */
+
+ if ( !CLI_ISCONTAINED(buf, bufsz, ssrc, 1) || !CLI_ISCONTAINED(buf, bufsz, ddst, 1)) {
+ free(usects);
+ return 1;
+ }
+
+ /* The first byte of a section is stored verbatim */
+ size--;
+ *ddst++=*ssrc++; /* eheh u C gurus gotta luv these monsters :P */
+ backbytes=0;
+ oldback = 0;
+
+ /* No surprises here... NRV any1??? ;) */
+ while (size > 0) {
+ oob = doubledl(&ssrc, &mydl, buf, bufsz);
+ if ( oob == -1 ) {
+ free(usects);
+ return 1;
+ }
+ if (!oob) {
+ /* Literal: one input byte XORed with the low byte of the remaining size */
+ if ( !CLI_ISCONTAINED(buf, bufsz, ssrc, 1) || !CLI_ISCONTAINED(buf, bufsz, ddst, 1) ) {
+ free(usects);
+ return 1;
+ }
+ *ddst++ = (char)((*ssrc++)^(size & 0xff));
+ size--;
+ } else {
+ /* Match: decode a (negative) offset in backbytes and a length in backsize */
+ addsize = 0;
+ backbytes++;
+ while (1) {
+ if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+ free(usects);
+ return 1;
+ }
+ backbytes = backbytes*2 + oob;
+ if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+ free(usects);
+ return 1;
+ }
+ if (!oob)
+ break;
+ }
+ backbytes -= 3;
+ if ( backbytes >= 0 ) {
+ backsize = goback;
+ do {
+ if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+ free(usects);
+ return 1;
+ }
+ backbytes = backbytes*2 + oob;
+ backsize--;
+ } while (backsize);
+ backbytes^=0xffffffff;
+ addsize += 1 + ( backbytes < (int) check2 ) + ( backbytes < (int) check1 );
+ oldback = backbytes;
+ } else {
+ /* Reuse the offset of the previous match */
+ backsize = backbytes+1;
+ backbytes = oldback;
+ }
+
+ if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+ free(usects);
+ return 1;
+ }
+ backsize = backsize*2 + oob;
+ if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+ free(usects);
+ return 1;
+ }
+ backsize = backsize*2 + oob;
+ if (!backsize) {
+ backsize++;
+ while (1) {
+ if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+ free(usects);
+ return 1;
+ }
+ backsize = backsize*2 + oob;
+ if ( (oob = doubledl(&ssrc, &mydl, buf, bufsz)) == -1 ) {
+ free(usects);
+ return 1;
+ }
+ if (!oob)
+ break;
+ }
+ backsize+=2;
+ }
+ backsize+=addsize;
+ size-=backsize;
+ if(!CLI_ISCONTAINED(buf, bufsz, ddst, backsize) || !CLI_ISCONTAINED(buf, bufsz, ddst+backbytes, backsize)) {
+ free(usects);
+ return 1;
+ }
+ /* Byte by byte on purpose: source and destination may overlap */
+ while(backsize--) {
+ *ddst=*(ddst+backbytes);
+ ddst++;
+ }
+ backbytes=0;
+ backsize=0;
+ } /* else */
+ } /* while(ebx) */
+
+ /* Any lame petite code here? If so let's strip it
+ * We've done version adjustments already, see above
+ */
+
+ if ( j ) {
+ int strippetite=0;
+ uint32_t reloc;
+
+ /* LONG MAGIC = 33C05E64 8B188B1B 8D63D65D */
+ if ( usects[j-1].rsz > grown &&
+ CLI_ISCONTAINED(buf, bufsz, ddst-grown+5+0x4f, 8) &&
+ cli_readint32(ddst-grown+5+0x4f) == 0x645ec033 &&
+ cli_readint32(ddst-grown+5+0x4f+4) == 0x1b8b188b
+ ) {
+ reloc = 0;
+ strippetite = 1;
+ }
+ if ( !strippetite &&
+ usects[j-1].rsz > grown+skew &&
+ CLI_ISCONTAINED(buf, bufsz, ddst-grown+5+0x4f-skew, 8) &&
+ cli_readint32(ddst-grown+5+0x4f-skew) == 0x645ec033 &&
+ cli_readint32(ddst-grown+5+0x4f+4-skew) == 0x1b8b188b
+ ) {
+ reloc = skew; /* If the original exe had a .reloc we're skewed */
+ strippetite = 1;
+ }
+
+ /* reloc is only read when strippetite is set, i.e. after it was assigned */
+ if (strippetite && CLI_ISCONTAINED(buf, bufsz, ddst-grown+0x0f-8-reloc, 8)) {
+ uint32_t test1, test2;
+
+ /* REMINDER: DON'T BPX IN HERE U DUMBASS!!!!!!!!!!!!!!!!!!!!!!!! */
+ test1 = cli_readint32(ddst-grown+0x0f-8-reloc)^0x9d6661aa;
+ test2 = cli_readint32(ddst-grown+0x0f-4-reloc)^0xe908c483;
+
+ cli_dbgmsg("Petite: Found petite code in sect%d(%x). Let's strip it.\n", j-1, usects[j-1].rva);
+ if (test1 == test2 && CLI_ISCONTAINED(buf, bufsz, ddst-grown+0x0f-reloc, 0x1c0-0x0f+4)) {
+ irva = cli_readint32(ddst-grown+0x121-reloc);
+ enc_ep = cli_readint32(ddst-grown+0x0f-reloc)^test1;
+ mangled = ((uint32_t) cli_readint32(ddst-grown+0x1c0-reloc) != 0x90909090); /* FIXME: Magic's too short??? */
+ cli_dbgmsg("Petite: Encrypted EP: %x | Array of imports: %x\n",enc_ep, irva);
+ }
+ usects[j-1].rsz -= grown+reloc;
+ }
+ }
+ check4resources++;
+ } /* outer else */
+ } /* while true */
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_domaincheck_db.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,96 @@
+/*
+ * Phishing module: domain list implementation.
+ *
+ * Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+
+#ifndef CL_DEBUG
+#define NDEBUG
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT
+#endif
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "phishcheck.h"
+#include "phish_domaincheck_db.h"
+#include "regex_list.h"
+
+/*
+ * Looks up a (real_url, display_url) pair in the engine's domain list.
+ *
+ * pre_fixup is handed to regex_list_match() only for host-only lookups.
+ * When the lookup matches and the returned info string is non-empty and
+ * does not start with ':', the info must be exactly three hex digits: a
+ * mask of phishcheck flags that is cleared from *flags.  Any other format
+ * only produces a warning and leaves *flags untouched.
+ *
+ * Returns the result of regex_list_match(), or 0 when the engine has no
+ * domain list matcher.
+ */
+int domainlist_match(const struct cl_engine* engine,char* real_url,const char* display_url,const struct pre_fixup_info* pre_fixup,int hostOnly,unsigned short* flags)
+{
+    /* initialised so it is never read indeterminate when no lookup runs */
+    const char* info = NULL;
+    int rc = engine->domainlist_matcher ? regex_list_match(engine->domainlist_matcher,real_url,display_url,hostOnly ? pre_fixup : NULL,hostOnly,&info,0) : 0;
+
+    if(rc && info && info[0] && info[0] != ':') {/*match successful, and has custom flags*/
+        /* <ctype.h> functions need an unsigned char value, plain char may be negative */
+        if(strlen(info)==3 &&
+           isxdigit((unsigned char)info[0]) &&
+           isxdigit((unsigned char)info[1]) &&
+           isxdigit((unsigned char)info[2])) {
+            unsigned short notwantedflags=0;
+            sscanf(info,"%hx",&notwantedflags);
+            *flags &= ~notwantedflags;/* filter unwanted phishcheck flags */
+        }
+        else {
+            cli_warnmsg("Phishcheck:Unknown flag format in domain-list, 3 hex digits expected");
+        }
+    }
+    return rc;
+}
+
+/*
+ * Allocates the engine's domain list matcher and initialises it.
+ * Returns CL_ENULLARG for a NULL engine, CL_EMEM when the allocation
+ * fails, otherwise whatever init_regex_list() returns.
+ */
+int init_domainlist(struct cl_engine* engine)
+{
+    if(!engine)
+        return CL_ENULLARG;
+
+    engine->domainlist_matcher = (struct regex_matcher *) cli_malloc(sizeof(struct regex_matcher));
+    if(engine->domainlist_matcher)
+        return init_regex_list(engine->domainlist_matcher);
+
+    return CL_EMEM;
+}
+
+/*
+ * Reports the state of the domain list matcher via is_regex_ok().
+ * A NULL engine or an engine without a matcher is reported as 1.
+ */
+int is_domainlist_ok(const struct cl_engine* engine)
+{
+    if(!engine || !engine->domainlist_matcher)
+        return 1;
+    return is_regex_ok(engine->domainlist_matcher);
+}
+
+/* Runs regex_list_cleanup() on the domain list matcher, if there is one. */
+void domainlist_cleanup(const struct cl_engine* engine)
+{
+    if(!engine || !engine->domainlist_matcher)
+        return;
+    regex_list_cleanup(engine->domainlist_matcher);
+}
+
+/*
+ * Tears down the domain list matcher: regex_list_done(), then the matcher
+ * itself is freed and the engine's pointer is reset to NULL.
+ * Does nothing for a NULL engine or when no matcher is set.
+ */
+void domainlist_done(struct cl_engine* engine)
+{
+    if(!engine || !engine->domainlist_matcher)
+        return;
+
+    regex_list_done(engine->domainlist_matcher);
+    free(engine->domainlist_matcher);
+    engine->domainlist_matcher = NULL;
+}
+
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phish_whitelist.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,86 @@
+/*
+ * Phishing module: whitelist implementation.
+ *
+ * Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+
+#ifndef CL_DEBUG
+#define NDEBUG
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT
+#endif
+#endif
+
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "phish_whitelist.h"
+#include "regex_list.h"
+
+/*
+ * Looks up a (real_url, display_url) pair in the engine's whitelist.
+ * Returns the result of regex_list_match(), or 0 when the engine has no
+ * whitelist matcher.
+ */
+int whitelist_match(const struct cl_engine* engine,char* real_url,const char* display_url,int hostOnly)
+{
+    const char* info;/* required by regex_list_match(), not used here */
+
+    cli_dbgmsg("Phishing: looking up in whitelist: %s:%s; host-only:%d\n",real_url,display_url,hostOnly);
+    if(!engine->whitelist_matcher)
+        return 0;
+    return regex_list_match(engine->whitelist_matcher,real_url,display_url,NULL,hostOnly,&info,1);
+}
+
+/*
+ * Allocates the engine's whitelist matcher and initialises it.
+ * Returns CL_ENULLARG for a NULL engine, CL_EMEM when the allocation
+ * fails, otherwise whatever init_regex_list() returns.
+ */
+int init_whitelist(struct cl_engine* engine)
+{
+    if(!engine)
+        return CL_ENULLARG;
+
+    engine->whitelist_matcher = (struct regex_matcher *) cli_malloc(sizeof(struct regex_matcher));
+    if(engine->whitelist_matcher)
+        return init_regex_list(engine->whitelist_matcher);
+
+    return CL_EMEM;
+}
+
+/*
+ * Reports the state of the whitelist matcher via is_regex_ok().
+ * A NULL engine or an engine without a matcher is reported as 1.
+ */
+int is_whitelist_ok(const struct cl_engine* engine)
+{
+    if(!engine || !engine->whitelist_matcher)
+        return 1;
+    return is_regex_ok(engine->whitelist_matcher);
+}
+
+/* Runs regex_list_cleanup() on the whitelist matcher, if there is one. */
+void whitelist_cleanup(const struct cl_engine* engine)
+{
+    if(!engine || !engine->whitelist_matcher)
+        return;
+    regex_list_cleanup(engine->whitelist_matcher);
+}
+
+/*
+ * Tears down the whitelist matcher: regex_list_done(), then the matcher
+ * itself is freed and the engine's pointer is reset to NULL.
+ * Does nothing for a NULL engine or when no matcher is set.
+ */
+void whitelist_done(struct cl_engine* engine)
+{
+    if(!engine || !engine->whitelist_matcher)
+        return;
+
+    regex_list_done(engine->whitelist_matcher);
+    free(engine->whitelist_matcher);
+    engine->whitelist_matcher = NULL;
+}
+
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_phishcheck.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1336 @@
+/*
+ * Detect phishing, based on URL spoofing detection.
+ *
+ * Copyright (C) 2006-2007 Török Edvin <edwin at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#ifndef CL_DEBUG
+#define NDEBUG
+#endif
+
+#ifdef CL_THREAD_SAFE
+#ifndef _REENTRANT
+#define _REENTRANT
+#endif
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "clamav.h"
+#include "others.h"
+#include "mbox.h"
+#include "message.h"
+#include "htmlnorm.h"
+#include "phishcheck.h"
+#include "phish_domaincheck_db.h"
+#include "phish_whitelist.h"
+#include "iana_tld.h"
+
+
+/* Selectors telling which of the two URLs of a pair is meant */
+#define DOMAIN_REAL 1
+#define DOMAIN_DISPLAY 0
+
+/* Bit flags (each value is a distinct power of two, so they can be OR-ed) */
+#define PHISHY_USERNAME_IN_URL 1
+#define PHISHY_NUMERIC_IP 2
+#define REAL_IS_MAILTO 4
+/* this is just a flag, so that the displayed url will be parsed as mailto too, for example
+ * <a href='mailto:somebody@yahoo.com'>to:somebody@yahoo.com</a>*/
+#define DOMAIN_LISTED 8
+#define PHISHY_CLOAKED_NULL 16
+#define PHISHY_HEX_URL 32
+
+/*
+* Phishing design documentation,
+(initially written at http://wiki.clamav.net/index.php/phishing_design as discussed with aCaB)
+
+TODO:update this doc
+
+*Warning*: if flag *--phish-scan-alldomains* (or equivalent clamd/clamav-milter config option) isn't given, then phishing scanning is done only for domains listed in daily.pdb.
+If your daily.pdb is empty, then by default NO PHISHING is DONE, UNLESS you give the *--phish-scan-alldomains*
+This is just a side-effect: daily.pdb is empty because it isn't yet officially in daily.cvd.
+
+phishingCheck() determines if @displayedLink is a legit representation of @realLink.
+
+Steps:
+
+1. if _realLink_ *==* _displayLink_ => *CLEAN*
+
+2. url cleanup (normalization)
+- whitespace elimination
+- html entity conversion
+- convert hostname to lowercase
+- normalize \ to /
+If there is a dot after the last space, then all spaces are replaced with dots,
+otherwise spaces are stripped.
+So both: 'Go to yahoo.com', and 'Go to e b a y . c o m', and 'Go to ebay. com' will work.
+
+
+3. Matched the urls against a _whitelist_:
+a _realLink_, _displayedLink_ pair is matched against the _whitelist_.
+the _whitelist_ is a list of pairs of realLink, displayedLink. Any of the elements of those pairs can be a _regex_.
+ if url *is found* in _whitelist_ --> *CLEAN*
+
+4. URL is looked up in the _domainlist_, unless disabled via flags (_--phish-scan-alldomains_).
+The _domainlist_ is a list of pairs of realLink, displayedLink (any of which can be regex).
+This is the list of domains we do phishing detection for (such as ebay,paypal,chase,....)
+We can't decide to stop processing here or not, so we just set a flag.
+
+Note(*!*): the flags are modified by the domainlist checker. If the domain is found, then the flags associated with it filter the default compile-time flags.
+
+5. _Hostname_ is extracted from the _displayed URL_.
+It is checked against the _whitelist_, and _domainlist_.
+
+6. Now we know if we want to stop processing.
+If we are only scanning domains in the _domainlist_ (default behaviour), and the url/domain
+isn't found in it, we return (and mark url as not_list/clean).
+If we scan all domains, then the domainlist isn't even checked.
+
+7. URL cloak check.
+check for %00, and hex-encoded IPs in URL.
+
+8. Skip empty displayedURLs
+
+9. SSL mismatch detection.
+Checks if realLink is http, but displayedLink is https or viceversa.
+(by default the SSL detection is done for hrefs only, not for imgs)
+
+10. Hostname of real URL is extracted.
+
+12. Numeric IP detection.
+If url is a numeric IP, then -> phish.
+Maybe we should do DNS lookup?
+Maybe we should disable numericIP checks for --phish-scan-alldomains?
+
+13. isURL(displayedLink).
+Checks if displayedLink is really a url.
+if not -> clean
+
+14. Hostnames of real, displayedLink are compared. If equal -> clean
+
+15. Extract domain names, and compare. If equal -> clean
+
+16. Do DNS lookups/reverse lookups. Disabled now (too much load/too many lookups). *
+
+For the Whitelist(.wdb)/Domainlist(.pdb) format see regex_list.c (search for Flags)
+ *
+ */
+
+/* Constant strings and tables */
+/* shared, never freed backing store for empty struct string values */
+static char empty_string[]="";
+
+
+/* one component of a cloaked host: a hex (0x...) or a decimal number */
+#define ANY_CLOAK "(0[xX][0-9a-fA-F]+|[0-9]+)"
+/* one to four such components separated by dots, and nothing else */
+#define CLOAKED_URL "^"ANY_CLOAK"(\\."ANY_CLOAK"){0,3}$"
+
+static const char cloaked_host_regex[] = CLOAKED_URL;
+static const char tld_regex[] = "^"iana_tld"$";
+static const char cctld_regex[] = "^"iana_cctld"$";
+static const char dotnet[] = ".net";
+static const char adonet[] = "ado.net";
+static const char aspnet[] = "asp.net";
+/* ; is replaced by ' ' so omit it here*/
+static const char lt[]="&lt";
+static const char gt[]="&gt";
+static const char src_text[] = "src";
+static const char href_text[] = "href";
+static const char mailto[] = "mailto:";
+static const char https[]="https://";
+
+/* NOTE: unlike the lengths below, these two include the terminating NUL */
+static const size_t href_text_len = sizeof(href_text);
+static const size_t src_text_len = sizeof(src_text);
+static const size_t dotnet_len = sizeof(dotnet)-1;
+static const size_t adonet_len = sizeof(adonet)-1;
+static const size_t aspnet_len = sizeof(aspnet)-1;
+static const size_t lt_len = sizeof(lt)-1;
+static const size_t gt_len = sizeof(gt)-1;
+static const size_t mailto_len = sizeof(mailto)-1;
+static const size_t https_len = sizeof(https)-1;
+
+/* for urls, including mailto: urls, and (broken) http:www... style urls*/
+/* refer to: http://www.w3.org/Addressing/URL/5_URI_BNF.html
+ * Modifications: don't allow empty domains/subdomains, such as www..com <- that is no url
+ * So the 'safe' char class has been split up
+ * */
+/* character classes */
+#define URI_alpha "a-zA-Z"
+#define URI_digit "0-9"
+#define URI_safe_nodot "-$_@&"
+/* same set as URI_safe_nodot plus the dot */
+#define URI_safe "-$_@.&"
+#define URI_extra "!*\"'(),"
+#define URI_reserved "=;/#?: "
+#define URI_national "{}|[]\\^~"
+#define URI_punctuation "<>"
+
+/* a single hex digit; the upper-case range must stop at F, a range up to
+ * lower-case f would also accept G-Z and the punctuation between Z and a */
+#define URI_hex "[0-9a-fA-F]"
+#define URI_escape "%"URI_hex"{2}"
+#define URI_xalpha "([" URI_safe URI_alpha URI_digit URI_extra "]|"URI_escape")" /* URI_safe has to be first, because it contains - */
+#define URI_xalpha_nodot "([" URI_safe_nodot URI_alpha URI_digit URI_extra "]|"URI_escape")"
+
+#define URI_xalphas URI_xalpha"+"
+#define URI_xalphas_nodot URI_xalpha_nodot"*"
+
+#define URI_ialpha "["URI_alpha"]"URI_xalphas_nodot""
+#define URI_xpalpha URI_xalpha"|\\+"
+#define URI_xpalpha_nodot URI_xalpha_nodot"|\\+"
+#define URI_xpalphas "("URI_xpalpha")+"
+#define URI_xpalphas_nodot "("URI_xpalpha_nodot")+"
+#define optional_URI_xpalphas "("URI_xpalpha"|=)*"
+
+#define URI_scheme URI_ialpha
+#define URI_tld iana_tld
+#define URI_path1 URI_xpalphas_nodot"\\.("URI_xpalphas_nodot"\\.)*"
+#define URI_path2 URI_tld
+#define URI_path3 "(/"optional_URI_xpalphas")*"
+
+#define URI_search "("URI_xalphas")*"
+#define URI_fragmentid URI_xalphas
+
+#define URI_IP_digits "["URI_digit"]{1,3}"
+#define URI_numeric_path URI_IP_digits"(\\."URI_IP_digits"){3}(:"URI_xpalphas_nodot")?(/("URI_xpalphas"/?)*)?"
+#define URI_numeric_URI "("URI_scheme":(//)?)?"URI_numeric_path"(\\?" URI_search")?"
+#define URI_numeric_fragmentaddress URI_numeric_URI"(#"URI_fragmentid")?"
+
+#define URI_URI1 "("URI_scheme":(//)?)?"URI_path1
+#define URI_URI2 URI_path2
+#define URI_URI3 URI_path3"(\\?" URI_search")?"
+
+#define URI_fragmentaddress1 URI_URI1
+#define URI_fragmentaddress2 URI_URI2
+#define URI_fragmentaddress3 URI_URI3"(#"URI_fragmentid")?"
+
+#define URI_CHECK_PROTOCOLS "(http|https|ftp|mailto)://.+"
+
+/*Warning: take care when modifying this regex, it has been tweaked, and tuned, just don't break it please.
+ * there is fragmentaddress1, and 2 to work around the ISO limitation of 509 bytes max length for string constants*/
+static const char numeric_url_regex[] = "^ *"URI_numeric_fragmentaddress" *$";
+
+/* generated by contrib/phishing/generate_tables.c */
+/*
+ * Lookup table indexed by a byte value, giving the numeric value of that
+ * byte read as a hex digit: '0'-'9' -> 0x0-0x9, 'A'-'F' and 'a'-'f' ->
+ * 0xa-0xf.  Every other byte maps to 0, so the table alone cannot tell
+ * '0' apart from a byte that is not a hex digit.
+ */
+static const short int hextable[256] = {
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
+};
+
+/* Prototypes*/
+/* Forward declarations of file-local (static) helpers used before their definition */
+static void string_init_c(struct string* dest,char* data);
+static void string_assign_null(struct string* dest);
+static char *rfind(char *start, char c, size_t len);
+static char hex2int(const unsigned char* src);
+static int isTLD(const struct phishcheck* pchk,const char* str,int len);
+static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls);
+static const char* phishing_ret_toString(enum phish_status rc);
+
+/*
+ * Puts the three strings of a url_check (real link, displayed link and the
+ * pre-fixup copy of the displayed link) into their initial state by
+ * initialising each of them with a NULL data pointer.
+ */
+static void url_check_init(struct url_check* urls)
+{
+    struct string* target;
+
+    target = &urls->realLink;
+    string_init_c(target, NULL);
+
+    target = &urls->displayLink;
+    string_init_c(target, NULL);
+
+    target = &urls->pre_fixup.pre_displayLink;
+    string_init_c(target, NULL);
+}
+
+/* string reference counting implementation,
+ * so that: we don't have to keep in mind who allocated what, and when needs to be freed,
+ * and thus we won't leak memory*/
+
+/*
+ * Drops one reference from str.  When the count reaches zero the reference
+ * is passed on to the string it borrows its data from (str->ref), and so
+ * on up the chain; the first string in the chain that reaches zero and
+ * borrows from nobody has its data freed.
+ */
+static void string_free(struct string* str)
+{
+    while(--str->refcount == 0) {
+        if(!str->ref) {
+            /* owner of the data and nobody references it any more */
+            if(str->data)
+                free(str->data);
+            return;
+        }
+        /* don't free, this is a portion of another string: release that one */
+        str = str->ref;
+    }
+}
+
+/*
+ * Makes dest share the data of src.  Always assign through this function:
+ * it releases whatever dest held before and takes a reference on src, so
+ * src's data stays alive for as long as dest points at it.
+ */
+static void string_assign(struct string* dest,struct string* src)
+{
+    /* let go of the previous value first */
+    string_free(dest);
+
+    /* dest now holds one reference on src */
+    src->refcount++;
+
+    dest->ref = src;
+    dest->data = src->data;
+    dest->refcount = 1;
+}
+
+/*
+ * Initialises dest from a C string; data will be freed when the string is
+ * freed.  The previous content of dest is NOT released, so use this for
+ * initialisation only.  A NULL data pointer is replaced by a pointer to
+ * the shared empty string, with a reference count of 0.
+ */
+static void string_init_c(struct string* dest,char* data)
+{
+    if(data) {
+        dest->refcount = 1;
+        dest->data = data;
+    } else {
+        dest->refcount = 0;
+        dest->data = empty_string;
+    }
+    dest->ref = NULL;
+}
+
+/*
+ * Replaces dest with a freshly allocated, NUL terminated copy of the
+ * characters in [start, end).  Returns CL_EMEM, leaving dest untouched,
+ * when the allocation fails and CL_SUCCESS otherwise.
+ */
+static int string_assign_dup(struct string* dest,const char* start,const char* end)
+{
+    char* copy = cli_malloc(end-start+1);
+
+    if(copy == NULL)
+        return CL_EMEM;
+
+    strncpy(copy,start,end-start);
+    copy[end-start] = '\0';
+
+    /* the old value is only released once the copy is known to exist */
+    string_free(dest);
+    dest->ref = NULL;
+    dest->refcount = 1;
+    dest->data = copy;
+    return CL_SUCCESS;
+}
+
+/*
+ * Releases dest and makes it point at the shared empty string.
+ * A NULL dest is ignored.
+ */
+static void string_assign_null(struct string* dest)
+{
+    if(!dest)
+        return;
+
+    string_free(dest);
+    dest->ref = NULL;
+    dest->refcount = -1;/* don't free it! */
+    dest->data = empty_string;
+}
+
+/*
+ * Makes dest a view onto a portion of another string: data is the pointer
+ * dest will use and ref is the string that pointer belongs to.  dest's
+ * previous value is released and a reference is taken on ref.
+ */
+static void string_assign_ref(struct string* dest,struct string* ref,char* data)
+{
+    /* let go of the previous value first */
+    string_free(dest);
+
+    /* dest now holds one reference on ref */
+    ref->refcount++;
+
+    dest->ref = ref;
+    dest->data = data;
+    dest->refcount = 1;
+}
+
+/* Drops one reference on each of the three strings held by @url. */
+static void free_if_needed(struct url_check* url)
+{
+    struct string *members[3];
+    int idx;
+
+    members[0] = &url->realLink;
+    members[1] = &url->displayLink;
+    members[2] = &url->pre_fixup.pre_displayLink;
+
+    for (idx = 0; idx < 3; idx++)
+        string_free(members[idx]);
+}
+
+/* Compiles @regex into @preg as an extended, case-insensitive expression;
+ * REG_NOSUB is added when @nosub is non-zero.
+ * Returns CL_SUCCESS, or 1 after logging the compile error. */
+static int build_regex(regex_t* preg,const char* regex,int nosub)
+{
+    int cflags = REG_EXTENDED|REG_ICASE;
+    int err;
+
+    if (nosub)
+        cflags |= REG_NOSUB;
+
+    cli_dbgmsg("Phishcheck: Compiling regex: %s\n",regex);
+    err = cli_regcomp(preg, regex, cflags);
+    if (!err)
+        return CL_SUCCESS;
+
+#ifdef C_WINDOWS
+    cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks\n");
+#else
+    {
+        /* first call only asks how large the message is */
+        size_t needed = cli_regerror(err,preg,NULL,0);
+        char *msg = cli_malloc(needed);
+
+        if (msg == NULL) {
+            cli_errmsg("Phishcheck: Error in compiling regex, disabling phishing checks. Additionally an Out-of-memory error was encountered while generating a detailed error message\n");
+        } else {
+            cli_regerror(err,preg,msg,needed);
+            cli_errmsg("Phishcheck: Error in compiling regex:%s\nDisabling phishing checks\n",msg);
+            free(msg);
+        }
+    }
+#endif
+    return 1;
+}
+
+/* Locates the host portion of @URL. Nothing is copied for the caller: the
+ * result is a pair of pointers into @URL itself.
+ * @s:      phishcheck state, only passed on to isTLD()
+ * @URL:    URL text; when NULL, *hstart and *hend are set to NULL and 0 is returned
+ * @isReal: non-zero for the real link, zero for the displayed text
+ * @phishy: in/out flag word; REAL_IS_MAILTO and PHISHY_USERNAME_IN_URL may be set
+ * @hstart: receives the first character of the host
+ * @hend:   receives the position where the host stops (first of ':', '/', '?'
+ *          or the terminating NUL)
+ * Returns 0, or the negative value reported by isTLD().
+ */
+static int get_host(const struct phishcheck* s,const char* URL,int isReal,int* phishy,const char **hstart, const char **hend)
+{
+ int rc,ismailto = 0;
+ const char* start;
+ const char* end=NULL;
+ if(!URL) {
+ *hstart=*hend=NULL;
+ return 0;
+ }
+ start = strstr(URL,"://");
+ if(!start) {
+ if(!strncmp(URL,mailto,mailto_len)) {
+ start = URL + mailto_len;
+ ismailto = 1;
+ }
+ else if (!isReal && *phishy&REAL_IS_MAILTO) {
+ /* it is not required to use mailto: in the displayed url, they might use to:, or whatever */
+ end = URL+strlen(URL)+1; /* only compared against start below; the loop further down recomputes end on this path */
+ start = URL + strcspn(URL,": ")+1;
+ if (start==end)
+ start = URL;
+ ismailto = 1;
+ }
+ else {
+ start=URL;/*URL without protocol*/
+ if(isReal)
+ cli_dbgmsg("Phishcheck: Real URL without protocol: %s\n",URL);
+ else ismailto=2;/*no-protocol, might be mailto, @ is no problem*/
+ }
+ }
+ else
+ start += 3; /* :// */
+
+ if(!ismailto || !isReal) {
+ const char *realhost,*tld;
+
+ do {
+ end = start + strcspn(start,":/?");
+ realhost = strchr(start,'@');
+
+ if(realhost == NULL || (start!=end && realhost>end)) {
+ /*don't check beyond end of hostname*/
+ break;
+ }
+
+ tld = strrchr(realhost,'.');
+ rc = tld ? isTLD(s,tld,tld-realhost-1) : 0;
+ if(rc < 0)
+ return rc;
+ if(rc)
+ *phishy |= PHISHY_USERNAME_IN_URL;/* if the url contains a username that is there just to fool people,
+ like http://www.ebay.com@somevilplace.someevildomain.com/ */
+ start = realhost+1;/*skip the username*/
+ } while(realhost);/*skip over multiple @ characters, text following last @ character is the real host*/
+ }
+ else if (ismailto && isReal)
+ *phishy |= REAL_IS_MAILTO;
+
+ if(!end) {
+ end = start + strcspn(start,":/?");/*especially important for mailto:somebody@yahoo.com?subject=...*/
+ if(!end) /* NOTE(review): end was just derived from the non-NULL start, so this branch looks unreachable */
+ end = start + strlen(start);
+ }
+ *hstart = start;
+ *hend = end;
+ return 0;
+}
+
+/* Returns 1 when @str matches the preg_cctld expression of @s, 0 when it does
+ * not or when @str is NULL. */
+static int isCountryCode(const struct phishcheck* s,const char* str)
+{
+    if (str == NULL)
+        return 0;
+    return !cli_regexec(&s->preg_cctld,str,0,NULL,0);
+}
+
+/* Tests the first @len characters of @str against the preg_tld expression.
+ * The characters are copied into a temporary NUL-terminated buffer first.
+ * Returns 1 on a match, 0 on no match or a NULL @str, and CL_EMEM when the
+ * temporary buffer cannot be allocated. */
+static int isTLD(const struct phishcheck* pchk,const char* str,int len)
+{
+    char *copy;
+    int matched;
+
+    if (str == NULL)
+        return 0;
+
+    copy = cli_malloc(len+1);
+    if (copy == NULL)
+        return CL_EMEM;
+    strncpy(copy, str, len);
+    copy[len] = '\0';
+
+    matched = !cli_regexec(&pchk->preg_tld, copy, 0, NULL, 0);
+    free(copy);
+    return matched;
+}
+
+/*
+ * memrchr isn't standard, so I use this.
+ * Searches backwards for @c in start[0..len] (the byte at index @len is
+ * examined too) and returns a pointer to the last match, or NULL when
+ * @start is NULL or nothing matches.
+ */
+static char *
+rfind(char *start, char c, size_t len)
+{
+    size_t idx;
+
+    if(start == NULL)
+        return NULL;
+
+    /* Count down with an index instead of a pointer: a pointer loop has to
+     * step to start-1 to terminate, which lies outside the object. A @len of
+     * (size_t)-1, which callers produce with "dot - data - 1" for a leading
+     * dot, makes idx start at 0, so nothing is examined and NULL is returned. */
+    for(idx = len + 1; idx-- > 0; ) {
+        if(start[idx] == c)
+            return start + idx;
+    }
+    return NULL;
+}
+
+/* Derives the registrable-looking domain of @host and stores it in @dest.
+ * @dest becomes either a reference to the whole of @host (no dot found, or
+ * not enough levels) or a reference into @host starting after the chosen dot. */
+static void get_domain(const struct phishcheck* pchk,struct string* dest,struct string* host)
+{
+    char *last_dot = strrchr(host->data,'.');
+    char *prev_dot;
+
+    if (last_dot == NULL) {
+        cli_dbgmsg("Phishcheck: Encountered a host without a tld? (%s)\n",host->data);
+        string_assign(dest,host);
+        return;
+    }
+
+    if (isCountryCode(pchk,last_dot+1)) {
+        const char *cc = last_dot+1;
+
+        last_dot = rfind(host->data,'.',last_dot-host->data-1);
+        if (last_dot == NULL) {
+            cli_dbgmsg("Phishcheck: Weird, a name with only 2 levels (%s)\n",
+                host->data);
+            string_assign(dest,host);
+            return;
+        }
+        if (!isTLD(pchk,last_dot+1,cc-last_dot-1)) {
+            /* it was a name like: subdomain.domain.uk, return domain.uk */
+            string_assign_ref(dest,host,last_dot+1);
+            return;
+        }
+    }
+
+    /* we need to strip one more level, this is the actual domain */
+    prev_dot = rfind(host->data,'.',last_dot-host->data-1);
+    if (prev_dot != NULL)
+        string_assign_ref(dest,host,prev_dot+1);
+    else
+        string_assign(dest,host); /* it was like sourceforge.net? */
+}
+
+/* Returns 1 when @host is exactly a dotted quad "a.b.c.d" with every part in
+ * the range 0..255, and 0 otherwise. */
+static int isNumeric(const char* host)
+{
+    int len = strlen(host);
+    int a,b,c,d,n=0;
+    /* 1.2.3.4 -> 7*/
+    /* 127.127.127.127 -> 15*/
+    if(len<7 || len>15)
+        return 0;
+    /* %n is not counted in the result: 4 means all four numbers were read */
+    if(sscanf(host,"%d.%d.%d.%d%n",&a,&b,&c,&d,&n) != 4)
+        return 0;
+    /* trailing characters after the fourth number disqualify the host */
+    if(n != len)
+        return 0;
+    /* an octet tops out at 255; the former bound of 256 let "256.x.x.x" through */
+    return a>=0 && a<=255 && b>=0 && b<=255 && c>=0 && c<=255 && d>=0 && d<=255;
+}
+
+/* Returns non-zero when @URL begins with the https prefix; a NULL @URL gives 0. */
+static int isSSL(const char* URL)
+{
+    if (URL == NULL)
+        return 0;
+    return !strncmp(https,URL,https_len);
+}
+
+/* Decodes %xx escapes in place inside the range *begin .. *end.
+ * A leading escape is decoded by writing its value over its third byte and
+ * moving *begin forward by two. Later escapes are decoded where the '%'
+ * stood, the remainder of the range is shifted left by two, and *end is
+ * pulled back to match. The values come from hex2int().
+ * Nothing happens when *end <= *begin or when the string at *begin is at
+ * most two characters long. */
+static void
+str_hex_to_char(char **begin, const char **end)
+{
+ char *sbegin = *begin;
+ const char *str_end = *end;
+
+ if(str_end <= sbegin)
+ return;
+
+ if(strlen(sbegin) <= 2)
+ return;
+
+ /* convert leading %xx*/
+ if (sbegin[0] == '%') {
+ sbegin[2] = hex2int((unsigned char*)sbegin+1);
+ sbegin += 2;
+ }
+ *begin = sbegin++; /* publish the new start, then scan from the character after it */
+ while(sbegin+3 < str_end) { /* an escape too close to str_end is therefore left undecoded */
+ while(sbegin+3<str_end && sbegin[0]=='%') {
+ const char* src = sbegin+3;
+ *sbegin = hex2int((unsigned char*)sbegin+1);
+ /* move string */
+ memmove(sbegin+1,src,str_end-src+1); /* copies src..str_end inclusive; bytes beyond str_end are not moved */
+ str_end -= 2;
+ }
+ sbegin++;
+ }
+ *end = str_end;
+}
+
+/*
+ * Deletes occurrences of @what from the range *begin .. *end, in place.
+ * A match at the front is skipped by advancing *begin, matches at the back
+ * by pulling *end in, and matches in between are closed up with memmove().
+ * @what_len: length of @what, excluding the terminating \0
+ * Returns without changes when *end <= *begin or when the string at *begin
+ * is shorter than @what_len.
+ */
+static void
+str_strip(char **begin, const char **end, const char *what, size_t what_len)
+{
+ char *sbegin = *begin;
+ const char *str_end = *end;
+ const char *str_end_what;
+ size_t cmp_len = what_len;
+
+ if(begin == NULL || str_end <= sbegin) /* NOTE(review): *begin was already read in the initializer above, so the NULL test comes too late to protect it */
+ return;
+
+ /*if(str_end < (sbegin + what_len))
+ return;*/
+ if(strlen(sbegin) < what_len)
+ return;
+
+ /* strip leading @what */
+ while(cmp_len && !strncmp(sbegin,what,cmp_len)) { /* cmp_len drops to 0 after the first hit, so at most one leading @what is skipped */
+ sbegin += what_len;
+
+ if(cmp_len > what_len)
+ cmp_len -= what_len;
+ else
+ cmp_len = 0;
+ }
+
+ /* strip trailing @what */
+ if(what_len <= (size_t)(str_end - sbegin)) {
+ str_end_what = str_end - what_len + 1; /* start of the last what_len characters, str_end being the last character */
+ while((str_end_what > sbegin) &&
+ (strncmp(str_end_what, what, what_len) == 0)) {
+ str_end -= what_len;
+ str_end_what -= what_len;
+ }
+ }
+
+ *begin = sbegin++; /* publish the new start, then scan from the character after it */
+ while(sbegin+what_len <= str_end) {
+ while(sbegin+what_len<=str_end && !strncmp(sbegin,what,what_len)) {
+ const char* src = sbegin+what_len;
+ /* move string */
+ memmove(sbegin,src,str_end-src+1); /* copies src..str_end inclusive over the match */
+ str_end -= what_len;
+ }
+ sbegin++;
+ }
+ *end = str_end;
+}
+
+
+/* Overwrites every @c with @r in the range str .. end; the character at
+ * @end itself is included. */
+static void str_replace(char* str,const char* end,char c,char r)
+{
+    char *cursor = str;
+
+    while (cursor <= end) {
+        if (*cursor == c)
+            *cursor = r;
+        cursor++;
+    }
+}
+/* Lowercases the first @len characters of @str in place with tolower(). */
+static void str_make_lowercase(char* str,size_t len)
+{
+    size_t idx;
+
+    for (idx = 0; idx < len; idx++)
+        str[idx] = tolower(str[idx]);
+}
+
+#define fix32(x) ((x)<32 ? 32 : (x))
+/* Clears the top bit of every character of the NUL-terminated string @begin;
+ * any result below 32 is replaced by 32 (a space), so the string keeps its
+ * length. */
+static void clear_msb(char* begin)
+{
+    char *p = begin;
+
+    while (*p != '\0') {
+        int low7 = *p & 0x7f;
+
+        *p = fix32(low7);
+        p++;
+    }
+}
+
+/*
+ * Particularly yahoo puts links like this in mails:
+ * http:/ /mail.yahoo.com
+ * So first step: delete space between / /
+ *
+ * Next there could be possible links like this:
+ * <a href="phishlink">w w w . e b a y . c o m</a>
+ * Here we need to strip spaces to get this picked up.
+ *
+ * Next there are links like:
+ * <a href="www.yahoo.com">Check out yahoo.com</a>
+ * Here we add a ., so we get: check.out.yahoo.com (it won't trigger)
+ *
+ * Old Rule for adding .: if substring from right contains dot, then add dot,
+ * otherwise strip space
+ * New Rule: strip all spaces
+ * strip leading and trailing garbage
+ *
+ */
+/* Removes spaces from the range *begin .. *end with str_strip(), then trims
+ * non-alphanumeric characters from both ends. *begin and *end are updated;
+ * nothing happens when either is NULL or when *end lies before *begin. */
+static void
+str_fixup_spaces(char **begin, const char **end)
+{
+    char* sbegin = *begin;
+    const char* send = *end;
+    if(!sbegin || !send || send < sbegin)
+        return;
+    /* strip spaces */
+    str_strip(&sbegin, &send, " ",1);
+    /* strip leading/trailing garbage.
+     * The bounds test comes first so that no character outside the range is
+     * read, and the character is converted to unsigned char because isalnum()
+     * is only defined for EOF and unsigned char values (bytes >= 0x80 can be
+     * produced by %xx decoding). */
+    while(sbegin <= send && !isalnum((unsigned char)sbegin[0])) sbegin++;
+    while(send >= sbegin && !isalnum((unsigned char)send[0])) send--;
+    *begin = sbegin;
+    *end = send;
+}
+
+/* Normalizes the URL held in @URL and stores the cleaned text back into it
+ * as a newly allocated copy (see string_assign_dup()).
+ * @URL:     string to clean; its current buffer is edited in place first
+ * @pre_URL: for a displayed link (@isReal == 0) receives the copy taken just
+ *           before str_fixup_spaces(); only reached through
+ *           string_assign_null(), which ignores NULL, when @isReal is set
+ * @isReal:  non-zero for the real link, zero for the displayed link
+ * Input that is empty or one character long after skipping leading
+ * whitespace, or that starts with the dotnet/adonet/aspnet prefixes, is
+ * replaced by the empty string.
+ * Returns 0, or the error code from string_assign_dup().
+ */
+static int
+cleanupURL(struct string *URL,struct string *pre_URL, int isReal)
+{
+ char *begin = URL->data;
+ const char *end;
+ size_t len;
+
+ clear_msb(begin);
+ /*if(begin == NULL)
+ return;*/
+ /*TODO: handle hex-encoded IPs*/
+ while(isspace(*begin))
+ begin++;
+
+ len = strlen(begin);
+ if(len == 0) {
+ string_assign_null(URL);
+ string_assign_null(pre_URL);
+ return 0;
+ }
+
+ end = begin + len - 1; /* end is the last character, not one past it */
+ /*cli_dbgmsg("%d %d\n", end-begin, len);*/
+ if(begin >= end) {
+ string_assign_null(URL);
+ string_assign_null(pre_URL);
+ return 0;
+ }
+ while(isspace(*end))
+ end--;
+ /*TODO: convert \ to /, and stuff like that*/
+ /* From mailscanner, my comments enclosed in {} */
+ if(!strncmp(begin,dotnet,dotnet_len) || !strncmp(begin,adonet,adonet_len) || !strncmp(begin,aspnet,aspnet_len)) {
+ string_assign_null(URL);
+ string_assign_null(pre_URL);
+ }
+ else {
+ size_t host_len;
+ char* host_begin;
+ int rc;
+
+ str_replace(begin,end,'\\','/');
+ /* some broken MUAs put > in the href, and then
+ * we get a false positive, so remove them */
+ str_replace(begin,end,'<',' ');
+ str_replace(begin,end,'>',' ');
+ str_replace(begin,end,'\"',' ');
+ str_replace(begin,end,';',' ');
+ str_strip(&begin,&end,lt,lt_len);
+ str_strip(&begin,&end,gt,gt_len);
+ /* convert hostname to lowercase, but only hostname! */
+ host_begin = strchr(begin,':');
+ while(host_begin && host_begin[1]=='/') host_begin++; /* step over the slashes that follow the colon */
+ if(!host_begin) host_begin=begin;
+ else host_begin++;
+ host_len = strcspn(host_begin,"/?");
+ str_make_lowercase(host_begin,host_len);
+ /* convert %xx to real value */
+ str_hex_to_char(&begin,&end);
+ if(isReal) {
+ /* htmlnorm converts \n to space, so we have to strip spaces */
+ str_strip(&begin, &end, " ", 1);
+ }
+ else {
+ /* trim space */
+ while((begin <= end) && (begin[0]==' ')) begin++;
+ while((begin <= end) && (end[0]==' ')) end--;
+ }
+ if (( rc = string_assign_dup(isReal ? URL : pre_URL,begin,end+1) )) { /* end+1 because string_assign_dup() takes an exclusive end */
+ string_assign_null(URL);
+ return rc;
+ }
+ if(!isReal) {
+ str_fixup_spaces(&begin,&end);
+ if (( rc = string_assign_dup(URL,begin,end+1) )) {
+ return rc;
+ }
+ }
+ /*cli_dbgmsg("%p::%s\n",URL->data,URL->data);*/
+ }
+ return 0;
+}
+
+
+/* -------end runtime disable---------*/
+/* Logs the name currently stored in *ctx->virname, raises the
+ * found_possibly_unwanted flag of @ctx and returns CL_CLEAN. */
+static int found_possibly_unwanted(cli_ctx* ctx)
+{
+    cli_dbgmsg("Phishcheck: found Possibly Unwanted: %s\n",*ctx->virname);
+    ctx->found_possibly_unwanted = 1;
+    return CL_CLEAN;
+}
+
+/* Runs the phishing heuristics over every link collected in @hrefs.
+ * @m, @dir: not used by this function
+ * @ctx:     scan context; supplies the engine, the scan options, and receives
+ *           the detection name through *ctx->virname
+ * @hrefs:   parallel arrays of tag names, link values and link contents
+ * Always returns CL_CLEAN. A suspicious link is reported by setting
+ * *ctx->virname and ctx->found_possibly_unwanted (see
+ * found_possibly_unwanted()), after which the scan of @hrefs stops.
+ */
+int phishingScan(message* m,const char* dir,cli_ctx* ctx,tag_arguments_t* hrefs)
+{
+    int i;
+    struct phishcheck* pchk = (struct phishcheck*) ctx->engine->phishcheck;
+    /* check for status of whitelist fatal error, etc. */
+    if(!pchk || pchk->is_disabled)
+        return CL_CLEAN;
+
+    if(!ctx->found_possibly_unwanted)
+        *ctx->virname=NULL;
+    for(i=0;i<hrefs->count;i++) {
+        struct url_check urls;
+        enum phish_status rc;
+
+        if(!hrefs->contents[i]) {
+            if(strcmp((char*)hrefs->tag[i],"href"))
+                cli_dbgmsg("Phishcheck: href with no contents?\n");
+            continue;
+        }
+
+        urls.always_check_flags = DOMAINLIST_REQUIRED;/* required to work correctly */
+        urls.flags = strncmp((char*)hrefs->tag[i],href_text,href_text_len)? (CL_PHISH_ALL_CHECKS&~CHECK_SSL): CL_PHISH_ALL_CHECKS;
+        urls.link_type = 0;
+        if(!strncmp((char*)hrefs->tag[i],src_text,src_text_len)) {
+            if (!(urls.flags&CHECK_IMG_URL))
+                continue;
+            urls.link_type |= LINKTYPE_IMAGE;
+        }
+        if (ctx->options&CL_SCAN_PHISHING_DOMAINLIST)
+            urls.flags |= DOMAINLIST_REQUIRED;
+        if (ctx->options & CL_SCAN_PHISHING_BLOCKSSL) {
+            urls.always_check_flags |= CHECK_SSL;
+        }
+        if (ctx->options & CL_SCAN_PHISHING_BLOCKCLOAK) {
+            urls.always_check_flags |= CHECK_CLOAKING;
+        }
+        string_init_c(&urls.realLink,(char*)hrefs->value[i]);
+        string_init_c(&urls.displayLink,(char*)blobGetData(hrefs->contents[i]));
+        string_init_c(&urls.pre_fixup.pre_displayLink, NULL);
+        /* the displayed text must be NUL-terminated inside the blob.
+         * NOTE(review): this indexes size-1; confirm the blob size cannot be 0 here */
+        if (urls.displayLink.data[blobGetDataSize(hrefs->contents[i])-1]) {
+            cli_warnmsg("urls.displayLink.data[...]");
+            return CL_CLEAN;
+        }
+
+        urls.realLink.refcount=-1;
+        urls.displayLink.refcount=-1;/*don't free these, caller will free*/
+        if(strcmp((char*)hrefs->tag[i],"href")) {
+            /* for tags other than href the two roles are exchanged */
+            char *url;
+            url = urls.realLink.data;
+            urls.realLink.data = urls.displayLink.data;
+            urls.displayLink.data = url;
+        }
+
+        rc = phishingCheck(ctx->engine,&urls);
+        /* Release the strings before looking at is_disabled: phishingCheck()
+         * may have replaced them with allocated copies, and returning first
+         * would leak those copies. */
+        free_if_needed(&urls);
+        if(pchk->is_disabled)
+            return CL_CLEAN;
+        cli_dbgmsg("Phishcheck: Phishing scan result: %s\n",phishing_ret_toString(rc));
+        switch(rc)/*TODO: support flags from ctx->options,*/
+        {
+            case CL_PHISH_CLEAN:
+            case CL_PHISH_CLEANUP_OK:
+            case CL_PHISH_HOST_OK:
+            case CL_PHISH_DOMAIN_OK:
+            case CL_PHISH_REDIR_OK:
+            case CL_PHISH_HOST_REDIR_OK:
+            case CL_PHISH_DOMAIN_REDIR_OK:
+            case CL_PHISH_HOST_REVERSE_OK:
+            case CL_PHISH_DOMAIN_REVERSE_OK:
+            case CL_PHISH_WHITELISTED:
+            case CL_PHISH_HOST_WHITELISTED:
+            case CL_PHISH_MAILTO_OK:
+            case CL_PHISH_TEXTURL:
+            case CL_PHISH_HOST_NOT_LISTED:
+            case CL_PHISH_CLEAN_CID:
+                continue; /* nothing suspicious: go on with the next link */
+            case CL_PHISH_HEX_URL:
+                *ctx->virname="Phishing.Heuristics.Email.HexURL";
+                return found_possibly_unwanted(ctx);
+            case CL_PHISH_NUMERIC_IP:
+                *ctx->virname="Phishing.Heuristics.Email.Cloaked.NumericIP";
+                return found_possibly_unwanted(ctx);
+            case CL_PHISH_CLOAKED_NULL:
+                *ctx->virname="Phishing.Heuristics.Email.Cloaked.Null";/*http://www.real.com%01%00@www.evil.com*/
+                return found_possibly_unwanted(ctx);
+            case CL_PHISH_SSL_SPOOF:
+                *ctx->virname="Phishing.Heuristics.Email.SSL-Spoof";
+                return found_possibly_unwanted(ctx);
+            case CL_PHISH_CLOAKED_UIU:
+                *ctx->virname="Phishing.Heuristics.Email.Cloaked.Username";/*http://www.ebay.com@www.evil.com*/
+                return found_possibly_unwanted(ctx);
+            case CL_PHISH_NOMATCH:
+            default:
+                *ctx->virname="Phishing.Heuristics.Email.SpoofedDomain";
+                return found_possibly_unwanted(ctx);
+        }
+    }
+    return CL_CLEAN;
+}
+
+/* Returns a newly allocated string holding @a, @b and @c joined in that
+ * order, or NULL when the allocation fails. The caller frees the result. */
+static char* str_compose(const char* a,const char* b,const char* c)
+{
+    const size_t len_a = strlen(a);
+    const size_t len_b = strlen(b);
+    const size_t len_c = strlen(c);
+    char *joined = cli_malloc(len_a+len_b+len_c+1);
+    char *out = joined;
+
+    if (joined == NULL)
+        return NULL;
+
+    memcpy(out, a, len_a);
+    out += len_a;
+    memcpy(out, b, len_b);
+    out += len_b;
+    memcpy(out, c, len_c);
+    out += len_c;
+    *out = '\0';
+    return joined;
+}
+
+/* Converts the two hex digits at @src into one character using hextable. */
+static char hex2int(const unsigned char* src)
+{
+    /* don't convert %00 to \0, use 0x1
+     * this value is also used by cloak check */
+    if (src[0] == '0' && src[1] == '0')
+        return 0x1;
+
+    return hextable[src[0]]<<4 | hextable[src[1]];
+}
+
+/* Releases a compiled expression with cli_regfree(); NULL is ignored. */
+static void free_regex(regex_t* p)
+{
+    if (p == NULL)
+        return;
+    cli_regfree(p);
+}
+
+/* Allocates (if needed) and initializes the phishcheck state of @engine by
+ * compiling all of its regular expressions.
+ * Returns CL_SUCCESS when the module is ready (or was already initialized),
+ * CL_EMEM when an allocation fails, and CL_EFORMAT when an expression does
+ * not compile. On failure every expression compiled so far is released, the
+ * state is freed and engine->phishcheck is reset to NULL.
+ */
+int phishing_init(struct cl_engine* engine)
+{
+    char *url_regex, *realurl_regex;
+    struct phishcheck* pchk;
+    int rc = CL_EFORMAT;
+
+    if(!engine->phishcheck) {
+        pchk = engine->phishcheck = cli_malloc(sizeof(struct phishcheck));
+        if(!pchk)
+            return CL_EMEM;
+        pchk->is_disabled = 1;
+    }
+    else {
+        pchk = engine->phishcheck;
+        if(!pchk->is_disabled) {
+            /* already initialized */
+            return CL_SUCCESS;
+        }
+    }
+
+    cli_dbgmsg("Initializing phishcheck module\n");
+
+    /* Each failure jumps to the label that releases exactly the expressions
+     * compiled before it; an expression whose compilation failed is never
+     * passed to free_regex(). */
+    if(build_regex(&pchk->preg_hexurl,cloaked_host_regex,1))
+        goto fail;
+    if(build_regex(&pchk->preg_cctld,cctld_regex,1))
+        goto fail_hexurl;
+    if(build_regex(&pchk->preg_tld,tld_regex,1))
+        goto fail_cctld;
+
+    url_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|("URI_fragmentaddress1,URI_fragmentaddress2,URI_fragmentaddress3")) *$");
+    if(!url_regex) {
+        rc = CL_EMEM;
+        goto fail_tld;
+    }
+    if(build_regex(&pchk->preg,url_regex,1)) {
+        free(url_regex);
+        goto fail_tld;
+    }
+    free(url_regex);
+
+    realurl_regex = str_compose("^ *(("URI_CHECK_PROTOCOLS")|("URI_path1,URI_fragmentaddress2,URI_fragmentaddress3")) *$");
+    if(!realurl_regex) {
+        rc = CL_EMEM;
+        goto fail_preg;
+    }
+    if(build_regex(&pchk->preg_realurl, realurl_regex,1)) {
+        /* url_regex was already freed above and must not be freed again */
+        free(realurl_regex);
+        goto fail_preg;
+    }
+    free(realurl_regex);
+
+    if(build_regex(&pchk->preg_numeric,numeric_url_regex,1))
+        goto fail_realurl;
+
+    pchk->is_disabled = 0;
+    cli_dbgmsg("Phishcheck module initialized\n");
+    return CL_SUCCESS;
+
+fail_realurl:
+    free_regex(&pchk->preg_realurl);
+fail_preg:
+    free_regex(&pchk->preg);
+fail_tld:
+    free_regex(&pchk->preg_tld);
+fail_cctld:
+    free_regex(&pchk->preg_cctld);
+fail_hexurl:
+    free_regex(&pchk->preg_hexurl);
+fail:
+    free(pchk);
+    engine->phishcheck = NULL;
+    return rc;
+}
+
+/* Tears down the phishcheck module of @engine: releases the compiled
+ * expressions when the module is enabled, shuts down the whitelist and the
+ * domain list, then frees the state and clears engine->phishcheck. */
+void phishing_done(struct cl_engine* engine)
+{
+    struct phishcheck* pchk = engine->phishcheck;
+
+    cli_dbgmsg("Cleaning up phishcheck\n");
+    if (pchk != NULL && !pchk->is_disabled) {
+        regex_t *compiled[6];
+        int k;
+
+        compiled[0] = &pchk->preg;
+        compiled[1] = &pchk->preg_hexurl;
+        compiled[2] = &pchk->preg_cctld;
+        compiled[3] = &pchk->preg_tld;
+        compiled[4] = &pchk->preg_numeric;
+        compiled[5] = &pchk->preg_realurl;
+        for (k = 0; k < 6; k++)
+            free_regex(compiled[k]);
+        pchk->is_disabled = 1;
+    }
+
+    whitelist_done(engine);
+    domainlist_done(engine);
+
+    if (pchk != NULL) {
+        cli_dbgmsg("Freeing phishcheck struct\n");
+        free(pchk);
+        engine->phishcheck = NULL;
+    }
+    cli_dbgmsg("Phishcheck cleaned up\n");
+}
+
+/*
+ * Only those URLs are identified as URLs for which phishing detection can be performed.
+ */
+/* Returns 1 when @URL matches the preg expression of @pchk, 0 when it does
+ * not or when @URL is NULL. */
+static int isURL(const struct phishcheck* pchk,const char* URL)
+{
+    if (URL == NULL)
+        return 0;
+    return !cli_regexec(&pchk->preg,URL,0,NULL,0);
+}
+
+/*
+ * Check if this is a real URL, which basically means to check if it has a known URL scheme (http,https,ftp).
+ * This prevents false positives with outbind:// and blocked:: links.
+ */
+/* Returns 1 when @URL matches the preg_realurl expression of @pchk, 0 when
+ * it does not or when @URL is NULL. */
+static int isRealURL(const struct phishcheck* pchk,const char* URL)
+{
+    if (URL == NULL)
+        return 0;
+    return !cli_regexec(&pchk->preg_realurl,URL,0,NULL,0);
+}
+
+/* Returns 1 when @URL matches the preg_numeric expression of @pchk, 0 when
+ * it does not or when @URL is NULL. */
+static int isNumericURL(const struct phishcheck* pchk,const char* URL)
+{
+    if (URL == NULL)
+        return 0;
+    return !cli_regexec(&pchk->preg_numeric,URL,0,NULL,0);
+}
+
+/* Cleans up both links of @urls when the CLEANUP_URL flag is set.
+ * Returns CL_PHISH_CLEANUP_OK if the two URLs are identical after cleanup,
+ * and CL_PHISH_NODECISION in every other case. The results of the two
+ * cleanupURL() calls are not examined.
+ * */
+static enum phish_status cleanupURLs(struct url_check* urls)
+{
+    const char *real, *shown;
+
+    if (!(urls->flags&CLEANUP_URL))
+        return CL_PHISH_NODECISION;
+
+    cleanupURL(&urls->realLink,NULL,1);
+    cleanupURL(&urls->displayLink,&urls->pre_fixup.pre_displayLink,0);
+
+    real = urls->realLink.data;
+    shown = urls->displayLink.data;
+    if (shown && real && !strcmp(real,shown))
+        return CL_PHISH_CLEANUP_OK;
+    return CL_PHISH_NODECISION;
+}
+
+static int url_get_host(const struct phishcheck* pchk, struct url_check* url,struct url_check* host_url,int isReal,int* phishy)
+{ /* Copies the host of one link of @url into the matching link of @host_url.
+ * @isReal selects the pair: realLink when non-zero, displayLink otherwise.
+ * For the displayed link the host offsets inside the URL are also recorded
+ * in url->pre_fixup (host_start / host_end).
+ * @phishy is passed to get_host() and gains PHISHY_NUMERIC_IP when the host
+ * passes isNumeric().
+ * Returns CL_PHISH_NODECISION when no verdict was reached, another
+ * phish_status value when one was, or the error code handed up from
+ * get_host() / string_assign_dup(); callers test the result for non-zero. */
+ const char *start, *end;
+ struct string* host = isReal ? &host_url->realLink : &host_url->displayLink;
+ const char* URL = isReal ? url->realLink.data : url->displayLink.data;
+ int rc;
+ if ((rc = get_host(pchk, URL, isReal, phishy, &start, &end))) {
+ return rc;
+ }
+ if(!start || !end) {
+ string_assign_null(host);
+ }
+ else {
+ if(( rc = string_assign_dup(host,start,end) ))
+ return rc;
+ }
+ cli_dbgmsg("Phishcheck:host:%s\n", host->data);
+ if(!isReal) {
+ url->pre_fixup.host_start = start - URL;
+ url->pre_fixup.host_end = end - URL;
+ }
+ if(!host->data) /* NOTE(review): both assignments above leave data non-NULL (empty string or a fresh copy), so this looks unreachable */
+ return CL_PHISH_CLEANUP_OK;
+ if(*phishy&REAL_IS_MAILTO)
+ return CL_PHISH_MAILTO_OK;
+ if(strchr(host->data,' ')) {
+ string_free(host);
+ return CL_PHISH_TEXTURL;
+ }
+ if(url->flags&CHECK_CLOAKING && !cli_regexec(&pchk->preg_hexurl,host->data,0,NULL,0)) {
+ /* uses a regex here, so that we don't accidentally block 0xacab.net style hosts */
+ string_free(host);
+ return CL_PHISH_HEX_URL;
+ }
+ if(isReal && host->data[0]=='\0')
+ return CL_PHISH_CLEAN;/* link without domain, such as: href="/isapi.dll?... */
+ if(isNumeric(host->data)) {
+ *phishy |= PHISHY_NUMERIC_IP;
+ }
+ return CL_PHISH_NODECISION;
+}
+
+/* Fills @domains with the domains derived from the two hosts in @url (via
+ * get_domain()) and copies the flags across. */
+static void url_get_domain(const struct phishcheck* pchk, struct url_check* url,struct url_check* domains)
+{
+    domains->flags = url->flags;
+
+    get_domain(pchk, &domains->realLink, &url->realLink);
+    get_domain(pchk, &domains->displayLink, &url->displayLink);
+}
+
+/* Translates the @phishy flag word into a status: a username in the URL
+ * takes precedence over a numeric IP; with neither flag set @fallback is
+ * returned. */
+static enum phish_status phishy_map(int phishy,enum phish_status fallback)
+{
+    if (phishy&PHISHY_USERNAME_IN_URL)
+        return CL_PHISH_CLOAKED_UIU;
+    if (phishy&PHISHY_NUMERIC_IP)
+        return CL_PHISH_NUMERIC_IP;
+    return fallback;
+}
+
+/* Counts the "&#" ... ";" sequences in @url and returns non-zero when that
+ * count exceeds 70% of the string length (more than 70% made up of &#;).
+ * NOTE(review): each sequence is at least three characters long, so the
+ * count looks unable to exceed the threshold - confirm the intended measure. */
+static int isEncoded(const char* url)
+{
+    const char *cursor = url;
+    size_t sequences = 0;
+
+    for (;;) {
+        const char *amp = strstr(cursor,"&#");
+
+        if (amp == NULL)
+            break;
+        cursor = strstr(amp,";");
+        if (cursor == NULL)
+            break;
+        sequences++;
+    }
+    return (sequences >strlen(url)*7/10);
+}
+
+/* Looks up the real/displayed link pair of @urls with whitelist_match() and
+ * returns its result; @hostOnly is passed through unchanged. */
+static int whitelist_check(const struct cl_engine* engine,struct url_check* urls,int hostOnly)
+{
+    const char *real = urls->realLink.data;
+    const char *shown = urls->displayLink.data;
+
+    return whitelist_match(engine,real,shown,hostOnly);
+}
+
+/* Returns 0 for the status values that mean "not phishing" and 1 for every
+ * other value, including ones this function does not know. */
+static int isPhishing(enum phish_status rc)
+{
+    int verdict = 1;
+
+    switch(rc) {
+        case CL_PHISH_CLEAN:
+        case CL_PHISH_CLEANUP_OK:
+        case CL_PHISH_WHITELISTED:
+        case CL_PHISH_HOST_WHITELISTED:
+        case CL_PHISH_HOST_OK:
+        case CL_PHISH_DOMAIN_OK:
+        case CL_PHISH_REDIR_OK:
+        case CL_PHISH_HOST_REDIR_OK:
+        case CL_PHISH_DOMAIN_REDIR_OK:
+        case CL_PHISH_HOST_REVERSE_OK:
+        case CL_PHISH_DOMAIN_REVERSE_OK:
+        case CL_PHISH_MAILTO_OK:
+        case CL_PHISH_TEXTURL:
+        case CL_PHISH_HOST_NOT_LISTED:
+        case CL_PHISH_CLEAN_CID:
+            verdict = 0;
+            break;
+        case CL_PHISH_HEX_URL:
+        case CL_PHISH_CLOAKED_NULL:
+        case CL_PHISH_SSL_SPOOF:
+        case CL_PHISH_CLOAKED_UIU:
+        case CL_PHISH_NUMERIC_IP:
+        case CL_PHISH_NOMATCH:
+        default:
+            break;
+    }
+    return verdict;
+}
+/* Decides whether the real/displayed link pair in @urls looks like phishing.
+ * urls can't contain null pointer, caller must ensure this.
+ * @engine: supplies the phishcheck state and is passed to the whitelist and
+ *          domain-list lookups
+ * @urls:   the link pair; both strings may be replaced by cleaned copies and
+ *          urls->flags may be narrowed to urls->always_check_flags
+ * Returns a phish_status value; an error code coming back from
+ * url_get_host() for the real link is returned unchanged.
+ * Every exit taken after host_url is initialized releases it.
+ */
+static enum phish_status phishingCheck(const struct cl_engine* engine,struct url_check* urls)
+{
+    struct url_check host_url;
+    enum phish_status rc=CL_PHISH_NODECISION;
+    int phishy=0;
+    const struct phishcheck* pchk = (const struct phishcheck*) engine->phishcheck;
+
+    if(!urls->realLink.data)
+        return CL_PHISH_CLEAN;
+
+    cli_dbgmsg("Phishcheck:Checking url %s->%s\n", urls->realLink.data,
+        urls->displayLink.data);
+
+    if(!strcmp(urls->realLink.data,urls->displayLink.data))
+        return CL_PHISH_CLEAN;/* displayed and real URL are identical -> clean */
+
+    if((rc = cleanupURLs(urls))) {
+        if(isPhishing(rc))/* not allowed to decide this is phishing */
+            return CL_PHISH_CLEAN;
+        return rc;/* URLs identical after cleanup */
+    }
+
+    if(whitelist_check(engine,urls,0))
+        return CL_PHISH_WHITELISTED;/* if url is whitelist don't perform further checks */
+
+    if((!isURL(pchk, urls->displayLink.data) || !isRealURL(pchk, urls->realLink.data) )&&
+        ( (phishy&PHISHY_NUMERIC_IP && !isNumericURL(pchk, urls->displayLink.data)) ||
+          !(phishy&PHISHY_NUMERIC_IP))) {
+        cli_dbgmsg("Displayed 'url' is not url:%s\n",urls->displayLink.data);
+        return CL_PHISH_TEXTURL;
+    }
+
+    /* although the entire url may not be listed, the host might be,
+     * so phishing decisions are deferred till we know if host is listed */
+    if(urls->flags&DOMAINLIST_REQUIRED && domainlist_match(engine,urls->realLink.data,urls->displayLink.data,NULL,0,&urls->flags))
+        phishy |= DOMAIN_LISTED;
+
+    url_check_init(&host_url);
+
+    if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_DISPLAY,&phishy))) {
+        free_if_needed(&host_url);
+        if(isPhishing(rc))
+            return CL_PHISH_CLEAN;
+        return rc;
+    }
+
+    if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) {
+        if(domainlist_match(engine,host_url.displayLink.data,host_url.realLink.data,&urls->pre_fixup,1,&urls->flags))
+            phishy |= DOMAIN_LISTED;
+    }
+
+    /* link type filtering must occur after last domainlist_match */
+    if(urls->link_type & LINKTYPE_IMAGE && !(urls->flags&CHECK_IMG_URL)) {
+        /* its listed, but this link type is filtered */
+        free_if_needed(&host_url);/* the displayed host was copied above */
+        return CL_PHISH_HOST_NOT_LISTED;
+    }
+
+    if(urls->flags & DOMAINLIST_REQUIRED && !(phishy & DOMAIN_LISTED) ) {
+        urls->flags &= urls->always_check_flags;
+        if(!urls->flags) {
+            free_if_needed(&host_url);
+            return CL_PHISH_HOST_NOT_LISTED;
+        }
+    }
+
+    if(urls->flags&CHECK_CLOAKING) {
+        /*Checks if URL is cloaked.
+        Should we check if it contains another http://, https://?
+        No because we might get false positives from redirect services.*/
+        if(strchr(urls->realLink.data,0x1)) {
+            free_if_needed(&host_url);
+            return CL_PHISH_CLOAKED_NULL;
+        }
+        if(isEncoded(urls->displayLink.data)) {
+            free_if_needed(&host_url);
+            return CL_PHISH_HEX_URL;
+        }
+    }
+
+    if(urls->displayLink.data[0]=='\0') {
+        free_if_needed(&host_url);
+        return CL_PHISH_CLEAN;
+    }
+
+    if(urls->flags&CHECK_SSL && isSSL(urls->displayLink.data) && !isSSL(urls->realLink.data)) {
+        free_if_needed(&host_url);
+        return CL_PHISH_SSL_SPOOF;
+    }
+
+    /* The flag test needs its own parentheses: "!flags & CHECK_CLOAKING"
+     * negates the whole flag word before masking. */
+    if(!(urls->flags&CHECK_CLOAKING) && urls->flags & DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED) ) {
+        free_if_needed(&host_url);
+        return CL_PHISH_HOST_NOT_LISTED;
+    }
+
+    if((rc = url_get_host(pchk, urls,&host_url,DOMAIN_REAL,&phishy)))
+    {
+        free_if_needed(&host_url);
+        return rc;
+    }
+
+    if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED)) {
+        free_if_needed(&host_url);
+        return CL_PHISH_HOST_NOT_LISTED;
+    }
+
+    if(whitelist_check(engine,&host_url,1)) {
+        free_if_needed(&host_url);
+        return CL_PHISH_HOST_WHITELISTED;
+    }
+
+    if(urls->flags&HOST_SUFFICIENT) {
+        /* NOTE(review): this compares the full URLs although the result is
+         * "hosts match" - confirm whether host_url was meant here */
+        if(!strcmp(urls->realLink.data,urls->displayLink.data)) {
+            free_if_needed(&host_url);
+            return CL_PHISH_HOST_OK;
+        }
+
+        if(urls->flags&DOMAIN_SUFFICIENT) {
+            struct url_check domain_url;
+            url_check_init(&domain_url);
+            url_get_domain(pchk, &host_url,&domain_url);
+            if(!strcmp(domain_url.realLink.data,domain_url.displayLink.data)) {
+                free_if_needed(&host_url);
+                free_if_needed(&domain_url);
+                return CL_PHISH_DOMAIN_OK;
+            }
+            free_if_needed(&domain_url);
+        }
+    }/*HOST_SUFFICIENT*/
+
+    /* released here for every remaining path, not only under HOST_SUFFICIENT */
+    free_if_needed(&host_url);
+
+    /*we failed to find a reason why the 2 URLs are different, this is definitely phishing*/
+    if(urls->flags&DOMAINLIST_REQUIRED && !(phishy&DOMAIN_LISTED))
+        return CL_PHISH_HOST_NOT_LISTED;
+    return phishy_map(phishy,CL_PHISH_NOMATCH);
+}
+
+/* Maps a phish_status value to the text used in debug messages; values
+ * without an entry give "Unknown return code". */
+static const char* phishing_ret_toString(enum phish_status rc)
+{
+    const char *text = "Unknown return code";
+
+    switch(rc) {
+        case CL_PHISH_CLEAN:
+            text = "Clean";
+            break;
+        case CL_PHISH_CLEANUP_OK:
+            text = "URLs match after cleanup";
+            break;
+        case CL_PHISH_WHITELISTED:
+            text = "URL is whitelisted";
+            break;
+        case CL_PHISH_HOST_WHITELISTED:
+            text = "host part of URL is whitelist";
+            break;
+        case CL_PHISH_HOST_OK:
+            text = "Hosts match";
+            break;
+        case CL_PHISH_DOMAIN_OK:
+            text = "Domains match";
+            break;
+        case CL_PHISH_REDIR_OK:
+            text = "After redirecting realURL, they match";
+            break;
+        case CL_PHISH_HOST_REDIR_OK:
+            text = "After redirecting realURL, hosts match";
+            break;
+        case CL_PHISH_DOMAIN_REDIR_OK:
+            text = "After redirecting the domains match";
+            break;
+        case CL_PHISH_MAILTO_OK:
+            text = "URL is mailto";
+            break;
+        case CL_PHISH_NUMERIC_IP:
+            text = "IP address encountered in hostname";
+            break;
+        case CL_PHISH_TEXTURL:
+            text = "Displayed link is not an URL, can't check if phishing or not";
+            break;
+        case CL_PHISH_CLOAKED_NULL:
+            text = "Link URL is cloaked (null byte %00)";
+            break;
+        case CL_PHISH_CLOAKED_UIU:
+            /*username is a legit domain, and after the @ comes the evil one*/
+            text = "Link URL contains username, and real<->displayed hosts don't match.";
+            break;
+        case CL_PHISH_SSL_SPOOF:
+            text = "Visible links is SSL, real link is not";
+            break;
+        case CL_PHISH_NOMATCH:
+            text = "URLs are way too different";
+            break;
+        case CL_PHISH_HOST_NOT_LISTED:
+            text = "Host not listed in .pdb -> not checked";
+            break;
+        case CL_PHISH_CLEAN_CID:
+            text = "Embedded image in mail -> clean";
+            break;
+        case CL_PHISH_HEX_URL:
+            text = "Embedded hex urls";
+            break;
+        default:
+            break;
+    }
+    return text;
+}
+
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_pst.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,8 @@
+#include "clamav-config.h"
+#include "clamav.h"
+int cli_pst(const char *dir, int desc)
+{
+    int status = CL_EFORMAT; /* PST files are not handled: warn and return this code; dir and desc are unused */
+    cli_warnmsg("PST files not yet supported\n");
+    return status;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_readdb.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,1553 @@
+/*
+ * Copyright (C) 2002 - 2007 Tomasz Kojm <tkojm at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#ifdef _MSC_VER
+#include <winsock.h> /* for Sleep() */
+#endif
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifndef C_WINDOWS
+#include <dirent.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#include <fcntl.h>
+
+#include "clamav.h"
+#include "cvd.h"
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif
+#include "matcher-ac.h"
+#include "matcher-bm.h"
+#include "matcher.h"
+#include "others.h"
+#include "str.h"
+#include "dconf.h"
+#include "lockdb.h"
+#include "readdb.h"
+
+#include "phishcheck.h"
+#include "phish_whitelist.h"
+#include "phish_domaincheck_db.h"
+#include "regex_list.h"
+
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+#include <limits.h>
+#include <stddef.h>
+#endif
+
+#ifdef CL_THREAD_SAFE
+# include <pthread.h>
+static pthread_mutex_t cli_ref_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+/* Prototypes for old public functions just to shut up some gcc warnings;
+ * to be removed in 1.0
+ */
+int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo);
+int cl_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo);
+
+
+/*
+ * Parse one hex signature and register it with the matcher in root.
+ *
+ * The text of hexsig selects how it is added:
+ *  - it contains '{': it is split at every '{...}' and '*' and each part is
+ *    added with cli_ac_addsig(); a "{n}" or "{n-m}" spec supplies the
+ *    mindist/maxdist arguments of the part that follows it
+ *  - it contains '*' (but no '{'): it is split at every '*' and each part is
+ *    added with cli_ac_addsig()
+ *  - root->ac_only is set, hexsig contains '?' or '(', or type is non-zero:
+ *    added as one signature with cli_ac_addsig()
+ *  - otherwise: converted with cli_hex2str() and added with cli_bm_addpatt()
+ *
+ * Returns CL_SUCCESS, CL_EMEM, CL_EMALFDB, or (outside the '{' branch) the
+ * error code reported by cli_ac_addsig()/cli_bm_addpatt().
+ */
+int cli_parse_add(struct cli_matcher *root, const char *virname, const char *hexsig, unsigned short type, const char *offset, unsigned short target)
+{
+    struct cli_bm_patt *bm_new;
+    char *pt, *hexcpy, *start, *n, *sep;
+    int ret, virlen, asterisk = 0;
+    unsigned int i, len, parts = 0;
+    int mindist = 0, maxdist = 0, error = 0;
+
+
+    if(strchr(hexsig, '{')) {
+
+        root->ac_partsigs++;
+
+        /* work on a private copy: the parts are cut out by writing NULs */
+        if(!(hexcpy = cli_strdup(hexsig)))
+            return CL_EMEM;
+
+        len = strlen(hexsig);
+        for(i = 0; i < len; i++)
+            if(hexsig[i] == '{' || hexsig[i] == '*')
+                parts++;
+
+        /* n separators delimit n + 1 parts */
+        if(parts)
+            parts++;
+
+        start = pt = hexcpy;
+        for(i = 1; i <= parts; i++) {
+
+            if(i != parts) {
+                /* terminate the current part at the next '{' or '*' */
+                if((sep = strpbrk(start, "{*"))) {
+                    asterisk = (*sep == '*');
+                    pt = sep;
+                }
+                *pt++ = 0;
+            }
+
+            if((ret = cli_ac_addsig(root, virname, start, root->ac_partsigs, parts, i, type, mindist, maxdist, offset, target))) {
+                cli_errmsg("cli_parse_add(): Problem adding signature (1).\n");
+                error = 1;
+                break;
+            }
+
+            if(i == parts)
+                break;
+
+            mindist = maxdist = 0;
+
+            if(asterisk) {
+                /* '*' carries no distance spec: next part starts right after it */
+                start = pt;
+                continue;
+            }
+
+            /* pt is just past '{': isolate the spec and find the next part */
+            if(!(start = strchr(pt, '}'))) {
+                error = 1;
+                break;
+            }
+            *start++ = 0;
+
+            if(!strchr(pt, '-')) {
+                /* "{n}": exact distance */
+                if((mindist = maxdist = atoi(pt)) < 0) {
+                    error = 1;
+                    break;
+                }
+            } else {
+                /* "{n-m}": either side may be absent and then stays 0 */
+                if((n = cli_strtok(pt, 0, "-"))) {
+                    if((mindist = atoi(n)) < 0) {
+                        error = 1;
+                        free(n);
+                        break;
+                    }
+                    free(n);
+                }
+
+                if((n = cli_strtok(pt, 1, "-"))) {
+                    if((maxdist = atoi(n)) < 0) {
+                        error = 1;
+                        free(n);
+                        break;
+                    }
+                    free(n);
+                }
+            }
+        }
+
+        free(hexcpy);
+        if(error)
+            return CL_EMALFDB;
+
+    } else if(strchr(hexsig, '*')) {
+        root->ac_partsigs++;
+
+        len = strlen(hexsig);
+        for(i = 0; i < len; i++)
+            if(hexsig[i] == '*')
+                parts++;
+
+        if(parts)
+            parts++;
+
+        for(i = 1; i <= parts; i++) {
+            if((pt = cli_strtok(hexsig, i - 1, "*")) == NULL) {
+                cli_errmsg("Can't extract part %d of partial signature.\n", i);
+                return CL_EMALFDB;
+            }
+
+            if((ret = cli_ac_addsig(root, virname, pt, root->ac_partsigs, parts, i, type, 0, 0, offset, target))) {
+                cli_errmsg("cli_parse_add(): Problem adding signature (2).\n");
+                free(pt);
+                return ret;
+            }
+
+            free(pt);
+        }
+
+    } else if(root->ac_only || strpbrk(hexsig, "?(") || type) {
+        if((ret = cli_ac_addsig(root, virname, hexsig, 0, 0, 0, type, 0, 0, offset, target))) {
+            cli_errmsg("cli_parse_add(): Problem adding signature (3).\n");
+            return ret;
+        }
+
+    } else {
+        bm_new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
+        if(!bm_new)
+            return CL_EMEM;
+
+        if(!(bm_new->pattern = (unsigned char *) cli_hex2str(hexsig))) {
+            free(bm_new);
+            return CL_EMALFDB;
+        }
+
+        /* two hex digits per pattern byte */
+        bm_new->length = strlen(hexsig) / 2;
+
+        /* the stored name is cut one character before a "(Clam)" marker */
+        if((pt = strstr(virname, "(Clam)")))
+            virlen = strlen(virname) - strlen(pt) - 1;
+        else
+            virlen = strlen(virname);
+
+        if(virlen <= 0) {
+            free(bm_new->pattern);
+            free(bm_new);
+            return CL_EMALFDB;
+        }
+
+        if((bm_new->virname = cli_calloc(virlen + 1, sizeof(char))) == NULL) {
+            free(bm_new->pattern);
+            free(bm_new);
+            return CL_EMEM;
+        }
+
+        /* the buffer holds virlen + 1 bytes, so the copy stays terminated */
+        strncpy(bm_new->virname, virname, virlen);
+
+        if(offset) {
+            bm_new->offset = cli_strdup(offset);
+            if(!bm_new->offset) {
+                free(bm_new->pattern);
+                free(bm_new->virname);
+                free(bm_new);
+                return CL_EMEM;
+            }
+        }
+
+        bm_new->target = target;
+
+        if(bm_new->length > root->maxpatlen)
+            root->maxpatlen = bm_new->length;
+
+        if((ret = cli_bm_addpatt(root, bm_new))) {
+            cli_errmsg("cli_parse_add(): Problem adding signature (4).\n");
+            free(bm_new->pattern);
+            free(bm_new->virname);
+            /* the duplicated offset was previously leaked on this path;
+             * presumably NULL when no offset was given, since bm_new comes
+             * from cli_calloc (assumed to zero-fill like calloc) */
+            free(bm_new->offset);
+            free(bm_new);
+            return ret;
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Make sure *engine refers to an engine structure, allocating one (with
+ * its root table and dynamic configuration) when *engine is NULL, and
+ * start the phishing module when both options and dconf ask for it.
+ *
+ * *engine is stored before any later step can fail, so a caller may hand
+ * it to cl_free() after an error return.
+ */
+int cli_initengine(struct cl_engine **engine, unsigned int options)
+{
+    struct cl_engine *eng;
+    int rc;
+
+
+    if(*engine == NULL) {
+#ifdef CL_EXPERIMENTAL
+        cli_dbgmsg("Initializing the engine ("VERSION"-exp)\n");
+#else
+        cli_dbgmsg("Initializing the engine ("VERSION")\n");
+#endif
+
+        eng = (struct cl_engine *) cli_calloc(1, sizeof(struct cl_engine));
+        *engine = eng;
+        if(eng == NULL) {
+            cli_errmsg("Can't allocate memory for the engine structure!\n");
+            return CL_EMEM;
+        }
+
+        eng->refcount = 1;
+
+        eng->root = cli_calloc(CL_TARGET_TABLE_SIZE, sizeof(struct cli_matcher *));
+        if(eng->root == NULL) {
+            /* no need to free previously allocated memory here */
+            cli_errmsg("Can't allocate memory for roots!\n");
+            return CL_EMEM;
+        }
+
+        eng->dconf = cli_dconf_init();
+        if(eng->dconf == NULL) {
+            cli_errmsg("Can't initialize dynamic configuration\n");
+            return CL_EMEM;
+        }
+    }
+
+    eng = *engine;
+
+    /* dconf is only consulted when URL phishing checks were requested */
+    if(!(options & CL_DB_PHISHING_URLS))
+        return CL_SUCCESS;
+
+    if(!(((struct cli_dconf *) eng->dconf)->phishing & PHISHING_CONF_ENGINE))
+        return CL_SUCCESS;
+
+    rc = phishing_init(eng);
+    if(rc)
+        return rc;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Allocate and initialise every still-empty slot of engine->root.
+ * Each new matcher gets its AC tables and, unless CL_DB_ACONLY is set in
+ * options, its BM tables as well. Returns CL_SUCCESS, CL_EMEM, or the
+ * error code of cli_ac_init()/cli_bm_init().
+ */
+static int cli_initroots(struct cl_engine *engine, unsigned int options)
+{
+    struct cli_matcher *matcher;
+    int idx, rc;
+
+
+    for(idx = 0; idx < CL_TARGET_TABLE_SIZE; idx++) {
+        /* slots that already exist are left untouched */
+        if(engine->root[idx])
+            continue;
+
+        cli_dbgmsg("Initializing engine->root[%d]\n", idx);
+        matcher = (struct cli_matcher *) cli_calloc(1, sizeof(struct cli_matcher));
+        engine->root[idx] = matcher;
+        if(!matcher) {
+            cli_errmsg("cli_initroots: Can't allocate memory for cli_matcher\n");
+            return CL_EMEM;
+        }
+
+        if(options & CL_DB_ACONLY) {
+            cli_dbgmsg("cli_initroots: Only using AC pattern matcher.\n");
+            matcher->ac_only = 1;
+        }
+
+        cli_dbgmsg("Initialising AC pattern matcher of root[%d]\n", idx);
+        rc = cli_ac_init(matcher, cli_ac_mindepth, cli_ac_maxdepth);
+        if(rc) {
+            /* no need to free previously allocated memory here */
+            cli_errmsg("cli_initroots: Can't initialise AC pattern matcher\n");
+            return rc;
+        }
+
+        if(matcher->ac_only)
+            continue;
+
+        cli_dbgmsg("cli_initroots: Initializing BM tables of root[%d]\n", idx);
+        rc = cli_bm_init(matcher);
+        if(rc) {
+            cli_errmsg("cli_initroots: Can't initialise BM pattern matcher\n");
+            return rc;
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Load a database in the "name=hexsig" line format into root[0] of
+ * *engine. Every line must contain '='; a line whose value itself starts
+ * with '=' is ignored. The number of lines read is added to *signo when
+ * signo is not NULL. On any failure the engine is handed to cl_free().
+ */
+static int cli_loaddb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+    char linebuf[FILEBUFF], *value;
+    struct cli_matcher *root;
+    int lineno = 0, rc;
+
+
+    rc = cli_initengine(engine, options);
+    if(!rc)
+        rc = cli_initroots(*engine, options);
+
+    if(rc) {
+        cl_free(*engine);
+        return rc;
+    }
+
+    root = (*engine)->root[0];
+
+    while(fgets(linebuf, FILEBUFF, fd)) {
+        lineno++;
+        cli_chomp(linebuf);
+
+        value = strchr(linebuf, '=');
+        if(!value) {
+            cli_errmsg("Malformed pattern line %d\n", lineno);
+            rc = CL_EMALFDB;
+            break;
+        }
+
+        /* split in place: linebuf is now the name, value the signature */
+        *value++ = 0;
+
+        if(*value == '=')
+            continue;
+
+        rc = cli_parse_add(root, linebuf, value, 0, NULL, 0);
+        if(rc) {
+            cli_errmsg("Problem parsing signature at line %d\n", lineno);
+            rc = CL_EMALFDB;
+            break;
+        }
+    }
+
+    if(lineno == 0) {
+        cli_errmsg("Empty database file\n");
+        cl_free(*engine);
+        return CL_EMALFDB;
+    }
+
+    if(rc) {
+        cli_errmsg("Problem parsing database at line %d\n", lineno);
+        cl_free(*engine);
+        return rc;
+    }
+
+    if(signo)
+        *signo += lineno;
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Feed fd to the whitelist regex matcher of *engine, creating that
+ * matcher first when it does not exist yet. Nothing is loaded (and
+ * CL_SUCCESS returned) when dconf has PHISHING_CONF_ENGINE cleared.
+ */
+static int cli_loadwdb(FILE *fd, struct cl_engine **engine, unsigned int options)
+{
+    const struct cli_dconf *dconf;
+    int rc;
+
+
+    rc = cli_initengine(engine, options);
+    if(rc) {
+        cl_free(*engine);
+        return rc;
+    }
+
+    dconf = (const struct cli_dconf *) (*engine)->dconf;
+    if(!(dconf->phishing & PHISHING_CONF_ENGINE))
+        return CL_SUCCESS;
+
+    /* rc is 0 here, so an existing matcher goes straight to loading */
+    if(!(*engine)->whitelist_matcher)
+        rc = init_whitelist(*engine);
+
+    if(!rc)
+        rc = load_regex_matcher((*engine)->whitelist_matcher, fd, options, 1);
+
+    if(rc) {
+        phishing_done(*engine);
+        cl_free(*engine);
+        return rc;
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Feed fd to the domain-list regex matcher of *engine, creating that
+ * matcher first when it does not exist yet. Nothing is loaded (and
+ * CL_SUCCESS returned) when dconf has PHISHING_CONF_ENGINE cleared.
+ */
+static int cli_loadpdb(FILE *fd, struct cl_engine **engine, unsigned int options)
+{
+    const struct cli_dconf *dconf;
+    int rc;
+
+
+    rc = cli_initengine(engine, options);
+    if(rc) {
+        cl_free(*engine);
+        return rc;
+    }
+
+    dconf = (const struct cli_dconf *) (*engine)->dconf;
+    if(!(dconf->phishing & PHISHING_CONF_ENGINE))
+        return CL_SUCCESS;
+
+    /* rc is 0 here, so an existing matcher goes straight to loading */
+    if(!(*engine)->domainlist_matcher)
+        rc = init_domainlist(*engine);
+
+    if(!rc)
+        rc = load_regex_matcher((*engine)->domainlist_matcher, fd, options, 0);
+
+    if(rc) {
+        phishing_done(*engine);
+        cl_free(*engine);
+        return rc;
+    }
+
+    return CL_SUCCESS;
+}
+
+#define NDB_TOKENS 6
+/*
+ * Load a ':'-separated signature database from fd into *engine.
+ *
+ * Fields of a line, by token index:
+ *   0 name, 1 target, 2 offset ("*" means none), 3 hex signature,
+ *   4 minimum f-level (optional), 5 maximum f-level (optional)
+ *
+ * Lines starting with "Exploit.JPEG.Comment" are skipped, and so are lines
+ * starting with "HTML.Phishing"/"Email.Phishing" unless CL_DB_PHISHING is
+ * set in options. Signatures whose f-level range excludes cl_retflevel()
+ * or whose target is >= CL_TARGET_TABLE_SIZE are skipped without error.
+ *
+ * The number of signatures accepted is added to *signo when signo is not
+ * NULL. When sdb is non-zero and at least one signature was accepted,
+ * engine->sdb is switched on. On any failure the engine is handed to
+ * cl_free() and an error code returned.
+ */
+static int cli_loadndb(FILE *fd, struct cl_engine **engine, unsigned int *signo, unsigned short sdb, unsigned int options)
+{
+    const char *tokens[NDB_TOKENS];
+    char buffer[FILEBUFF];
+    const char *sig, *virname, *offset, *pt;
+    struct cli_matcher *root;
+    int line = 0, sigs = 0, ret = 0;
+    unsigned short target;
+    unsigned int phish = options & CL_DB_PHISHING;
+
+
+    if((ret = cli_initengine(engine, options))) {
+        cl_free(*engine);
+        return ret;
+    }
+
+    if((ret = cli_initroots(*engine, options))) {
+        cl_free(*engine);
+        return ret;
+    }
+
+    while(fgets(buffer, FILEBUFF, fd)) {
+        line++;
+
+        if(!strncmp(buffer, "Exploit.JPEG.Comment", 20)) /* temporary */
+            continue;
+
+        if(!phish)
+            if(!strncmp(buffer, "HTML.Phishing", 13) || !strncmp(buffer, "Email.Phishing", 14))
+                continue;
+
+        sigs++;
+        cli_chomp(buffer);
+
+        cli_strtokenize(buffer, ':', NDB_TOKENS, tokens);
+
+        if(!(virname = tokens[0])) {
+            ret = CL_EMALFDB;
+            break;
+        }
+
+        /* <ctype.h> functions need an unsigned char value: a plain char
+         * with the high bit set would be an invalid argument */
+        if((pt = tokens[4])) { /* min version */
+            if(!isdigit((unsigned char) *pt)) {
+                ret = CL_EMALFDB;
+                break;
+            }
+
+            if((unsigned int) atoi(pt) > cl_retflevel()) {
+                cli_dbgmsg("Signature for %s not loaded (required f-level: %d)\n", virname, atoi(pt));
+                sigs--;
+                continue;
+            }
+
+            /* a maximum is only looked at when a minimum is present */
+            if((pt = tokens[5])) { /* max version */
+                if(!isdigit((unsigned char) *pt)) {
+                    ret = CL_EMALFDB;
+                    break;
+                }
+
+                if((unsigned int) atoi(pt) < cl_retflevel()) {
+                    sigs--;
+                    continue;
+                }
+            }
+        }
+
+        if(!(pt = tokens[1]) || !isdigit((unsigned char) *pt)) {
+            ret = CL_EMALFDB;
+            break;
+        }
+        target = (unsigned short) atoi(pt);
+
+        if(target >= CL_TARGET_TABLE_SIZE) {
+            cli_dbgmsg("Not supported target type in signature for %s\n", virname);
+            sigs--;
+            continue;
+        }
+
+        root = (*engine)->root[target];
+
+        if(!(offset = tokens[2])) {
+            ret = CL_EMALFDB;
+            break;
+        } else if(!strcmp(offset, "*")) {
+            offset = NULL;
+        }
+
+        if(!(sig = tokens[3])) {
+            ret = CL_EMALFDB;
+            break;
+        }
+
+        if((ret = cli_parse_add(root, virname, sig, 0, offset, target))) {
+            cli_errmsg("Problem parsing signature at line %d\n", line);
+            ret = CL_EMALFDB;
+            break;
+        }
+
+    }
+
+    if(!line) {
+        cli_errmsg("Empty database file\n");
+        cl_free(*engine);
+        return CL_EMALFDB;
+    }
+
+    if(ret) {
+        cli_errmsg("Problem parsing database at line %d\n", line);
+        cl_free(*engine);
+        return ret;
+    }
+
+    if(signo)
+        *signo += sigs;
+
+    if(sdb && sigs && !(*engine)->sdb) {
+        (*engine)->sdb = 1;
+        cli_dbgmsg("*** Self protection mechanism activated.\n");
+    }
+
+    return CL_SUCCESS;
+}
+
+/*
+ * qsort() comparator ordering uint32_t values ascending.
+ * Returns -1, 0 or 1. The values are compared rather than subtracted:
+ * a uint32_t difference converted to int has the wrong sign whenever
+ * the two values are more than INT_MAX apart.
+ */
+static int scomp(const void *a, const void *b)
+{
+    const uint32_t lhs = *(const uint32_t *) a;
+    const uint32_t rhs = *(const uint32_t *) b;
+
+    return (lhs > rhs) - (lhs < rhs);
+}
+
+#define MD5_HDB 0
+#define MD5_MDB 1
+#define MD5_FP 2
+/*
+ * Load an MD5 signature database from fd into *engine.
+ *
+ * mode selects the line layout and the destination:
+ *   MD5_HDB  md5:size:name  -> node chained into engine->md5_hlist
+ *   MD5_FP   md5:size:name  -> as MD5_HDB, with the node's fp flag set
+ *   MD5_MDB  size:md5:name  -> BM pattern added to engine->md5_sect
+ *
+ * The number of lines read is added to *signo when signo is not NULL.
+ * On any failure the engine is handed to cl_free() and an error code
+ * (CL_EMEM, CL_EMALFDB or the code of cli_bm_init()/cli_bm_addpatt())
+ * is returned.
+ */
+static int cli_loadmd5(FILE *fd, struct cl_engine **engine, unsigned int *signo, uint8_t mode, unsigned int options)
+{
+ char buffer[FILEBUFF], *pt;
+ int ret = CL_SUCCESS;
+ uint8_t size_field = 1, md5_field = 0, found;
+ uint32_t line = 0, i;
+ struct cli_md5_node *new;
+ struct cli_bm_patt *bm_new;
+ struct cli_matcher *md5_sect = NULL;
+
+
+ if((ret = cli_initengine(engine, options))) {
+ cl_free(*engine);
+ return ret;
+ }
+
+ /* section databases put the size before the digest */
+ if(mode == MD5_MDB) {
+ size_field = 0;
+ md5_field = 1;
+ }
+
+ while(fgets(buffer, FILEBUFF, fd)) {
+ line++;
+ cli_chomp(buffer);
+
+ new = (struct cli_md5_node *) cli_calloc(1, sizeof(struct cli_md5_node));
+ if(!new) {
+ ret = CL_EMEM;
+ break;
+ }
+
+ if(mode == MD5_FP) /* fp */
+ new->fp = 1;
+
+ if(!(pt = cli_strtok(buffer, md5_field, ":"))) {
+ free(new);
+ ret = CL_EMALFDB;
+ break;
+ }
+
+ if(!(new->md5 = (unsigned char *) cli_hex2str(pt))) {
+ cli_errmsg("cli_loadmd5: Malformed MD5 string at line %u\n", line);
+ free(pt);
+ free(new);
+ ret = CL_EMALFDB;
+ break;
+ }
+ free(pt);
+
+ if(!(pt = cli_strtok(buffer, size_field, ":"))) {
+ free(new->md5);
+ free(new);
+ ret = CL_EMALFDB;
+ break;
+ }
+ new->size = atoi(pt);
+ free(pt);
+
+ /* the name is always the third field */
+ if(!(new->virname = cli_strtok(buffer, 2, ":"))) {
+ free(new->md5);
+ free(new);
+ ret = CL_EMALFDB;
+ break;
+ }
+
+ if(mode == MD5_MDB) { /* section MD5 */
+ /* the section matcher is created on first use */
+ if(!(*engine)->md5_sect) {
+ (*engine)->md5_sect = (struct cli_matcher *) cli_calloc(sizeof(struct cli_matcher), 1);
+ if(!(*engine)->md5_sect) {
+ free(new->virname);
+ free(new->md5);
+ free(new);
+ ret = CL_EMEM;
+ break;
+ }
+ if((ret = cli_bm_init((*engine)->md5_sect))) {
+ cli_errmsg("cli_loadmd5: Can't initialise BM pattern matcher\n");
+ free(new->virname);
+ free(new->md5);
+ free(new);
+ break;
+ }
+ }
+ md5_sect = (*engine)->md5_sect;
+
+ bm_new = (struct cli_bm_patt *) cli_calloc(1, sizeof(struct cli_bm_patt));
+ if(!bm_new) {
+ cli_errmsg("cli_loadmd5: Can't allocate memory for bm_new\n");
+ free(new->virname);
+ free(new->md5);
+ free(new);
+ ret = CL_EMEM;
+ break;
+ }
+
+ /* bm_new takes over the digest and name buffers from new;
+ * from here on they are released through bm_new */
+ bm_new->pattern = new->md5;
+ bm_new->length = 16;
+ bm_new->virname = new->virname;
+
+ /* soff collects each distinct size value once */
+ found = 0;
+ for(i = 0; i < md5_sect->soff_len; i++) {
+ if(md5_sect->soff[i] == new->size) {
+ found = 1;
+ break;
+ }
+ }
+
+ if(!found) {
+ md5_sect->soff_len++;
+ md5_sect->soff = (uint32_t *) cli_realloc2(md5_sect->soff, md5_sect->soff_len * sizeof(uint32_t));
+ if(!md5_sect->soff) {
+ cli_errmsg("cli_loadmd5: Can't realloc md5_sect->soff\n");
+ free(bm_new->pattern);
+ free(bm_new->virname);
+ free(bm_new);
+ free(new);
+ ret = CL_EMEM;
+ break;
+ }
+ md5_sect->soff[md5_sect->soff_len - 1] = new->size;
+ }
+
+ /* only the node shell is released; its buffers live on in bm_new */
+ free(new);
+
+ if((ret = cli_bm_addpatt(md5_sect, bm_new))) {
+ cli_errmsg("cli_loadmd5: Error adding BM pattern\n");
+ free(bm_new->pattern);
+ free(bm_new->virname);
+ free(bm_new);
+ break;
+ }
+
+ } else {
+ /* the 256-bucket list is created on first use */
+ if(!(*engine)->md5_hlist) {
+ cli_dbgmsg("cli_loadmd5: Initializing MD5 list structure\n");
+ (*engine)->md5_hlist = cli_calloc(256, sizeof(struct cli_md5_node *));
+ if(!(*engine)->md5_hlist) {
+ free(new->virname);
+ free(new->md5);
+ free(new);
+ ret = CL_EMEM;
+ break;
+ }
+ }
+
+ /* push the node onto the bucket selected by the first digest byte */
+ new->next = (*engine)->md5_hlist[new->md5[0] & 0xff];
+ (*engine)->md5_hlist[new->md5[0] & 0xff] = new;
+ }
+ }
+
+ if(!line) {
+ cli_errmsg("cli_loadmd5: Empty database file\n");
+ cl_free(*engine);
+ return CL_EMALFDB;
+ }
+
+ if(ret) {
+ cli_errmsg("cli_loadmd5: Problem parsing database at line %u\n", line);
+ cl_free(*engine);
+ return ret;
+ }
+
+ if(signo)
+ *signo += line;
+
+ /* md5_sect is only set when at least one MD5_MDB line was processed */
+ if(md5_sect)
+ qsort(md5_sect->soff, md5_sect->soff_len, sizeof(uint32_t), scomp);
+
+ return CL_SUCCESS;
+}
+
+/*
+ * Read field number `field` of a ':'-separated metadata line as an int.
+ * A field equal to "*" yields `wildcard`; anything else goes through atoi().
+ * Returns CL_SUCCESS, or CL_EMALFDB when the field is missing.
+ */
+static int cli_md_intfield(const char *line, int field, int wildcard, int *value)
+{
+    char *tok = cli_strtok(line, field, ":");
+
+    if(!tok)
+        return CL_EMALFDB;
+
+    *value = strcmp(tok, "*") ? atoi(tok) : wildcard;
+    free(tok);
+    return CL_SUCCESS;
+}
+
+/*
+ * Fill node from one metadata line with the fields
+ *   name:encrypted:filename:size:csize:crc32:method:fileno:maxdepth
+ * Returns CL_SUCCESS or CL_EMALFDB. On failure every string stored in
+ * node so far has been released again, so the caller only frees node.
+ */
+static int cli_md_parse(const char *line, struct cli_meta_node *node)
+{
+    char *tok;
+    int val, crc32;
+
+    node->virname = NULL;
+    node->filename = NULL;
+
+    if(!(node->virname = cli_strtok(line, 0, ":")))
+        goto malformed;
+
+    /* this field has no wildcard form; atoi("*") is 0 as well */
+    if(cli_md_intfield(line, 1, 0, &val))
+        goto malformed;
+    node->encrypted = val;
+
+    if(!(node->filename = cli_strtok(line, 2, ":")))
+        goto malformed;
+    if(!strcmp(node->filename, "*")) {
+        /* wildcard file name is stored as NULL */
+        free(node->filename);
+        node->filename = NULL;
+    }
+
+    if(cli_md_intfield(line, 3, -1, &val))
+        goto malformed;
+    node->size = val;
+
+    if(cli_md_intfield(line, 4, -1, &val))
+        goto malformed;
+    node->csize = val;
+
+    if(!(tok = cli_strtok(line, 5, ":")))
+        goto malformed;
+    if(!strcmp(tok, "*")) {
+        node->crc32 = 0;
+    } else {
+        crc32 = cli_hex2num(tok);
+        if(crc32 == -1) {
+            /* this path used to leak tok, the node and its strings */
+            free(tok);
+            goto malformed;
+        }
+        node->crc32 = (unsigned int) crc32;
+    }
+    free(tok);
+
+    if(cli_md_intfield(line, 6, -1, &val))
+        goto malformed;
+    node->method = val;
+
+    if(cli_md_intfield(line, 7, 0, &val))
+        goto malformed;
+    node->fileno = val;
+
+    if(cli_md_intfield(line, 8, 0, &val))
+        goto malformed;
+    node->maxdepth = val;
+
+    return CL_SUCCESS;
+
+malformed:
+    free(node->filename);
+    free(node->virname);
+    node->filename = NULL;
+    node->virname = NULL;
+    return CL_EMALFDB;
+}
+
+/*
+ * Load an archive metadata database from fd into *engine.
+ *
+ * Lines starting with '#' are comments. Every other line is parsed into
+ * a cli_meta_node that is pushed onto engine->zip_mlist when type is 1
+ * and onto engine->rar_mlist otherwise. The number of non-comment lines
+ * is added to *signo when signo is not NULL. On any failure the engine
+ * is handed to cl_free() and CL_EMEM or CL_EMALFDB returned.
+ */
+static int cli_loadmd(FILE *fd, struct cl_engine **engine, unsigned int *signo, int type, unsigned int options)
+{
+    char buffer[FILEBUFF];
+    int line = 0, comments = 0, ret = 0;
+    struct cli_meta_node *new;
+
+
+    if((ret = cli_initengine(engine, options))) {
+        cl_free(*engine);
+        return ret;
+    }
+
+    while(fgets(buffer, FILEBUFF, fd)) {
+        line++;
+        if(buffer[0] == '#') {
+            comments++;
+            continue;
+        }
+
+        cli_chomp(buffer);
+
+        new = (struct cli_meta_node *) cli_calloc(1, sizeof(struct cli_meta_node));
+        if(!new) {
+            ret = CL_EMEM;
+            break;
+        }
+
+        if((ret = cli_md_parse(buffer, new))) {
+            free(new);
+            break;
+        }
+
+        if(type == 1) {
+            new->next = (*engine)->zip_mlist;
+            (*engine)->zip_mlist = new;
+        } else {
+            new->next = (*engine)->rar_mlist;
+            (*engine)->rar_mlist = new;
+        }
+    }
+
+    if(!line) {
+        cli_errmsg("Empty database file\n");
+        cl_free(*engine);
+        return CL_EMALFDB;
+    }
+
+    if(ret) {
+        cli_errmsg("Problem parsing database at line %d\n", line);
+        cl_free(*engine);
+        return ret;
+    }
+
+    if(signo)
+        *signo += (line - comments);
+
+    return CL_SUCCESS;
+}
+
+static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options);
+
+/*
+ * Open one database file and pass it to the loader that matches the
+ * extension of filename; an unrecognised extension falls back to
+ * cli_loaddb(). The .hdu/.mdu/.ndu files are only loaded with CL_DB_PUA
+ * and .wdb/.pdb files only with CL_DB_PHISHING_URLS; otherwise the file
+ * is skipped and CL_SUCCESS returned. Returns CL_EOPEN when the file
+ * cannot be opened, else the loader's result.
+ */
+static int cli_load(const char *filename, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+    FILE *fs;
+    int rc = CL_SUCCESS;
+    uint8_t was_skipped = 0;
+    const int want_pua = (options & CL_DB_PUA) != 0;
+    const int want_urls = (options & CL_DB_PHISHING_URLS) != 0;
+
+
+    fs = fopen(filename, "rb");
+    if(fs == NULL) {
+        cli_errmsg("cli_load(): Can't open file %s\n", filename);
+        return CL_EOPEN;
+    }
+
+    if(cli_strbcasestr(filename, ".db")) {
+        rc = cli_loaddb(fs, engine, signo, options);
+
+    } else if(cli_strbcasestr(filename, ".cvd")) {
+        /* only daily.cvd is loaded with the warn flag set */
+        const int warn = strstr(filename, "daily.cvd") ? 1 : 0;
+
+        rc = cli_cvdload(fs, engine, signo, warn, options);
+
+    } else if(cli_strbcasestr(filename, ".hdb")) {
+        rc = cli_loadmd5(fs, engine, signo, MD5_HDB, options);
+
+    } else if(cli_strbcasestr(filename, ".hdu")) {
+        if(want_pua)
+            rc = cli_loadmd5(fs, engine, signo, MD5_HDB, options);
+        else
+            was_skipped = 1;
+
+    } else if(cli_strbcasestr(filename, ".fp")) {
+        rc = cli_loadmd5(fs, engine, signo, MD5_FP, options);
+
+    } else if(cli_strbcasestr(filename, ".mdb")) {
+        rc = cli_loadmd5(fs, engine, signo, MD5_MDB, options);
+
+    } else if(cli_strbcasestr(filename, ".mdu")) {
+        if(want_pua)
+            rc = cli_loadmd5(fs, engine, signo, MD5_MDB, options);
+        else
+            was_skipped = 1;
+
+    } else if(cli_strbcasestr(filename, ".ndb")) {
+        rc = cli_loadndb(fs, engine, signo, 0, options);
+
+    } else if(cli_strbcasestr(filename, ".ndu")) {
+        if(want_pua)
+            rc = cli_loadndb(fs, engine, signo, 0, options);
+        else
+            was_skipped = 1;
+
+    } else if(cli_strbcasestr(filename, ".sdb")) {
+        rc = cli_loadndb(fs, engine, signo, 1, options);
+
+    } else if(cli_strbcasestr(filename, ".zmd")) {
+        rc = cli_loadmd(fs, engine, signo, 1, options);
+
+    } else if(cli_strbcasestr(filename, ".rmd")) {
+        rc = cli_loadmd(fs, engine, signo, 2, options);
+
+    } else if(cli_strbcasestr(filename, ".cfg")) {
+        rc = cli_dconf_load(fs, engine, options);
+
+    } else if(cli_strbcasestr(filename, ".wdb")) {
+        if(want_urls)
+            rc = cli_loadwdb(fs, engine, options);
+        else
+            was_skipped = 1;
+
+    } else if(cli_strbcasestr(filename, ".pdb")) {
+        if(want_urls)
+            rc = cli_loadpdb(fs, engine, options);
+        else
+            was_skipped = 1;
+
+    } else {
+        cli_dbgmsg("cli_load: unknown extension - assuming old database format\n");
+        rc = cli_loaddb(fs, engine, signo, options);
+    }
+
+    if(rc)
+        cli_errmsg("Can't load %s: %s\n", filename, cl_strerror(rc));
+    else if(was_skipped)
+        cli_dbgmsg("%s skipped\n", filename);
+    else
+        cli_dbgmsg("%s loaded\n", filename);
+
+    fclose(fs);
+    return rc;
+}
+
+/* Old public entry point: loads a single file with the CL_DB_STDOPT options. */
+int cl_loaddb(const char *filename, struct cl_engine **engine, unsigned int *signo)
+{
+    const unsigned int options = CL_DB_STDOPT;
+
+    return cli_load(filename, engine, signo, options);
+}
+
+/*
+ * Non-zero when cli_strbcasestr() matches `ext` (a file name) against any
+ * of the extensions listed below. ".inc" is listed as well; the directory
+ * scanners in this file treat such entries as sub-directories. ".cfg" is
+ * not listed. Note that `ext` is evaluated once per listed extension.
+ */
+#define CLI_DBEXT(ext) \
+ ( \
+ cli_strbcasestr(ext, ".db") || \
+ cli_strbcasestr(ext, ".db2") || \
+ cli_strbcasestr(ext, ".db3") || \
+ cli_strbcasestr(ext, ".hdb") || \
+ cli_strbcasestr(ext, ".hdu") || \
+ cli_strbcasestr(ext, ".fp") || \
+ cli_strbcasestr(ext, ".mdb") || \
+ cli_strbcasestr(ext, ".mdu") || \
+ cli_strbcasestr(ext, ".ndb") || \
+ cli_strbcasestr(ext, ".ndu") || \
+ cli_strbcasestr(ext, ".sdb") || \
+ cli_strbcasestr(ext, ".zmd") || \
+ cli_strbcasestr(ext, ".rmd") || \
+ cli_strbcasestr(ext, ".pdb") || \
+ cli_strbcasestr(ext, ".wdb") || \
+ cli_strbcasestr(ext, ".inc") || \
+ cli_strbcasestr(ext, ".cvd") \
+ )
+
+/*
+ * Load every database found in directory dirname into *engine.
+ *
+ * daily.cfg is loaded first when it exists. Then each directory entry
+ * accepted by CLI_DBEXT() is loaded: entries ending in ".inc" through
+ * cli_loaddbdir(), everything else through cli_load(). The first failing
+ * entry aborts the scan and its error code is returned.
+ *
+ * Returns CL_EMEM, CL_EOPEN, the error of a failing loader, the result of
+ * the last load, or CL_ESUPPORT when nothing at all was loaded.
+ */
+static int cli_loaddbdir_l(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+ DIR *dd;
+ struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+ /* entry storage for readdir_r(), sized for a name of NAME_MAX bytes */
+ union {
+ struct dirent d;
+ char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+ } result;
+#endif
+ struct stat sb;
+ char *dbfile;
+ /* stays CL_ESUPPORT until some load call overwrites it */
+ int ret = CL_ESUPPORT;
+
+
+ cli_dbgmsg("Loading databases from %s\n", dirname);
+
+ /* check for and load daily.cfg */
+ /* 11 = strlen("/daily.cfg") + terminating NUL */
+ dbfile = (char *) cli_malloc(strlen(dirname) + 11);
+ if(!dbfile)
+ return CL_EMEM;
+ sprintf(dbfile, "%s/daily.cfg", dirname);
+ if(stat(dbfile, &sb) != -1) {
+ if((ret = cli_load(dbfile, engine, signo, options))) {
+ free(dbfile);
+ return ret;
+ }
+ }
+ free(dbfile);
+
+ if((dd = opendir(dirname)) == NULL) {
+ cli_errmsg("cli_loaddbdir(): Can't open directory %s\n", dirname);
+ return CL_EOPEN;
+ }
+
+ /* exactly one of the three loop headers below is compiled in */
+#ifdef HAVE_READDIR_R_3
+ while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+ while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+ while((dent = readdir(dd))) {
+#endif
+#if (!defined(C_INTERIX)) && (!defined(C_WINDOWS)) && (!defined(C_CYGWIN))
+ /* entries with a zero inode number are ignored on these platforms */
+ if(dent->d_ino)
+#endif
+ {
+ if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && CLI_DBEXT(dent->d_name)) {
+
+ /* +2: the '/' separator and the terminating NUL */
+ dbfile = (char *) cli_malloc(strlen(dent->d_name) + strlen(dirname) + 2);
+
+ if(!dbfile) {
+ cli_dbgmsg("cli_loaddbdir(): dbfile == NULL\n");
+ closedir(dd);
+ return CL_EMEM;
+ }
+ sprintf(dbfile, "%s/%s", dirname, dent->d_name);
+
+ /* ".inc" entries are directories and go back through the locking wrapper */
+ if(cli_strbcasestr(dbfile, ".inc"))
+ ret = cli_loaddbdir(dbfile, engine, signo, options);
+ else
+ ret = cli_load(dbfile, engine, signo, options);
+
+ if(ret) {
+ cli_dbgmsg("cli_loaddbdir(): error loading database %s\n", dbfile);
+ free(dbfile);
+ closedir(dd);
+ return ret;
+ }
+ free(dbfile);
+ }
+ }
+ }
+
+ closedir(dd);
+ if(ret == CL_ESUPPORT)
+ cli_errmsg("cli_loaddb(): No supported database files found in %s\n", dirname);
+
+ return ret;
+}
+
+/*
+ * Load a database directory under the directory lock.
+ * While cli_readlockdb() reports CL_ELOCKDB the call sleeps and retries;
+ * after too many attempts it gives up with CL_ELOCKDB. Any other lock
+ * result lets the load proceed, and the lock is released afterwards only
+ * when it had actually been acquired (CL_SUCCESS).
+ */
+static int cli_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+    int status, attempts, lock;
+
+
+    cli_dbgmsg("cli_loaddbdir: Acquiring dbdir lock\n");
+    for(attempts = 0; (lock = cli_readlockdb(dirname, 0)) == CL_ELOCKDB; attempts++) {
+#ifdef C_WINDOWS
+        Sleep(5);
+#else
+        sleep(5);
+#endif
+        if(attempts > 24) {
+            cli_errmsg("cl_load(): Unable to lock database directory: %s\n", dirname);
+            return CL_ELOCKDB;
+        }
+    }
+
+    status = cli_loaddbdir_l(dirname, engine, signo, options);
+
+    if(lock == CL_SUCCESS)
+        cli_unlockdb(dirname);
+
+    return status;
+}
+
+/* Old public entry point: loads a directory with the CL_DB_STDOPT options. */
+int cl_loaddbdir(const char *dirname, struct cl_engine **engine, unsigned int *signo)
+{
+    const unsigned int options = CL_DB_STDOPT;
+
+    return cli_loaddbdir(dirname, engine, signo, options);
+}
+
+/*
+ * Load the database(s) at path into *engine.
+ * A regular file is loaded with cli_load(), a directory with
+ * cli_loaddbdir(). Returns CL_EIO when path cannot be stat()ed, CL_EOPEN
+ * for any other file type, otherwise the loader's result. The options
+ * are recorded in the engine's dboptions before loading.
+ */
+int cl_load(const char *path, struct cl_engine **engine, unsigned int *signo, unsigned int options)
+{
+    struct stat st;
+    int rc;
+
+
+    if(stat(path, &st) == -1) {
+        cli_errmsg("cl_loaddbdir(): Can't get status of %s\n", path);
+        return CL_EIO;
+    }
+
+    rc = cli_initengine(engine, options);
+    if(rc) {
+        cl_free(*engine);
+        return rc;
+    }
+
+    (*engine)->dboptions = options;
+
+    if((st.st_mode & S_IFMT) == S_IFREG)
+        return cli_load(path, engine, signo, options);
+
+    if((st.st_mode & S_IFMT) == S_IFDIR)
+        return cli_loaddbdir(path, engine, signo, options);
+
+    cli_errmsg("cl_load(%s): Not supported database file type\n", path);
+    return CL_EOPEN;
+}
+
+/* Returns the database directory given by the DATADIR macro. */
+const char *cl_retdbdir(void)
+{
+    const char *dbdir = DATADIR;
+
+    return dbdir;
+}
+
+/*
+ * Record the stat() data of every database file in dirname into dbstat,
+ * for a later comparison by cl_statchkdir().
+ *
+ * dbstat is reset first and receives a copy of dirname. One struct stat
+ * is appended to dbstat->stattab per directory entry accepted by
+ * CLI_DBEXT(); for ".inc" entries the file stat()ed is the daily.info or
+ * main.info inside that directory. On C_INTERIX/C_OS2 the entry names
+ * are stored in dbstat->statdname as well.
+ *
+ * Returns CL_SUCCESS, CL_ENULLARG, CL_EOPEN or CL_EMEM; on the error
+ * paths after the NULL check dbstat is released with cl_statfree().
+ */
+int cl_statinidir(const char *dirname, struct cl_stat *dbstat)
+{
+ DIR *dd;
+ const struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+ /* entry storage for readdir_r(), sized for a name of NAME_MAX bytes */
+ union {
+ struct dirent d;
+ char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+ } result;
+#endif
+ char *fname;
+
+
+ if(dbstat) {
+ dbstat->entries = 0;
+ dbstat->stattab = NULL;
+ dbstat->statdname = NULL;
+ /* NOTE(review): the result of cli_strdup() is not checked here */
+ dbstat->dir = cli_strdup(dirname);
+ } else {
+ cli_errmsg("cl_statdbdir(): Null argument passed.\n");
+ return CL_ENULLARG;
+ }
+
+ if((dd = opendir(dirname)) == NULL) {
+ cli_errmsg("cl_statdbdir(): Can't open directory %s\n", dirname);
+ cl_statfree(dbstat);
+ return CL_EOPEN;
+ }
+
+ cli_dbgmsg("Stat()ing files in %s\n", dirname);
+
+ /* exactly one of the three loop headers below is compiled in */
+#ifdef HAVE_READDIR_R_3
+ while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+ while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+ while((dent = readdir(dd))) {
+#endif
+#if (!defined(C_INTERIX)) && (!defined(C_WINDOWS)) && (!defined(C_CYGWIN))
+ /* entries with a zero inode number are ignored on these platforms */
+ if(dent->d_ino)
+#endif
+ {
+ if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && CLI_DBEXT(dent->d_name)) {
+ /* grow the table by one slot for this entry */
+ dbstat->entries++;
+ dbstat->stattab = (struct stat *) cli_realloc2(dbstat->stattab, dbstat->entries * sizeof(struct stat));
+ if(!dbstat->stattab) {
+ cl_statfree(dbstat);
+ closedir(dd);
+ return CL_EMEM;
+ }
+
+#if defined(C_INTERIX) || defined(C_OS2)
+ dbstat->statdname = (char **) cli_realloc2(dbstat->statdname, dbstat->entries * sizeof(char *));
+ if(!dbstat->statdname) {
+ cl_statfree(dbstat);
+ closedir(dd);
+ return CL_EMEM;
+ }
+#endif
+
+ /* 32 spare bytes cover the separators, the "daily"/"main" + ".info" suffix and the NUL */
+ fname = cli_malloc(strlen(dirname) + strlen(dent->d_name) + 32);
+ if(!fname) {
+ cl_statfree(dbstat);
+ closedir(dd);
+ return CL_EMEM;
+ }
+
+ if(cli_strbcasestr(dent->d_name, ".inc")) {
+ sprintf(fname, "%s/%s/%s.info", dirname, dent->d_name, strstr(dent->d_name, "daily") ? "daily" : "main");
+ } else {
+ sprintf(fname, "%s/%s", dirname, dent->d_name);
+ }
+#if defined(C_INTERIX) || defined(C_OS2)
+ dbstat->statdname[dbstat->entries - 1] = (char *) cli_malloc(strlen(dent->d_name) + 1);
+ if(!dbstat->statdname[dbstat->entries - 1]) {
+ /* NOTE(review): fname is not freed on this path */
+ cl_statfree(dbstat);
+ closedir(dd);
+ return CL_EMEM;
+ }
+
+ strcpy(dbstat->statdname[dbstat->entries - 1], dent->d_name);
+#endif
+ /* NOTE(review): the return value of stat() is ignored */
+ stat(fname, &dbstat->stattab[dbstat->entries - 1]);
+ free(fname);
+ }
+ }
+ }
+
+ closedir(dd);
+ return CL_SUCCESS;
+}
+
+/*
+ * Compare the database files currently in dbstat->dir with the snapshot
+ * stored in dbstat.
+ *
+ * Returns 1 as soon as a directory entry accepted by CLI_DBEXT() either
+ * has no counterpart in the snapshot or has a different st_mtime, and
+ * CL_SUCCESS when every entry matches. Entries are paired by inode
+ * number, or by name on C_INTERIX/C_OS2. Also returns CL_ENULLARG,
+ * CL_EOPEN or CL_EMEM.
+ *
+ * NOTE(review): only entries present in the directory are visited, so a
+ * file that was removed since the snapshot does not seem to be detected.
+ */
+int cl_statchkdir(const struct cl_stat *dbstat)
+{
+ DIR *dd;
+ struct dirent *dent;
+#if defined(HAVE_READDIR_R_3) || defined(HAVE_READDIR_R_2)
+ /* entry storage for readdir_r(), sized for a name of NAME_MAX bytes */
+ union {
+ struct dirent d;
+ char b[offsetof(struct dirent, d_name) + NAME_MAX + 1];
+ } result;
+#endif
+ struct stat sb;
+ unsigned int i, found;
+ char *fname;
+
+
+ if(!dbstat || !dbstat->dir) {
+ cli_errmsg("cl_statdbdir(): Null argument passed.\n");
+ return CL_ENULLARG;
+ }
+
+ if((dd = opendir(dbstat->dir)) == NULL) {
+ cli_errmsg("cl_statdbdir(): Can't open directory %s\n", dbstat->dir);
+ return CL_EOPEN;
+ }
+
+ cli_dbgmsg("Stat()ing files in %s\n", dbstat->dir);
+
+ /* exactly one of the three loop headers below is compiled in */
+#ifdef HAVE_READDIR_R_3
+ while(!readdir_r(dd, &result.d, &dent) && dent) {
+#elif defined(HAVE_READDIR_R_2)
+ while((dent = (struct dirent *) readdir_r(dd, &result.d))) {
+#else
+ while((dent = readdir(dd))) {
+#endif
+#if (!defined(C_INTERIX)) && (!defined(C_WINDOWS)) && (!defined(C_CYGWIN))
+ /* entries with a zero inode number are ignored on these platforms */
+ if(dent->d_ino)
+#endif
+ {
+ if(strcmp(dent->d_name, ".") && strcmp(dent->d_name, "..") && CLI_DBEXT(dent->d_name)) {
+ fname = cli_malloc(strlen(dbstat->dir) + strlen(dent->d_name) + 32);
+ if(!fname) {
+ closedir(dd);
+ return CL_EMEM;
+ }
+
+ /* same path construction as in cl_statinidir() */
+ if(cli_strbcasestr(dent->d_name, ".inc")) {
+ sprintf(fname, "%s/%s/%s.info", dbstat->dir, dent->d_name, strstr(dent->d_name, "daily") ? "daily" : "main");
+ } else {
+ sprintf(fname, "%s/%s", dbstat->dir, dent->d_name);
+ }
+ /* NOTE(review): the return value of stat() is ignored */
+ stat(fname, &sb);
+ free(fname);
+
+ /* look the entry up in the snapshot; the #if picks the matching key */
+ found = 0;
+ for(i = 0; i < dbstat->entries; i++)
+#if defined(C_INTERIX) || defined(C_OS2)
+ if(!strcmp(dbstat->statdname[i], dent->d_name)) {
+#else
+ if(dbstat->stattab[i].st_ino == sb.st_ino) {
+#endif
+ found = 1;
+ if(dbstat->stattab[i].st_mtime != sb.st_mtime) {
+ closedir(dd);
+ return 1;
+ }
+ }
+
+ /* an entry unknown to the snapshot counts as a change */
+ if(!found) {
+ closedir(dd);
+ return 1;
+ }
+ }
+ }
+ }
+
+ closedir(dd);
+ return CL_SUCCESS;
+}
+
+/*
+ * Release everything held by dbstat (not dbstat itself) and reset its
+ * fields, so the structure can be reused or freed again safely.
+ * Returns CL_SUCCESS, or CL_ENULLARG when dbstat is NULL.
+ */
+int cl_statfree(struct cl_stat *dbstat)
+{
+#if defined(C_INTERIX) || defined(C_OS2)
+    int i;
+#endif
+
+    if(!dbstat) {
+        cli_errmsg("cl_statfree(): Null argument passed\n");
+        return CL_ENULLARG;
+    }
+
+#if defined(C_INTERIX) || defined(C_OS2)
+    /* the name table must go before entries is reset below */
+    if(dbstat->statdname) {
+        for(i = 0; i < dbstat->entries; i++) {
+            free(dbstat->statdname[i]);
+            dbstat->statdname[i] = NULL;
+        }
+        free(dbstat->statdname);
+        dbstat->statdname = NULL;
+    }
+#endif
+
+    free(dbstat->stattab);
+    dbstat->stattab = NULL;
+    dbstat->entries = 0;
+
+    free(dbstat->dir);
+    dbstat->dir = NULL;
+
+    return CL_SUCCESS;
+}
+
+/* Release every node of a metadata list together with its strings. */
+static void cli_free_metalist(struct cli_meta_node *node)
+{
+    struct cli_meta_node *next;
+
+    while(node) {
+        next = node->next;
+        free(node->virname);
+        free(node->filename);
+        free(node);
+        node = next;
+    }
+}
+
+/*
+ * Drop one reference to engine and destroy it when that was the last one.
+ *
+ * The reference count is changed under cli_ref_mutex when CL_THREAD_SAFE
+ * is defined. Destruction releases the matcher roots, the MD5 list and
+ * section matcher, both metadata lists, the phishing module (when enabled
+ * in dconf), dconf itself, the database locks and finally the engine.
+ *
+ * The engine may be only partly initialised: cli_initengine() can fail
+ * before root or dconf are set and its callers pass the result here, so
+ * every member is checked before it is used.
+ */
+void cl_free(struct cl_engine *engine)
+{
+    int i;
+    struct cli_md5_node *md5pt, *md5h;
+    struct cli_matcher *root;
+
+
+    if(!engine) {
+        cli_errmsg("cl_free: engine == NULL\n");
+        return;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&cli_ref_mutex);
+#endif
+
+    engine->refcount--;
+    if(engine->refcount) {
+        /* still referenced elsewhere */
+#ifdef CL_THREAD_SAFE
+        pthread_mutex_unlock(&cli_ref_mutex);
+#endif
+        return;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_unlock(&cli_ref_mutex);
+#endif
+
+    if(engine->root) {
+        for(i = 0; i < CL_TARGET_TABLE_SIZE; i++) {
+            if((root = engine->root[i])) {
+                /* BM tables only exist for matchers that are not AC-only */
+                if(!root->ac_only)
+                    cli_bm_free(root);
+                cli_ac_free(root);
+                free(root);
+            }
+        }
+        free(engine->root);
+    }
+
+    if(engine->md5_hlist) {
+        for(i = 0; i < 256; i++) {
+            md5pt = engine->md5_hlist[i];
+            while(md5pt) {
+                md5h = md5pt;
+                md5pt = md5pt->next;
+                free(md5h->md5);
+                free(md5h->virname);
+                free(md5h);
+            }
+        }
+        free(engine->md5_hlist);
+    }
+
+    if((root = engine->md5_sect)) {
+        cli_bm_free(root);
+        free(root->soff);
+        free(root);
+    }
+
+    cli_free_metalist(engine->zip_mlist);
+    cli_free_metalist(engine->rar_mlist);
+
+    /* dconf used to be dereferenced before the NULL check, which crashed
+     * on an engine whose initialisation had stopped before dconf was set */
+    if(engine->dconf) {
+        if(((struct cli_dconf *) engine->dconf)->phishing & PHISHING_CONF_ENGINE)
+            phishing_done(engine);
+
+        free(engine->dconf);
+    }
+
+    cli_freelocks();
+    free(engine);
+}
+
+/*
+ * Finish an engine after loading: adds the type signatures, builds the
+ * AC trie of every existing root and prints the dynamic configuration.
+ * Returns the error of cli_addtypesigs() or CL_SUCCESS.
+ */
+int cl_build(struct cl_engine *engine)
+{
+    struct cli_matcher *matcher;
+    int idx, rc;
+
+
+    rc = cli_addtypesigs(engine);
+    if(rc)
+        return rc;
+
+    for(idx = 0; idx < CL_TARGET_TABLE_SIZE; idx++) {
+        matcher = engine->root[idx];
+        if(!matcher)
+            continue;
+
+        /* FIXME: check return values of cli_ac_buildtree */
+        cli_ac_buildtrie(matcher);
+    }
+
+    cli_dconf_print(engine->dconf);
+
+    return CL_SUCCESS;
+}
+
+/*
+ * Take another reference to engine: the reference count is incremented
+ * (under cli_ref_mutex when CL_THREAD_SAFE is defined) and the same
+ * pointer is returned. Returns NULL when engine is NULL.
+ */
+struct cl_engine *cl_dup(struct cl_engine *engine)
+{
+    if(engine == NULL) {
+        cli_errmsg("cl_dup: engine == NULL\n");
+        return NULL;
+    }
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_lock(&cli_ref_mutex);
+#endif
+
+    engine->refcount += 1;
+
+#ifdef CL_THREAD_SAFE
+    pthread_mutex_unlock(&cli_ref_mutex);
+#endif
+
+    return engine;
+}
Added: test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c
URL: http://llvm.org/viewvc/llvm-project/test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c?rev=46573&view=auto
==============================================================================
--- test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c (added)
+++ test-suite/trunk/MultiSource/Applications/ClamAV/libclamav_rebuildpe.c Wed Jan 30 15:17:10 2008
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2004 aCaB <acab at clamav.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+/*
+** rebuildpe.c
+**
+** 28/07/2k4 - Moved out of petitep.c
+** 08/08/2k4 - Fixed typo for sects characteristics
+**
+*/
+
+/*
+** Rebuilds a fully parsable / *not runnable* PE file including
+** a fake MZ header, a piece of informational 16bit code,
+** lookalike PE & Optional headers, an array of structures and
+** of course the real content.
+** Sections characteristics will have all the bits set.
+*/
+
+#if HAVE_CONFIG_H
+#include "clamav-config.h"
+#endif
+
+#include <string.h>
+
+#include "rebuildpe.h"
+#include "others.h"
+
+#define EC32(x) le32_to_host(x) /* Convert little endian to host; cli_rebuildpe() also applies it to host values before storing them in the header */
+#define EC16(x) le16_to_host(x) /* Convert little endian to host; 16bit variant, same usage as EC32 */
+#define PEALIGN(o,a) (((a))?(((o)/(a))*(a)):(o)) /* Round o down to a multiple of a; yields o unchanged when a is 0. Arguments are evaluated more than once. */
+#define PESALIGN(o,a) (((a))?(((o)/(a)+((o)%(a)!=0))*(a)):(o)) /* Round o up to a multiple of a; yields o unchanged when a is 0. Arguments are evaluated more than once. */
+
+/*
+ * Field layout of the fake PE headers stored in HEADERS from offset 0xd0
+ * onwards: signature, file header and the fixed part of the optional
+ * header. cli_rebuildpe() overlays this struct on the output buffer to
+ * patch NumberOfSections, AddressOfEntryPoint, ImageBase, SizeOfHeaders
+ * and SizeOfImage.
+ * NOTE(review): the overlay only matches the template if the compiler
+ * inserts no padding (the fields add up to 120 bytes, i.e. 0xd0..0x147) -
+ * confirm when porting to a new target.
+ */
+struct IMAGE_PE_HEADER {
+ uint32_t Signature; /* "PE\0\0" in the HEADERS template */
+ /* FILE HEADER */
+ uint16_t Machine; /* 0x014C in the HEADERS template */
+ uint16_t NumberOfSections; /* 0xFFFF placeholder in the template; set by cli_rebuildpe() */
+ uint32_t TimeDateStamp; /* the bytes "CLAM" in the HEADERS template */
+ uint32_t PointerToSymbolTable;
+ uint32_t NumberOfSymbols;
+ uint16_t SizeOfOptionalHeader; /* 0x00E0 in the HEADERS template */
+ uint16_t Characteristics;
+ /* OPTIONAL HEADER */
+ uint16_t Magic; /* 0x010B in the HEADERS template */
+ uint8_t MajorLinkerVersion;
+ uint8_t MinorLinkerVersion;
+ uint32_t SizeOfCode;
+ uint32_t SizeOfInitializedData;
+ uint32_t SizeOfUninitializedData;
+ uint32_t AddressOfEntryPoint; /* 0xFFFFFFFF placeholder in the template; set by cli_rebuildpe() */
+ uint32_t BaseOfCode;
+ uint32_t BaseOfData;
+ /* NT additional fields. */
+ uint32_t ImageBase; /* 0xFFFFFFFF placeholder in the template; set by cli_rebuildpe() */
+ uint32_t SectionAlignment; /* 0x1000 in the template, the virtual alignment cli_rebuildpe() uses */
+ uint32_t FileAlignment; /* 0x200 in the template, the raw alignment cli_rebuildpe() uses */
+ uint16_t MajorOperatingSystemVersion;
+ uint16_t MinorOperatingSystemVersion;
+ uint16_t MajorImageVersion;
+ uint16_t MinorImageVersion;
+ uint16_t MajorSubsystemVersion;
+ uint16_t MinorSubsystemVersion;
+ uint32_t Win32VersionValue;
+ uint32_t SizeOfImage; /* overwritten by cli_rebuildpe() once all sections are laid out */
+ uint32_t SizeOfHeaders; /* overwritten by cli_rebuildpe() with the raw offset of the first section */
+ uint32_t CheckSum;
+ uint16_t Subsystem;
+ uint16_t DllCharacteristics;
+ uint32_t SizeOfStackReserve;
+ uint32_t SizeOfStackCommit;
+ uint32_t SizeOfHeapReserve;
+ uint32_t SizeOfHeapCommit;
+ uint32_t LoaderFlags;
+ uint32_t NumberOfRvaAndSizes; /* 0x10 in the template; cli_rebuildpe() zeroes the 0x80 bytes that follow */
+ /* IMAGE_DATA_DIRECTORY follows.... */
+};
+/*
+ * HEADERS: 0x148-byte template that cli_rebuildpe() copies to the start of
+ * every rebuilt file.
+ * Bytes 0x00-0xcf: the fake "MZ" header (the dword at 0x3c holds 0xd0, the
+ * offset of the PE signature) followed by the 16bit stub and its text
+ * "This file was created by ClamAV for internal use and should not be run."
+ * Bytes 0xd0-0x147: "PE\0\0" and the fields described by
+ * struct IMAGE_PE_HEADER. The 0xFF-filled fields (NumberOfSections,
+ * AddressOfEntryPoint, ImageBase) are placeholders that cli_rebuildpe()
+ * overwrites.
+ */
+#define HEADERS "\
+\x4D\x5A\x90\x00\x02\x00\x00\x00\x04\x00\x0F\x00\xFF\xFF\x00\x00\
+\xB0\x00\x00\x00\x00\x00\x00\x00\x40\x00\x1A\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\
+\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xD0\x00\x00\x00\
+\x0E\x1F\xB4\x09\xBA\x0D\x00\xCD\x21\xB4\x4C\xCD\x21\x54\x68\x69\
+\x73\x20\x66\x69\x6C\x65\x20\x77\x61\x73\x20\x63\x72\x65\x61\x74\
+\x65\x64\x20\x62\x79\x20\x43\x6C\x61\x6D\x41\x56\x20\x66\x6F\x72\
+\x20\x69\x6E\x74\x65\x72\x6E\x61\x6C\x20\x75\x73\x65\x20\x61\x6E\
+\x64\x20\x73\x68\x6F\x75\x6C\x64\x20\x6E\x6F\x74\x20\x62\x65\x20\
+\x72\x75\x6E\x2E\x0D\x0A\x43\x6C\x61\x6D\x41\x56\x20\x2D\x20\x41\
+\x20\x47\x50\x4C\x20\x76\x69\x72\x75\x73\x20\x73\x63\x61\x6E\x6E\
+\x65\x72\x20\x2D\x20\x68\x74\x74\x70\x3A\x2F\x2F\x77\x77\x77\x2E\
+\x63\x6C\x61\x6D\x61\x76\x2E\x6E\x65\x74\x0D\x0A\x24\x00\x00\x00\
+\x50\x45\x00\x00\x4C\x01\xFF\xFF\x43\x4C\x41\x4D\x00\x00\x00\x00\
+\x00\x00\x00\x00\xE0\x00\x83\x8F\x0B\x01\x00\x00\x00\x10\x00\x00\
+\x00\x10\x00\x00\x00\x00\x00\x00\xFF\xFF\xFF\xFF\x00\x10\x00\x00\
+\x00\x10\x00\x00\xFF\xFF\xFF\xFF\x00\x10\x00\x00\x00\x02\x00\x00\
+\x01\x00\x00\x00\x00\x00\x00\x00\x03\x00\x0A\x00\x00\x00\x00\x00\
+\x00\x10\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\
+\x00\x00\x10\x00\x00\x10\x00\x00\x00\x00\x10\x00\x00\x10\x00\x00\
+\x00\x00\x00\x00\x10\x00\x00\x00\
+"
+
+/*
+ * Build a fake, parsable (not runnable) PE image out of `sects` sections
+ * taken from `buffer` and write it to `file`.
+ *
+ * buffer   - source of the section contents; section i is copied from
+ *            buffer + sections[i].raw, sections[i].rsz bytes long
+ * sections - array of `sects` descriptors; rva, vsz, raw and rsz are used
+ * sects    - number of sections, 1..96 (one less when an "empty" filler
+ *            section has to be put in front of sections[0])
+ * base     - stored as ImageBase
+ * ep       - stored as AddressOfEntryPoint
+ * ResRva, ResSize - written at offsets 0x10/0x14 of the data directory area
+ * file     - handed to cli_writen() as the destination
+ *
+ * Returns 1 when the image was written; 0 on invalid arguments, when the
+ * raw data would exceed CLI_MAX_ALLOCATION, on allocation failure or when
+ * cli_writen() reports -1.
+ *
+ * NOTE(review): the size of `buffer` is not passed in, so the reads at
+ * buffer + sections[i].raw cannot be bounds-checked here; callers have to
+ * validate raw/rsz against their buffer.
+ */
+int cli_rebuildpe(char *buffer, struct cli_exe_section *sections, int sects, uint32_t base, uint32_t ep, uint32_t ResRva, uint32_t ResSize, int file)
+{
+    uint32_t datasize = 0, rawbase, aligned;
+    char *pefile, *curpe;
+    struct IMAGE_PE_HEADER *fakepe;
+    int i, gotghost;
+
+    /* sections[0] is read below, so reject an empty or oversized table first */
+    if(!buffer || !sections || sects < 1 || sects > 96)
+        return 0;
+
+    /* 0x148 template + 0x80 data directories + 0x28 per section header */
+    rawbase = PESALIGN(0x148+0x80+0x28*sects, 0x200);
+
+    /* a gap between the headers and the first section is covered by an
+     * extra "empty" section, which needs one more section header */
+    gotghost = (sections[0].rva > PESALIGN(rawbase, 0x1000));
+    if(gotghost) {
+        rawbase = PESALIGN(0x148+0x80+0x28*(sects+1), 0x200);
+        /* the extra header may push the headers up to (or past) the first
+         * section; re-check so the filler size below cannot underflow */
+        gotghost = (sections[0].rva > PESALIGN(rawbase, 0x1000));
+    }
+
+    if(sects+gotghost > 96)
+        return 0;
+
+    /* Sum the aligned raw sizes, checking every step: the 32bit total must
+     * not wrap around before it is compared against the limit, otherwise
+     * the buffer allocated below would be too small for the memcpy()s. */
+    for(i = 0; i < sects; i++) {
+        if(sections[i].rsz > CLI_MAX_ALLOCATION)
+            return 0;
+        aligned = PESALIGN(sections[i].rsz, 0x200);
+        if(aligned > CLI_MAX_ALLOCATION - datasize)
+            return 0;
+        datasize += aligned;
+    }
+
+    pefile = (char *) cli_calloc(rawbase+datasize, 1);
+    if(!pefile)
+        return 0;
+
+    memcpy(pefile, HEADERS, 0x148);
+
+    /* from here on datasize tracks the virtual size of the image */
+    datasize = PESALIGN(rawbase, 0x1000);
+
+    fakepe = (struct IMAGE_PE_HEADER *)(pefile+0xd0);
+    fakepe->NumberOfSections = EC16(sects+gotghost);
+    fakepe->AddressOfEntryPoint = EC32(ep);
+    fakepe->ImageBase = EC32(base);
+    fakepe->SizeOfHeaders = EC32(rawbase);
+    memset(pefile+0x148, 0, 0x80);
+    cli_writeint32(pefile+0x148+0x10, ResRva);
+    cli_writeint32(pefile+0x148+0x14, ResSize);
+    curpe = pefile+0x148+0x80;
+
+    if(gotghost) {
+        snprintf(curpe, 8, "empty");
+        cli_writeint32(curpe+8, sections[0].rva-datasize); /* vsize */
+        cli_writeint32(curpe+12, datasize); /* rva */
+        cli_writeint32(curpe+0x24, 0xffffffff);
+        curpe += 40;
+        datasize += PESALIGN(sections[0].rva-datasize, 0x1000);
+    }
+
+    for(i = 0; i < sects; i++) {
+        snprintf(curpe, 8, ".clam%.2d", i+1);
+        cli_writeint32(curpe+8, sections[i].vsz);
+        cli_writeint32(curpe+12, sections[i].rva);
+        cli_writeint32(curpe+16, sections[i].rsz);
+        cli_writeint32(curpe+20, rawbase);
+        /* the fields at +24, +28 and +32 stay zero from cli_calloc() */
+        cli_writeint32(curpe+0x24, 0xffffffff);
+        memcpy(pefile+rawbase, buffer+sections[i].raw, sections[i].rsz);
+        rawbase += PESALIGN(sections[i].rsz, 0x200);
+        curpe += 40;
+        datasize += PESALIGN(sections[i].vsz, 0x1000);
+    }
+    fakepe->SizeOfImage = EC32(datasize);
+
+    /* rawbase now equals the total number of bytes in the rebuilt file */
+    i = (cli_writen(file, pefile, rawbase)!=-1);
+    free(pefile);
+    return i;
+}
More information about the llvm-commits
mailing list