[cfe-dev] Clang GenericTaintChecker limitations

Artem Dergachev via cfe-dev cfe-dev at lists.llvm.org
Fri Aug 12 00:51:16 PDT 2016


All right, i see. Because the analyzer failed to initialize the global, 
upon analyzing get_item() as first command, it fails to inline 
get_item() because of the potentially infinite loop inside it. Then, 
upon analyzing get_item() after put_item(), it recalls that get_item() 
is too complex to inline, and skips the call as if no body is available 
(models the call "conservatively"). The return value of get_item() is 
conjured up, and therefore carries no taint.

So, long story short, this code is already too complex for our analyzer. 
Our default options are tweaked for maximum bugs-per-second in general 
case, but maybe we could make an option to analyze deeply, no matter how 
much time it takes.

For the reference, here's my test.c file and the way i patched 
GenericTaintChecker when tried to mimic your approach. I run it with 
debug.ExprInspection and without debug.TaintGeneric and produce a 
trimmed exploded graph with -analyzer-viz-egraph-graphviz -trim-egraph 
(the last option trims the exploded graph to keep only the path to the 
warning, which is in our case the debug.ExprInspection warning that says 
that the value we're analyzing is conjured rather than modeled properly; 
i added an extra variable to reduce the possible paths).

On 8/11/16 8:07 PM, Divya Muthukumaran wrote:
> Hi Artem,
>
> I'm not sure what the protocol is for posting code here. I was trying 
> to abstract the behaviour of a well-known
> in memory key-value store into the following program so this may be 
> too much code to post here. Let me know
> if you want me to give you an even more abstract version. And again, 
> thanks for your help!
>

-------------- next part --------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct _item {
  int key;
  char value;
  struct _item *next;
} item;

void taint_add(item **it);

item *global_item_list = NULL;
int global_state = 0;

item *alloc_item(int key, char value) {
  item *new_item = malloc(sizeof(item));
  new_item->key = key;
  new_item->value = value;
  new_item->next = NULL;
  return new_item;
}

item *get_item(int key) {
  item *list = global_item_list; //  ----(D)
  while (list) {
    if (key == list->key)
      return list;
    list = list->next;
  }
  return NULL;
}

void put_item(item *new_item) {
  if (!global_item_list) {
    global_item_list = new_item; // ---- - (C)
  } else {
    new_item->next = global_item_list;
    global_item_list = new_item;
  }
  return;
}

int dispatch_op(char *command_str) {
  char *token;
  const char s[2] = " ";
  token = strtok(command_str, s);
  int key;
  char value;
  item *it;

  if (token == NULL) {
    printf("[Error] No command entered."
           "Please enter a valid command\n");
    return 1;
  }

  if (!strcmp(token, "SET")) {
    /* Grab the key */
    int key;
    token = strtok(NULL, s);
    if (token == NULL) {
      printf("[Error] Invalid number of arguments."
             "Please reenter command with correct params\n");
      return 1;
    } else {
      key = atoi(token);
      if (key == 0) {
        printf("[Error] Key must be greater than 0\n");
        return 1;
      }
    }

    /* Grab the value */
    char value;
    token = strtok(NULL, s);
    if (token == NULL) {
      printf("[Error] Invalid number of arguments."
             "Please reenter command with correct params\n");
      return 1;
    } else {
      value = *token;
    }

    item *it = alloc_item(key, value);
    taint_add(&it); // ---- (A)
    put_item(it);   // ----- (B)
    global_state = 1;
    printf("[Success] Item added.\n");
    return 1;
  } else if (!strcmp(token, "GET")) {
    int local_state = global_state;
    /* Grab the key */
    int key;
    token = strtok(NULL, s);
    if (token == NULL) {
      printf("[Error] Invalid number of arguments."
             "Please reenter command with correct params\n");
      return 1;
    } else {
      key = atoi(token);
    }

    item *it = get_item(key); //-- - (E)
    if (local_state)
      clang_analyzer_explain(it);
    if (it != NULL)
      printf("[Success] Item Found: Key %d, Value %c\n", it->key, it->value);
    else
      printf("[Failure] Item Not found.\n");
    return 1;
  } else if (!strcmp(token, "QUIT")) {
    printf("GoodBye.\n");
    return 0;
  } else {
    printf("[Error] Unknown command: Enter a valid command\n");
    return 1;
  }
}

/* Command are GET, SET, QUIT */
void process_command() {
  char *buffer = NULL;
  int read;
  unsigned int len;
  do {
    do {
      printf("Please Enter a command: "
             "GET key::int(>0) | SET key::int(>0) val::char | LIST | QUIT\n");
      read = getline(&buffer, &len, stdin);
      buffer[strcspn(buffer, "\n")] = 0;
    } while (-1 == read);
  } while (dispatch_op(buffer));

  return;
}

int main(int argc, char **argv) {
  /*
  item * A = alloc_item(1,'A');
  item * B = alloc_item(2,'B');
  taint_add(&B); // -- (P)
  put_item(A);
  put_item(B);
  item * it = get_item(2); // -- (Q)
  printf("Value of %d is %c\n", it->key, it->value); // --(R)
  */
  global_state = 0;
  process_command();
  return 0;
}
-------------- next part --------------
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index 8c8acc6..29a84a6 100644
--- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -74,6 +74,7 @@ private:
                                                        CheckerContext &C) const;
   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
+  ProgramStateRef postTaintAdd(const CallExpr *CE, CheckerContext &C) const;
   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
 
   /// Taint the scanned input if the file is tainted.
@@ -387,6 +388,7 @@ void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
     .Case("freopen", &GenericTaintChecker::postRetTaint)
     .Case("getch", &GenericTaintChecker::postRetTaint)
     .Case("wgetch", &GenericTaintChecker::postRetTaint)
+    .Case("taint_add", &GenericTaintChecker::postTaintAdd)
     .Case("socket", &GenericTaintChecker::postSocket)
     .Default(nullptr);
 
@@ -529,6 +531,16 @@ ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
   return nullptr;
 }
 
+ProgramStateRef GenericTaintChecker::postTaintAdd(const CallExpr *CE,
+    CheckerContext &C) const {
+  ProgramStateRef State = C.getState();
+  const Expr *Arg = CE->getArg(0);
+  SymbolRef Sym = getPointedToSymbol(C, Arg);
+  if (Sym)
+    State = State->addTaint(Sym);
+  return State;
+}
+
 
 // If argument 0(protocol domain) is network, the return value should get taint.
 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,


More information about the cfe-dev mailing list