AFL Reading Notes 1: Instrumentation, Initialization and Fork Server

0x00 Overview

Recently I am investigating AFL Fuzzer, and this is some of my notes about its source code. In this article I will discuss how AFL compiler instruments the target binary to be fuzzed and how some initialization of fuzzer is done. I will also discuss fork server, which enable AFL to not call execve each time the target program is run.

Disclaimer: since this is just my reading notes and my capability is limited, it is possible for the contents to be inaccurate or even wrong, and I will be glad if you can point out any mistake here.

0x01 Instrumentation

In AFL, instrumentation is done in compilation time. The instrumentation is done at assembly level: in other word, after C/C++ source code is compiled into assembly text, the instrumentation is done and a instrumented assembly text is generated, which is then used to generate the binary file.

For the instrumentation, I will only focus on how instrumentation is done instead of how instrumentation is implemented. The latter one might involve some assembly text processing, for example, which we might not be interested in as a person who only wants to learn some fuzzing techniques.

The main instrumentation logic is done in afl-as.c, and the codes to be instrumented, which is written in assembly, is in afl-as.h as string format.

Instrumented Code

The instrumented code is written in ATT assembly, but such assembly format is uncomfortable to read, so I will use afl-gcc to compile a binary and put it into IDA to read the instrumented code, with reference to the comments written in afl-as.h.

AFL instruments a piece of code on each basic block, shown below.

lea     rsp, [rsp-98h] ; allocate some stack space
mov     qword ptr [rsp+98h+input+0F80h], rdx
mov     qword ptr [rsp+98h+input+0F88h], rcx
mov     qword ptr [rsp+98h+input+0F90h], rax ; save registers
mov     rcx, 0FC50h ; rcx is the block identifier, generated randomly
call    __afl_maybe_log
mov     rax, qword ptr [rsp+98h+input+0F90h]
mov     rcx, qword ptr [rsp+98h+input+0F88h]
mov     rdx, qword ptr [rsp+98h+input+0F80h]
lea     rsp, [rsp+98h] ; recover stack and variables

Function __afl_maybe_log:

__afl_maybe_log:
lahf
seto    al ; store some flags into ax
mov     rdx, cs:__afl_area_ptr ; NULL for the first time
test    rdx, rdx
jz      short __afl_setup ; this will jump for the first time

__afl_setup is used to initialize shared memory pointer and start fork server, which will be detailed later.

__afl_setup:
cmp     cs:__afl_setup_failure, 0
jnz     short __afl_return
; do not retry setup if we had previous failures
lea     rdx, __afl_global_area_ptr
mov     rdx, [rdx]
test    rdx, rdx
jz      short __afl_setup_first
; use the global pointer if it is not NULL
mov     cs:__afl_area_ptr, rdx
jmp     short __afl_store

__afl_setup_first:
lea     rsp, [rsp-160h]
mov     [rsp+160h+var_160], rax
mov     [rsp+160h+var_158], rcx
mov     [rsp+160h+var_150], rdi
mov     [rsp+160h+var_140], rsi
mov     [rsp+160h+var_138], r8
mov     [rsp+160h+var_130], r9
mov     [rsp+160h+var_128], r10
mov     [rsp+160h+var_120], r11
movq    [rsp+160h+var_100], xmm0
movq    [rsp+160h+var_F0], xmm1
movq    [rsp+160h+var_E0], xmm2
movq    [rsp+160h+var_D0], xmm3
movq    [rsp+160h+var_C0], xmm4
movq    [rsp+160h+var_B0], xmm5
movq    [rsp+160h+var_A0], xmm6
movq    [rsp+160h+var_90], xmm7
movq    [rsp+160h+var_80], xmm8
movq    [rsp+160h+var_70], xmm9
movq    [rsp+160h+var_60], xmm10
movq    [rsp+160h+var_50], xmm11
movq    [rsp+160h+var_40], xmm12
movq    [rsp+160h+var_30], xmm13
movq    [rsp+160h+var_20], xmm14
movq    [rsp+160h+var_10], xmm15
; save registers into stack
push    r12
mov     r12, rsp
; save rsp
sub     rsp, 10h
and     rsp, 0FFFFFFFFFFFFFFF0h
; align rsp

lea     rdi, _AFL_SHM_ENV ; "__AFL_SHM_ID"
call    _getenv
test    rax, rax
; get shared memory id from environment var
jz      __afl_setup_abort
mov     rdi, rax        ; nptr
call    _atoi
xor     rdx, rdx        ; shmflg
xor     rsi, rsi        ; shmaddr
mov     rdi, rax        ; shmid
call    _shmat
; map shared memory into virtual space

cmp     rax, 0FFFFFFFFFFFFFFFFh
jz      __afl_setup_abort
mov     rdx, rax
mov     cs:__afl_area_ptr, rax
lea     rdx, __afl_global_area_ptr
mov     [rdx], rax
mov     rdx, rax
; save shared memory virtual address

At this point the shared memory pointer initialization is done, after then is the fork server. In simple words, fork server works by stopping at the beginning of the main function, and fork a child process if a new instance is needed for fuzzing. This technique significantly reduces the performance bottleneck exerted by execve call.

__afl_forkserver:
push    rdx
push    rdx
mov     rdx, 4          ; n
lea     rsi, __afl_temp ; buf
mov     rdi, 199        ; fd used to communicate with afl-fuzz, covered later
call    _write
; tell afl-fuzz that fork server is ready
; there is a corresponding `read` in afl-fuzz, covered later
; the specific byte contents does not matter
cmp     rax, 4
jnz     __afl_fork_resume

Then a loop is entered.

__afl_fork_wait_loop:   ; nbytes
mov     rdx, 4
lea     rsi, __afl_temp ; buf
mov     rdi, 198        ; status
call    _read
; wait for afl-fuzzer to start a new fuzzing process
; specific byte contents does not matter either
cmp     rax, 4
jnz     __afl_die

call    _fork ; fork a child process for fuzzing
cmp     rax, 0
jl      __afl_die
jz      short __afl_fork_resume

The child process will simply record the path coverage information into shared memory and return from this function.

__afl_fork_resume:      ; fd
mov     rdi, 198
call    _close
mov     rdi, 199       ; fd
call    _close
; close the fd
pop     rdx
pop     rdx
mov     rsp, r12
pop     r12
mov     rax, [rsp+160h+var_160]
mov     rcx, [rsp+160h+var_158]
mov     rdi, [rsp+160h+var_150]
mov     rsi, [rsp+160h+var_140]
mov     r8, [rsp+160h+var_138]
mov     r9, [rsp+160h+var_130]
mov     r10, [rsp+160h+var_128]
mov     r11, [rsp+160h+var_120]
movq    xmm0, [rsp+160h+var_100]
movq    xmm1, [rsp+160h+var_F0]
movq    xmm2, [rsp+160h+var_E0]
movq    xmm3, [rsp+160h+var_D0]
movq    xmm4, [rsp+160h+var_C0]
movq    xmm5, [rsp+160h+var_B0]
movq    xmm6, [rsp+160h+var_A0]
movq    xmm7, [rsp+160h+var_90]
movq    xmm8, [rsp+160h+var_80]
movq    xmm9, [rsp+160h+var_70]
movq    xmm10, [rsp+160h+var_60]
movq    xmm11, [rsp+160h+var_50]
movq    xmm12, [rsp+160h+var_40]
movq    xmm13, [rsp+160h+var_30]
movq    xmm14, [rsp+160h+var_20]
movq    xmm15, [rsp+160h+var_10]
lea     rsp, [rsp+160h]
; resume stack and registers
jmp     __afl_store

__afl_store:
; rcx is this_loc
xor     rcx, cs:__afl_prev_loc
xor     cs:__afl_prev_loc, rcx
; prev_loc == prev_loc ^ this_loc ^ prev_loc == this_loc
shr     cs:__afl_prev_loc, 1
; prev_loc = this_loc >> 1
inc     byte ptr [rdx+rcx]
; rcx is ((last_this_loc >> 1) ^ this_loc)
; increment corresponding bytes in shared memory
; which can record information about path cov
; "path A followed by path B is executed"
; although with possible collision

__afl_return:
add     al, 7Fh ; a hacky way to resume O flag :)
sahf
; resume flags
retn

The parent process send pid of child process to afl-fuzz, wait child process to finish, and send return status of the child process to afl-fuzz.

mov     cs:__afl_fork_pid, eax
mov     rdx, 4          ; n
lea     rsi, __afl_fork_pid ; buf
mov     rdi, 199        ; fd
call    _write
; write child pid to fd 199
mov     rdx, 0          ; options
lea     rsi, __afl_temp ; stat_loc
mov     rdi, qword ptr cs:__afl_fork_pid ; pid
call    _waitpid
; wait child process
cmp     rax, 0
jle     __afl_die

mov     rdx, 4          ; n
lea     rsi, __afl_temp ; buf
mov     rdi, 199        ; fd
call    _write
; write return status of child process to fd 199
jmp     __afl_fork_wait_loop
; jump back to the start of loop

When this function is executed for the second time, __afl_area_ptr or __afl_global_area_ptr is not NULL, so it will jump to __afl_store directly in order to record path coverage information for this particular control block.

0x02 Fork Server Initialization

The fork server initialization is done in init_forkserver, which is called in the first time the target is run.

EXP_ST void init_forkserver(char** argv) {

  static struct itimerval it;
  int st_pipe[2], ctl_pipe[2];
  int status;
  s32 rlen;

  ACTF("Spinning up the fork server...");

  if (pipe(st_pipe) || pipe(ctl_pipe)) PFATAL("pipe() failed");
  // initilize the pipe
  forksrv_pid = fork();
  // fork a process as fork server
  if (forksrv_pid < 0) PFATAL("fork() failed");
  /* ...to be covered later... */
}

In the child process:

if (!forksrv_pid) {

  /* ...some settings, which is not very intersting...*/

  dup2(dev_null_fd, 1);
  dup2(dev_null_fd, 2);
  // set stdout and stderr to /dev/null
  if (out_file) {

    dup2(dev_null_fd, 0);
    // if input is given by file (e.i. @@ in arguments)
    // stdin is also set to /dev/null
  } else {

    dup2(out_fd, 0);
    close(out_fd);
    // if input is given by stdin
    // stdin is set to out_fd, 
    // which is used to transmit testcase input data
  }

  /* Set up control and status pipes, close the unneeded original fds. */

  if (dup2(ctl_pipe[0], FORKSRV_FD) < 0) PFATAL("dup2() failed");
  if (dup2(st_pipe[1], FORKSRV_FD + 1) < 0) PFATAL("dup2() failed");
  // #define FORKSRV_FD 198
  // remember fd 198 and 199 that appears in instrumented code

  close(ctl_pipe[0]);
  close(ctl_pipe[1]);
  close(st_pipe[0]);
  close(st_pipe[1]);

  close(out_dir_fd);
  close(dev_null_fd);
  close(dev_urandom_fd);
  close(fileno(plot_file));
  // close fds
  /* This should improve performance a bit, since it stops the linker from
     doing extra work post-fork(). */

  if (!getenv("LD_BIND_LAZY")) setenv("LD_BIND_NOW", "1", 0);

  /* Set sane defaults for ASAN if nothing else specified. */

  setenv("ASAN_OPTIONS", "abort_on_error=1:"
                         "detect_leaks=0:"
                         "symbolize=0:"
                         "allocator_may_return_null=1", 0);

  /* MSAN is tricky, because it doesn't support abort_on_error=1 at this
     point. So, we do this in a very hacky way. */

  setenv("MSAN_OPTIONS", "exit_code=" STRINGIFY(MSAN_ERROR) ":"
                         "symbolize=0:"
                         "abort_on_error=1:"
                         "allocator_may_return_null=1:"
                         "msan_track_origins=0", 0);
  // set some environment variables
  execv(target_path, argv);
  // start the target binary, which will stop at fork server
  /* Use a distinctive bitmap signature to tell the parent about execv()
     falling through. */

  *(u32*)trace_bits = EXEC_FAIL_SIG;
  exit(0);

}

In parent process:

/* Close the unneeded endpoints. */

close(ctl_pipe[0]);
close(st_pipe[1]);

fsrv_ctl_fd = ctl_pipe[1];
fsrv_st_fd  = st_pipe[0]; //store pipe fd to globals

/* Wait for the fork server to come up, but don't wait too long. */

it.it_value.tv_sec = ((exec_tmout * FORK_WAIT_MULT) / 1000);
it.it_value.tv_usec = ((exec_tmout * FORK_WAIT_MULT) % 1000) * 1000;

setitimer(ITIMER_REAL, &it, NULL);

rlen = read(fsrv_st_fd, &status, 4);
// this corresponds to the first write before fork server loop
it.it_value.tv_sec = 0;
it.it_value.tv_usec = 0;

setitimer(ITIMER_REAL, &it, NULL);

/* If we have a four-byte "hello" message from the server, we're all set.
   Otherwise, try to figure out what went wrong. */

if (rlen == 4) {
  OKF("All right - fork server is up.");
  return; // if no error occurs, this function will return
}

/* ...error handlings, not interesting...*/

0x03 Instance Running

Before running an instance for fuzzing, afl-fuzz firstly write testcase input data into out_fd by function write_to_testcase and write_with_gap.

/* Write modified data to file for testing. If out_file is set, the old file
   is unlinked and a new one is created. Otherwise, out_fd is rewound and
   truncated. */

static void write_to_testcase(void* mem, u32 len) {

  s32 fd = out_fd;

  if (out_file) {

    unlink(out_file); /* Ignore errors. */

    fd = open(out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);

    if (fd < 0) PFATAL("Unable to create '%s'", out_file);

  } else lseek(fd, 0, SEEK_SET);

  ck_write(fd, mem, len, out_file); // write data into fd

  if (!out_file) {

    if (ftruncate(fd, len)) PFATAL("ftruncate() failed");
    lseek(fd, 0, SEEK_SET);

  } else close(fd);

}

/* The same, but with an adjustable gap. Used for trimming. */

static void write_with_gap(void* mem, u32 len, u32 skip_at, u32 skip_len) {

  s32 fd = out_fd;
  u32 tail_len = len - skip_at - skip_len;

  if (out_file) {

    unlink(out_file); /* Ignore errors. */

    fd = open(out_file, O_WRONLY | O_CREAT | O_EXCL, 0600);

    if (fd < 0) PFATAL("Unable to create '%s'", out_file);

  } else lseek(fd, 0, SEEK_SET);

  if (skip_at) ck_write(fd, mem, skip_at, out_file);
  if (tail_len) ck_write(fd, mem + skip_at + skip_len, tail_len, out_file);
  // write 2 chunks of data into fd

  if (!out_file) {

    if (ftruncate(fd, len - skip_len)) PFATAL("ftruncate() failed");
    lseek(fd, 0, SEEK_SET);

  } else close(fd);

}

Then, a function run_target is called to run the instance.

/* Execute target application, monitoring for timeouts. Return status
   information. The called program will update trace_bits[]. */

static u8 run_target(char** argv, u32 timeout) {

  static struct itimerval it;
  static u32 prev_timed_out = 0;

  int status = 0;
  u32 tb4;

  child_timed_out = 0;

  /* After this memset, trace_bits[] are effectively volatile, so we
     must prevent any earlier operations from venturing into that
     territory. */

  memset(trace_bits, 0, MAP_SIZE);
  MEM_BARRIER();

  /* If we're running in "dumb" mode, we can't rely on the fork server
     logic compiled into the target program, so we will just keep calling
     execve(). There is a bit of code duplication between here and 
     init_forkserver(), but c'est la vie. */

  if (dumb_mode == 1 || no_forkserver) {

  /* ...when there is no fork server, not interesting... */

  } else {

    s32 res;

    /* In non-dumb mode, we have the fork server up and running, so simply
       tell it to have at it, and then read back PID. */

    if ((res = write(fsrv_ctl_fd, &prev_timed_out, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");

    } // write 4 bytes, corresponds to `read`
      // at the start of fork server loop
      // so that fork server will start to execut
      // and fork a new instance 

    if ((res = read(fsrv_st_fd, &child_pid, 4)) != 4) {

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to request new process from fork server (OOM?)");

    } // get the pid of new forked instance

    if (child_pid <= 0) FATAL("Fork server is misbehaving (OOM?)");

  }

  /* Configure timeout, as requested by user, then wait for child to terminate. */

  it.it_value.tv_sec = (timeout / 1000);
  it.it_value.tv_usec = (timeout % 1000) * 1000;

  setitimer(ITIMER_REAL, &it, NULL);

  /* The SIGALRM handler simply kills the child_pid and sets child_timed_out. */

  if (dumb_mode == 1 || no_forkserver) { // not important

    if (waitpid(child_pid, &status, 0) <= 0) PFATAL("waitpid() failed");

  } else {

    s32 res;

    if ((res = read(fsrv_st_fd, &status, 4)) != 4) { // get return status

      if (stop_soon) return 0;
      RPFATAL(res, "Unable to communicate with fork server (OOM?)");

    }

  }

  if (!WIFSTOPPED(status)) child_pid = 0;

  it.it_value.tv_sec = 0;
  it.it_value.tv_usec = 0;

  setitimer(ITIMER_REAL, &it, NULL);

  total_execs++;

  /* Any subsequent operations on trace_bits must not be moved by the
     compiler below this point. Past this location, trace_bits[] behave
     very normally and do not have to be treated as volatile. */

  MEM_BARRIER();

  tb4 = *(u32*)trace_bits;

#ifdef __x86_64__
  classify_counts((u64*)trace_bits);
#else
  classify_counts((u32*)trace_bits);
#endif /* ^__x86_64__ */

  prev_timed_out = child_timed_out;

  /* Report outcome to caller. */

  if (WIFSIGNALED(status) && !stop_soon) {

    kill_signal = WTERMSIG(status);

    if (child_timed_out && kill_signal == SIGKILL) return FAULT_TMOUT;

    return FAULT_CRASH;

  }

  /* A somewhat nasty hack for MSAN, which doesn't support abort_on_error and
     must use a special exit code. */

  if (uses_asan && WEXITSTATUS(status) == MSAN_ERROR) {
    kill_signal = 0;
    return FAULT_CRASH;
  }

  if ((dumb_mode == 1 || no_forkserver) && tb4 == EXEC_FAIL_SIG)
    return FAULT_ERROR;

  return FAULT_NONE;
  // process the return status, todo: investigate in detail
}