AFL源码分析

基本信息

Makefile

这里是对应的项目名称、afl的版本、最终make install目录的前缀、bin目录、helper目录、doc目录和杂项的目录

PROGNAME    = afl
VERSION     = $(shell grep '^\#define VERSION ' config.h | cut -d '"' -f2)

PREFIX     ?= /usr/local
BIN_PATH    = $(PREFIX)/bin
HELPER_PATH = $(PREFIX)/lib/afl
DOC_PATH    = $(PREFIX)/share/doc/afl
MISC_PATH   = $(PREFIX)/share/afl

PROGS是会编译的文件名

SH_PROGS：sh脚本

CFLAGS：编译的flag，其中?=意思是如果没有指定编译flag的话，就使用这个；后面的-DAFL_PATH、-DDOC_PATH和-DBIN_PATH是通过-D选项在预处理阶段分别定义AFL_PATH、DOC_PATH和BIN_PATH这三个宏，后续代码中用access检查路径时都会用到这些宏

PROGS       = afl-gcc afl-fuzz afl-showmap afl-tmin afl-gotcpu afl-analyze
SH_PROGS    = afl-plot afl-cmin afl-whatsup

CFLAGS     ?= -O3 -funroll-loops
CFLAGS     += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \
        -DAFL_PATH=\"$(HELPER_PATH)\" -DDOC_PATH=\"$(DOC_PATH)\" \
        -DBIN_PATH=\"$(BIN_PATH)\"

如果是linux系统，那么LDFLAGS就会有-ldl

接着判断，如果$(CC) --version的输出中包含clang，那么TEST_CC会被设置成afl-clang，反之就是afl-gcc

ifneq "$(filter Linux GNU%,$(shell uname))" ""
  LDFLAGS  += -ldl
endif

ifeq "$(findstring clang, $(shell $(CC) --version 2>/dev/null))" ""
  TEST_CC   = afl-gcc
else
  TEST_CC   = afl-clang
endif

这里就是一些公共的头文件

1	`COMM_HDR = alloc-inl.h config.h debug.h types.h`

make all的指令，依赖项在后面，执行顺序是从左到右

1	`all: test_x86 $(PROGS) afl-as test_build all_done`

test_x86用于判断当前平台是否可以生成x86的代码，即用于平台判断

ifndef AFL_NO_X86

test_x86:
  @echo "[*] Checking for the ability to compile x86 code..."
  @echo 'main() { __asm__("xorb %al, %al"); }' | $(CC) -w -x c - -o .test || ( echo; echo "Oops, looks like your compiler can't generate x86 code."; echo; echo "Don't panic! You can use the LLVM or QEMU mode, but see docs/INSTALL first."; echo "(To ignore this error, set AFL_NO_X86=1 and try again.)"; echo; exit 1 )
  @rm -f .test
  @echo "[+] Everything seems to be working, ready to compile."

else

test_x86:
  @echo "[!] Note: skipping x86 compilation checks (AFL_NO_X86 set)."

endif

afl-gcc：编译出afl-gcc，使用上方说到的编译flag；其中$@是自动变量，值为afl-gcc；编译完毕之后afl-g++、afl-clang、afl-clang++都软链接到afl-gcc，一种优化代码的方式，减少维护成本

afl-as：编译出afl-as；软链接as到afl-as

afl-gcc: afl-gcc.c $(COMM_HDR) | test_x86
  $(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
  set -e; for i in afl-g++ afl-clang afl-clang++; do ln -sf afl-gcc $$i; done

afl-as: afl-as.c afl-as.h $(COMM_HDR) | test_x86
  $(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)
  ln -sf afl-as as

下面的都是编译出对应的文件，但是就没有软链接这个步骤了

afl-fuzz: afl-fuzz.c $(COMM_HDR) | test_x86
  $(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)

afl-showmap: afl-showmap.c $(COMM_HDR) | test_x86
  $(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)

afl-tmin: afl-tmin.c $(COMM_HDR) | test_x86
  $(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)

afl-analyze: afl-analyze.c $(COMM_HDR) | test_x86
  $(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)

afl-gotcpu: afl-gotcpu.c $(COMM_HDR) | test_x86
  $(CC) $(CFLAGS) $@.c -o $@ $(LDFLAGS)

分析完毕上方的makefile，其实大致可以知道哪一些是需要重点分析的，然后makefile里剩下的不是特别重要，就不再分析了。

afl-gcc.c

核心函数只有find_as(argv[0])和edit_params(argc, argv)，当参数确定之后，最后会调用execvp(cc_params[0], (char **)cc_params);去执行真正的编译器

添加这一段逻辑之后，执行make，看到以下的输出，其实就和之前分析makefile对上了，只不过这里是testbuild，我们没有去分析，但是这些编译flag都是熟悉的

for (int i = 0; i < argc; i++) {
  printf("%s\n", argv[i]);
}
getchar();
find_as(argv[0]);

find_as(u8* argv0)

这个函数的作用是获得正确的as_path路径

先从环境变量AFL_PATH中获取afl_path，如果存在，并且/AFL_PATH/as存在且可执行，那么就会给as_path赋值为/AFL_PATH/as并退出
如果AFL_PATH不存在，那么就会去获取argv[0]这个参数路径下最后一个”/“后的afl-gcc（例如这个路径/home/flyyy/Desktop/workspace/fuzz/AFL/afl-gcc，那么获取的就是/afl-gcc），将这个值赋值给slash
如果slash存在，那么会获取afl-gcc前面的path（以前面的为例，就是/home/flyyy/Desktop/workspace/fuzz/AFL），赋值给dir，接着会去判断这个dir目录下是否存在afl-as，如果存在且可以执行，那么就会给as_path赋值为dir/afl-as并退出
如果slash不存在，那么会使用默认的AFL_PATH，AFL_PATH一般等于/usr/local/lib/afl，接着会判断这个目录下的as是否存在且可执行，可以则给as_path赋值为/usr/local/lib/afl/as，然后退出。反之则输出报错信息，并让用户重新设置AFL_PATH

edit_params(u32 argc, char** argv)

整体就是通过命令行参数来完善cc_params

如果是FreeBSD和x86_64，那么初始化m32_set = 0
获取argv[0]的最后一个/之后的值（包括/）赋值给name
- 如果name为NULL（即argv[0]中不含/）则name = argv[0]，反之name++，也就是不要/了
如果name是afl-clang
- 初始化clang_mode = 1;设置CLANG_ENV_VAR为1
- 如果name是afl-clang++，并且设置了AFL_CXX，那么cc_params[0]=AFL_CXX；反之为clang++
- 如果name不是afl-clang++，并且设置了AFL_CC，那么cc_params[0]=AFL_CC；反之为clang
如果name不是afl-clang
- 为APPLE平台时
  - name为afl-g++，则cc_params[0] = getenv(“AFL_CXX”);
  - name为afl-gcj，则cc_params[0] = getenv(“AFL_GCJ”);
  - 否则cc_params[0] = getenv(“AFL_CC”);
  - 如果cc_params[0]为空则报错
- 如果不是APPLE平台
  - 如果name是afl-g++，并且设置了AFL_CXX，那么cc_params[0]=AFL_CXX；反之则为g++
  - 如果name是afl-gcj，并且设置了AFL_GCJ，那么cc_params[0]=AFL_GCJ；反之则为gcj
  - 否则获取AFL_CC，如果不为空cc_params[0] = AFL_CC；反之则为gcc

下面进入一个大循环

依次argc--，并且u8* cur = *(++argv);
如果参数cur为”-B”，且”-B”紧邻的下一个字节为空，且还存在参数，则continue
如果参数cur为”-integrated-as”，则continue
如果参数cur为”-pipe”，则continue
如果为FreeBSD和x86_64，且参数cur为”-m32”，则m32_set = 1
如果参数cur为”-fsanitize=address”或者”-fsanitize=memory”，则asan_set = 1;
如果参数cur为”FORTIFY_SOURCE”，则fortify_set = 1;
以上判断都完毕之后，执行cc_params[cc_par_cnt++] = cur;

经过上方的while循环的判断

执行cc_params[cc_par_cnt++] = “-B”;cc_params[cc_par_cnt++] = as_path;
如果为clang mode的话
- 执行cc_params[cc_par_cnt++] = “-no-integrated-as”;
如果设置了环境变量AFL_HARDEN，那么cc_params[cc_par_cnt++] = “-fstack-protector-all”;
- 如果fortify_set≠1，那么cc_params[cc_par_cnt++] = “-D_FORTIFY_SOURCE=2”;
如果设置asan_set
- 那么设置AFL_USE_ASAN为1
如果没设置asan_set，但设置了AFL_USE_ASAN
- 如果设置了AFL_USE_MSAN，则报错
- 如果设置了AFL_HARDEN，则报错
- 执行cc_params[cc_par_cnt++] = “-U_FORTIFY_SOURCE”;cc_params[cc_par_cnt++] = “-fsanitize=address”;
如果没设置asan_set，当设置了AFL_USE_MSAN
- 如果设置了AFL_USE_ASAN，则报错
- 如果设置了AFL_HARDEN，则报错
如果没有设置AFL_DONT_OPTIMIZE
- 如果满足FreeBSD && x86_64
  - 满足(!clang_mode || !m32_set)==1，则cc_params[cc_par_cnt++] = “-g”;
- 如果不满足FreeBSD && x86_64
  - 则cc_params[cc_par_cnt++] = “-g”;
- 执行cc_params[cc_par_cnt++] = “-O3”;
- 执行cc_params[cc_par_cnt++] = “-funroll-loops”;
- 执行cc_params[cc_par_cnt++] = “-D__AFL_COMPILER=1”;
- 执行cc_params[cc_par_cnt++] = “-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1”;
如果设置了AFL_NO_BUILTIN
- 执行cc_params[cc_par_cnt++] = “-fno-builtin-strcmp”;
- 执行cc_params[cc_par_cnt++] = “-fno-builtin-strncmp”;
- 执行cc_params[cc_par_cnt++] = “-fno-builtin-strcasecmp”;
- 执行cc_params[cc_par_cnt++] = “-fno-builtin-strncasecmp”;
- 执行cc_params[cc_par_cnt++] = “-fno-builtin-memcmp”;
- 执行cc_params[cc_par_cnt++] = “-fno-builtin-strstr”;
- 执行cc_params[cc_par_cnt++] = “-fno-builtin-strcasestr”;
执行 cc_params[cc_par_cnt] = NULL;，为了截断区分

afl-as.c

该文件的主要逻辑是：先通过edit_params整理出调用真实汇编器as所需的命令行参数，接着对编译器生成的.s汇编文件进行插桩并写入modified_file，最后通过execvp调用真实的as去汇编这个插桩后的文件

main(int argc, char** argv)

定义的变量

s32 pid;
u32 rand_seed;
int status;
u8* inst_ratio_str = getenv("AFL_INST_RATIO");

struct timeval tv;
struct timezone tz;

获取环境变量AFL_INST_RATIO，赋值给inst_ratio_str
获取环境变量CLANG_ENV_VAR，两次!!，然后赋值给clang_mode
获取随机数种子，rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
进入edit_params函数
设置环境变量AS_LOOP_ENV_VAR为1
条件判断，如果环境变量AFL_USE_ASAN或者AFL_USE_MSAN
- 设置sanitizer = 1;inst_ratio /= 3;
判断just_version是否为1，如果为1则不会对.s文件插桩
创建子进程，执行execvp(as_params[0], (char**)as_params);，父进程等待子进程结束
获取环境变量AFL_KEEP_ASSEMBLY，存在则保存modified_file，反之则删除（默认就是删除）
end

edit_params(int argc, char** argv)

获取TMPDIR环境变量的值，赋值给tmp_dir
获取AFL_AS环境变量的值，赋值给afl-as
判断tmp_dir是否为空，为空则获取环境变量TEMP的值，赋值给tmp_dir
判断tmp_dir是否为空，为空则获取环境变量TMP的值，赋值给tmp_dir
判断tmp_dir是否为空，为空则将"/tmp"赋值给tmp_dir
给as_params分配内存
- 如果变量afl-as的值不为空，则将其值赋值给as_params[0]
- 反之为空，则将默认值"as"赋值给as_params[0]
接着进入一个for 循环，for循环的次数是argc（也就是说这里只将用户输入的变量赋值，系统的环境变量不赋值）
- 如果argv存在”--64”字段，将use_64bit赋值为1；如果argv存在”--32”字段，将use_64bit赋值为0
- 将argv的值赋值给as_params
将命令行输入的最后一个参数赋值给变量input_file
- 判断input_file[0]包含”-“字段
  - 判断如果input_file+1存在”-version”字段
    - 变量just_version=1，变量modified_file=input_file，接着goto wrap_things_up
  - 不存在”-version”字段
    - 如果input_file[1]存在内容，则输出报错信息；反之input_file=NULL;
- input_file[0]不包含”-“字段，
  - 则依次比较input_file的开头是否为tmp_dir的值、”/var/tmp/”、”/tmp/”，如果三者都不匹配，则将变量pass_thru赋值为1（该变量为1说明输入文件不在临时目录下，很可能是用户直接传入的汇编文件而不是编译器生成的临时文件，此时不做插桩，只原样透传）
modified_file被赋值，赋值的格式为”%s/.afl-%u-%u.s”，参数依次为tmp_dir、getpid()和time(NULL)
wrap_things_up：
- 将as_params的最后一个值赋值为modified_file，同时尾部置为NULL

add_instrumentation(void)

首先定义了一堆变量

static u8 line[MAX_LINE];

FILE* inf;
FILE* outf;
s32 outfd;
u32 ins_lines = 0;

u8  instr_ok = 0, skip_csect = 0, skip_next_label = 0,
    skip_intel = 0, skip_app = 0, instrument_next = 0

判断input_file是否存在，如果存在则以只读模式打开，并赋值给FILE* inf；反之则是复制stdin给FILE* inf
以「 O_WRONLY | O_EXCL | O_CREAT, 0600」模式打开modified_file文件，然后赋值给outfd
- 判断这个文件是否成功打开
接着使用fdopen共享outfd，赋值给FILE* outf
- 判断是否成功共享

下面是主要去插桩的地方

/* If we're in the right mood for instrumenting, check for function
   names or conditional labels. This is a bit messy, but in essence,
   we want to catch:

     ^main:      - function entry point (always instrumented)
     ^.L0:       - GCC branch label
     ^.LBB0_0:   - clang branch label (but only in clang mode)
     ^\tjnz foo  - conditional branches

   ...but not:

     ^# BB#0:    - clang comments
     ^ # BB#0:   - ditto
     ^.Ltmp0:    - clang non-branch labels
     ^.LC0       - GCC non-branch labels
     ^.LBB0_0:   - ditto (when in GCC mode)
     ^\tjmp foo  - non-conditional jumps

   Additionally, clang and GCC on MacOS X follow a different convention
   with no leading dots on labels, hence the weird maze of #ifdefs
   later on.

 */

从注释可以得知，main函数入口、gcc和clang的label 标签、条件跳转是插桩的地方。一些注释、无条件跳转、非label标签都是不插桩的地方

接着进入一个大循环，这个循环就是主要的插桩逻辑

进入循环，循环的条件是fgets(line, MAX_LINE, inf)，每次读取一行数据（最多MAX_LINE-1个字符）
条件判断(!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok && instrument_next && line[0] == ‘\t’ && isalpha(line[1]))
- 如果满足，则使用通过判断use_64bit这个变量来判断，使用fprintf将对应的64/32位的插桩汇编插入到当前位置
- 接着将instrument_next=0，然后ins_lines++
将读取的line，输入到outf中，也就是保存到这个文件里
如果变量pass_thru=1，则continue。这个变量为1说明输入文件不在临时目录下（不是编译器生成的临时汇编文件），此时回到循环开始，同时也无法进入上面的if分支，也就是说，这类文件只做原样拷贝，不进行插桩。
条件判断(line[0] == ‘\t’ && line[1] == ‘.’)
- 条件判断(!clang_mode && instr_ok && !strncmp(line + 2, “p2align “, 8) && isdigit(line[10]) && line[11] == ‘\n’)，满足skip_next_label=1
- 条件判断，如果line+2中存在text、section\t.text、section\t__TEXT,__ text、section TEXT, text字段，满足则instr_ok = 1，说明可以插桩，然后回到循环开头
- 条件判断，如果line+2中存在section\t、section、bss\n、data\n这些字段，满足则instr_ok=0，说明不可以插桩，然后回到循环开头
条件判断(strstr(line, “.code”))，这里是为了防止64位模式给32位程序插桩而产生的判断
- 如果line中存在.code32，则skip_csect = use_64bit
- 如果line中存在.code64，则skip_csect = !use_64bit
如果line中存在.intel_syntax字段，则skip_intel=1
如果line中存在.att_syntax字段，则skip_intel=0，说明插桩不支持intel语法
条件判断(line[0] == ‘#’ || line[1] == ‘#’)，如果存在注释或者伪指令
- 如果line中存在#APP，则skip_app=1，说明这里不会对手写的汇编进行插桩
- 如果line中存在#NO_APP，则skip_app=0
条件判断，(skip_intel || skip_app || skip_csect || !instr_ok || line[0] == ‘#’ || line[0] == ‘ ‘)
- 如果存在intel语法、手写汇编、架构不一致、无法插桩、注释行、以空格开头的行，则回到循环开头
条件判断(line[0] == ‘\t’)
- 条件判断(line[1] == ‘j’ && line[2] != ‘m’ && R(100) < inst_ratio)
  - 插桩，ins_lines++
- 回到循环开头
条件判断(strstr(line, “:”))
- 条件判断(line[0] == ‘.’)
  - 如果满足.L<num> / .LBB<num> 的形式，并且R(100) < inst_ratio，这里插桩的概率取决于inst_ratio的数值
    - 如果skip_next_label=0，那么instrument_next=1；
    - 如果skip_next_label=1，那么只把skip_next_label重新置为0，这一个label不插桩；
  - 如果line[0]不是’.’（即函数标签），则instrument_next = 1;
循环结束

下面的代码很简单，直接贴出来

if (ins_lines)
  fputs(use_64bit ? main_payload_64 : main_payload_32, outf);

if (input_file) fclose(inf);
fclose(outf);

if (!be_quiet) {

  if (!ins_lines) WARNF("No instrumentation targets found%s.",
                        pass_thru ? " (pass-thru mode)" : "");
  else OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).",
           ins_lines, use_64bit ? "64" : "32",
           getenv("AFL_HARDEN") ? "hardened" : 
           (sanitizer ? "ASAN/MSAN" : "non-hardened"),
           inst_ratio);
 
}

如果这个.s文件被插桩，则会在尾部插入一段64/32位的payload
如果input_file存在，那么关闭inf
关闭outf
一些简单判断的输出

简单的调试

terminal 接受三个参数

第二个必须是架构？

第三个是输入文件

terminal 最后一个是输入的文件名称

如果最后一个是--version 那么就会输出对应的版本信息

afl-clang-fast.c

整体来说和afl-gcc的流程一致

find_obj(u8* argv0)

获取环境变量AFL_PATH的值，赋值给afl_path
如果afl_path存在
- 去找其目录下的afl-llvm-rt.o（如果afl_path为/home/flyyy/Desktop/workspace/fuzz/AFL，那么就去找这个目录下的afl-llvm-rt.o）
- 同时判断是否可达，则赋值给obj_path，然后return
获取”/“ 后面的执行文件，以上面为例，就是/afl-clang-fast，并赋值给slash
如果slash存在
- 获取前缀目录，赋值给dir，也就是dir=/home/flyyy/Desktop/workspace/fuzz/AFL
- 寻找dir目录下的afl-llvm-rt.o
- 同时判断是否可达，则赋值给obj_path，然后return
以上都不满足就去默认目录/usr/local/lib/afl下寻找afl-llvm-rt.o
- 判断是否可达，则赋值给obj_path，然后return

edit_params(u32 argc, char** argv)

函数内定义的变量

1
2
3

u8 fortify_set = 0, asan_set = 0, x_set = 0, bit_mode = 0;
u8 *name

获取argv[0]变量中，包括”/“以及之后的值，赋值给name
如果name存在则name++，反之name=argv[0]
比较name
- 如果name == “afl-clang-fast++”，则去获取环境变量AFL_CXX，赋值给alt_cxx，如果alt_cxx存在则cc_params[0]=alt_cxx，反之cc_params[0]=”clang++”
- 如果name == “afl-clang-fast”，则去获取环境变量AFL_CC，赋值给alt_cc，如果alt_cc存在则cc_params[0]=alt_cc，反之cc_params[0]=”clang”
接着给cc_params添加几个参数，分别是 “-Xclang” “-load” “-Xclang”和afl-llvm-pass.so的绝对路径（find_obj获取）
进入循环，判断传入的argv数组内容，根据内容来给变量赋值
- u8* cur = *(++argv);
- 存在”-m32”或者”armv7a-linux-androideabi”，则bit_mode = 32;
- 存在”-m64”，则bit_mode = 64;
- 存在”-x”，则x_set=1;
- 存在”-fsanitize=address”或者”-fsanitize=memory”，则asan_set=1
- 存在”FORTIFY_SOURCE”，则fortify_set = 1;
- 存在 “-Wl,-z,defs”或者”-Wl,--no-undefined”，则回到循环开始
- cc_params[cc_par_cnt++] = cur
如果环境变量AFL_HARDEN存在
- cc_param增加一个”-fstack-protector-all”
- 如果fortify_set==0，cc_param增加一个-D_FORTIFY_SOURCE=2
如果asan_set==0，并且环境变量AFL_USE_ASAN和AFL_USE_MSAN不同时存在
- AFL_USE_ASAN存在，cc_params增加两个变量”-U_FORTIFY_SOURCE”和”-fsanitize=address”
- AFL_USE_MSAN存在，cc_params增加两个变量”-U_FORTIFY_SOURCE”和”-fsanitize=memory”
如果环境变量AFL_DONT_OPTIMIZE不存在
- cc_params增加三个参数：-g、-O3和-funroll-loops，也就是默认使用O3优化

对于-funroll-loops的解释：该选项让编译器对循环进行展开，以减少循环控制的开销

如果环境变量AFL_NO_BUILTIN存在

统一添加几个参数

最后会添加一些参数，是为了避免插桩的代码被优化，这里采取了三种方法

  /* When the user tries to use persistent or deferred forkserver modes by
     appending a single line to the program, we want to reliably inject a
     signature into the binary (to be picked up by afl-fuzz) and we want
     to call a function from the runtime .o file. This is unnecessarily
     painful for three reasons:

     1) We need to convince the compiler not to optimize out the signature.
        This is done with __attribute__((used)).

     2) We need to convince the linker, when called with -Wl,--gc-sections,
        not to do the same. This is done by forcing an assignment to a
        'volatile' pointer.

     3) We need to declare __afl_persistent_loop() in the global namespace,
        but doing this within a method in a class is hard - :: and extern "C"
        are forbidden and __attribute__((alias(...))) doesn't work. Hence the
        __asm__ aliasing trick.

   */

  cc_params[cc_par_cnt++] = "-D__AFL_LOOP(_A)="
    "({ static volatile char *_B __attribute__((used)); "
    " _B = (char*)\"" PERSIST_SIG "\"; "
#ifdef __APPLE__
    "__attribute__((visibility(\"default\"))) "
    "int _L(unsigned int) __asm__(\"___afl_persistent_loop\"); "
#else
    "__attribute__((visibility(\"default\"))) "
    "int _L(unsigned int) __asm__(\"__afl_persistent_loop\"); "
#endif /* ^__APPLE__ */
    "_L(_A); })";

  cc_params[cc_par_cnt++] = "-D__AFL_INIT()="
    "do { static volatile char *_A __attribute__((used)); "
    " _A = (char*)\"" DEFER_SIG "\"; "
#ifdef __APPLE__
    "__attribute__((visibility(\"default\"))) "
    "void _I(void) __asm__(\"___afl_manual_init\"); "
#else
    "__attribute__((visibility(\"default\"))) "
    "void _I(void) __asm__(\"__afl_manual_init\"); "
#endif /* ^__APPLE__ */
    "_I(); } while (0)"

如果x_set存在

最后根据上面的代码来判断最后一个参数，根据64/32位来判断

非安卓平台都需要执行这个


#ifndef __ANDROID__
  switch (bit_mode) {

    case 0:
      cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt.o", obj_path);
      break;

    case 32:
      cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-32.o", obj_path);

      if (access(cc_params[cc_par_cnt - 1], R_OK))
        FATAL("-m32 is not supported by your compiler");

      break;

    case 64:
      cc_params[cc_par_cnt++] = alloc_printf("%s/afl-llvm-rt-64.o", obj_path);

      if (access(cc_params[cc_par_cnt - 1], R_OK))
        FATAL("-m64 is not supported by your compiler");

      break;

  }
#endif

  cc_params[cc_par_cnt] = NULL

main(int argc, char** argv)

afl-llvm-pass.so.cc

llvm-pass待学习

afl-llvm-rt.o.c

afl-fuzz.c

这个是主要的fuzz流程产生的函数

参数解析

首先是一些变量的定义，时间/随机种子的获取、文档路径的判断，接着就到了参数的解析，如下的一个循环，其中开头的+表示遇到第一个非选项参数就停止解析（这样目标程序自己的参数就不会被afl-fuzz解析），参数后带:表示这个参数的后面需要跟一个参数

1	`while ((opt = getopt(argc, argv, "+i:o:f:m:b:t:T:dnCB:S:M:x:QV")) > 0)`

解释下各个参数

-i [in_dir]
- in_dir是种子的输入目录
-o [out_dir]
- out_dir是结果的输出目录
-M [sync_id]
- sync_id第一种情况，任意指定
- sync_id第二种情况，格式为sync_id:master_id/master_max；如果满足(sscanf(c + 1, “%u/%u”, &master_id, &master_max) != 2 || !master_id || !master_max || master_id > master_max || master_max > 1000000)，说明格式不合法，直接fatal
-S [sync_id]
- sync_id任意指定
-f [out_file]
- out_file为目标文件
-x [extras_dir]
- extras_dir为字典目录
-t [exec_tmout][suffix]
- exec_tmout为%u，不能小于5，开头不能是-
- suffix为%c，可有可无，为’+’号是表示宽松模式，也就是执行超过exec_tmout时不fault而是warn
-m [mem_limit][suffix]
- mem_limit为%llu，不能以-开头
  - 不能小于5
  - 如果是32位系统，不能大于2000
- suffix其实就是单位，有TGkM，不同的会进行对应大小 */1024
-b [cpu_to_bind]
- cpu_to_bind为%u，但是不能小于1
-B [in_bitmap]
- in_bitmap解析无误之后，会调用read_bitmap(in_bitmap);
-n
- 就是dumb mode
-T [use_banner]
- use_banner用户指定的banner
-Q
- 指定为qemu mode
- 如果没有内存限制的话，就会设置一个默认的内存限制，执行mem_limit = MEM_LIMIT_QEMU
-V
- 输出版本信息

setup_signal_handlers(void)

setup_signal_handlers 函数设置信号

check_asan_opts(void)

fix_up_sync(void)

这个函数为了设置sync_dir和out_dir，其中sync_dir是out_dir的上一层目录

如果开启了dumb mode，则fatal，因为-S -M与-n互斥
如果skip_deterministic=1，
- 如果force_deterministic=1，则说明-M和-d互斥，因此fatal（因为-M会设置force_deterministic=1，也就是要使用确定性变异，而skip_deterministic是跳过确定性变异）
- 如果force_deterministic≠1，则说明-S和-d冗余，因此fatal（-S和-d都会设置skip_deterministic=1，也就是跳过确定性变异）
检查sync_id，只能包含字母、数字和’-‘ ‘_‘，否则fatal
检测sync_id长度不能大于32，否则fatal
赋值sync_dir = out_dir
赋值out_dir = out_dir/sync_id
如果force_deterministic为0的话，也就是此时不是master
- 对应的就是slave，那么设置skip_deterministic = 1;use_splicing = 1;
- 也就是跳过确定性变异

save_cmdline(u32 argc, char** argv)

拷贝命令行参数并赋值给orig_cmdline，也就是保存初始的命令行参数

设置use_banner

如果use_banner为空
- 如果sync_id存在，则use_banner=sync_id
- 如果sync_id不存在，则use_banner=目标程序名称，例如上方命令行，也就是test
判断use_banner的长度，如果大于40则被截断

check_if_tty(void)

检查是否是tty设备

get_core_count(void)

bind_to_free_cpu(void)

check_crash_handling(void)

check_cpu_governor(void)

setup_post(void)

调用自己提前写的的so，去执行对应的逻辑

如果环境变量AFL_POST_LIBRARY不存在，则return
如果环境变量AFL_POST_LIBRARY存在
- 调用dlopen(fn, RTLD_NOW)加载so动态库
- 在加载的库中查找名为 afl_postprocess 的函数，并将其地址存入 post_handler 函数指针
- 简单的测试下，调用post_handler(“hello”, &tlen);

其实这个so可以直接写，然后执行自己的逻辑

setup_shm(void)

申请一块共享内存，然后通过shmat将这一块内核的空间让afl进程也可以访问

其中的MAP_SIZE = 1 << 16

如果没有提前指定in_bitmap，那么初始化virgin_bits，memset(virgin_bits, 255, MAP_SIZE)
如果-B指定了in_bitmap，那么会跳过初始化virgin_bits
接着同样的方式初始化virgin_tmout和virgin_crash
申请一段共享内存，后续将这一段内存attach到进程的地址空间
- trace_bits = shmat(shm_id, NULL, 0);

下方是共享内存attach前后的变化

执行前

AFL进程地址空间:
┌─────────────────┐
│   代码段        │
│   数据段        │  
│   堆           │
│   栈           │
└─────────────────┘

共享内存: 存在于内核中，但AFL无法访问

执行后

AFL进程地址空间:
┌─────────────────┐
│   代码段        │
│   数据段        │  
│   堆           │
│ trace_bits ────┼──→ 共享内存段 (64KB)
│   栈           │     [0][0][0]...[0]
└─────────────────┘

init_count_class16(void)

这里是初始化count_class_lookup16

这里是主要的初始化逻辑

u32 b1, b2;

for (b1 = 0; b1 < 256; b1++) 
  for (b2 = 0; b2 < 256; b2++)
    count_class_lookup16[(b1 << 8) + b2] = 
      (count_class_lookup8[b1] << 8) |
      count_class_lookup8[b2];

这里为了避免噪音，因此对于命中的次数进行了分类

static const u8 count_class_lookup8[256] = {
  [0]           = 0,     // 0次执行 → 桶0
  [1]           = 1,     // 1次执行 → 桶1  
  [2]           = 2,     // 2次执行 → 桶2
  [3]           = 4,     // 3次执行 → 桶4
  [4 ... 7]     = 8,     // 4-7次执行 → 桶8
  [8 ... 15]    = 16,    // 8-15次执行 → 桶16
  [16 ... 31]   = 32,    // 16-31次执行 → 桶32
  [32 ... 127]  = 64,    // 32-127次执行 → 桶64
  [128 ... 255] = 128    // 128-255次执行 → 桶128
};

这里是初始化的一个例子。lookup16双字节检索，为了速度更快

// 例子：输入 0x1234 (b1=0x12=18, b2=0x34=52)
b1 = 18  → count_class_lookup8[18] = 32
b2 = 52  → count_class_lookup8[52] = 64

// 结果：(32 << 8) | 64 = 0x2040
count_class_lookup16[0x1234] = 0x2040

setup_dirs_fds(void)

创建out_dir下的一系列文件

如果设置了sync_id，则创建sync_dir这个文件夹
接着创建out_dir
- 如果mkdir失败
  - 如果errno不是EEXIST，说明是其他错误，会fatal退出
  - 如果errno是EEXIST，说明目录之前已经存在，则调用maybe_delete_out_dir()
- 如果mkdir成功（目录之前不存在）
  - 如果in_place_resume为1，则报错
  - 调用open打开文件，并赋值给out_dir_fd

接着创建一系列文件夹

%s => out_dir

%s/queue
%s/queue/.state/
%s/queue/.state/deterministic_done/
%s/queue/.state/auto_extras/
%s/queue/.state/redundant_edges/
%s/queue/.state/variable_behavior/

如果存在sync_id
- 创建%s/.synced/文件夹

接着继续创建文件夹

%s => out_dir

%s/crashes
%s/hangs

fprintf(plot_file, "# unix_time, cycles_done, cur_path, paths_total, "
                     "pending_total, pending_favs, map_size, unique_crashes, "
                     "unique_hangs, max_depth, execs_per_sec\n");

执行dev_null_fd = open(“/dev/null”, O_RDWR);
执行dev_urandom_fd = open(“/dev/urandom”, O_RDONLY);
创建%s/plot_data文件，调用fdopen打开该文件，返回值由plot_file保存，接着调用fprintf(plot_file, “xxxx”);将内容写入文件

read_testcases(void)

这个函数主要是对于in_dir目录下的种子进行操作，首先是判断shuffle_queue标志位，接着判断了in_dir下的种子是否已经做过确定性变异，通过.state/deterministic_done/filename来判断。对于已经做过确定性变异的种子会在其添加到queue时，设置passed_det=1

分配新路径fn = alloc_printf(“%s/queue”, in_dir);，接着判断fn是否可达，如果可达，则设置in_dir = fn，否则就free掉
扫描in_dir下的文件，执行的语句是nl_cnt = scandir(in_dir, &nl, NULL, alphasort)，scandir可以保证的读出文件的顺序是固定的，同时文件的基本信息被保存在nl变量中
判断读出文件的数量
- 如果nl_cnt 小于0，则报错
- nl_cnt 大于1，且shuffle_queue=1，则进入shuffle_ptrs函数
- 无论是否打乱顺序，接着都会进入下面的循环，依次处理每个文件
  - 获取路径fn = alloc_printf(“%s/%s”, in_dir, nl[i]->d_name);
  - 获取dfn = alloc_printf(“%s/.state/deterministic_done/%s”, in_dir, nl[i]->d_name);
  - 初始化passed_det = 0，用于判断是否发生确定性变异
  - 如果dfn可达
    - 说明这个种子已经发生过确定性变异，接着执行passed_det=1
  - 执行add_to_queue(fn, st.st_size, passed_det);
- 初始化last_path_time = 0;
- 初始化queued_at_start = queued_paths;

add_to_queue(u8* fname, u32 len, u8 passed_det)

需要使用到的链表结构

struct queue_entry {

  u8* fname;                          /* File name for the test case      */
  u32 len;                            /* Input length                     */

  u8  cal_failed,                     /* Calibration failed?              */
      trim_done,                      /* Trimmed?                         */
      was_fuzzed,                     /* Had any fuzzing done yet?        */
      passed_det,                     /* Deterministic stages passed?     */
      has_new_cov,                    /* Triggers new coverage?           */
      var_behavior,                   /* Variable behavior?               */
      favored,                        /* Currently favored?               */
      fs_redundant;                   /* Marked as redundant in the fs?   */

  u32 bitmap_size,                    /* Number of bits set in bitmap     */
      exec_cksum;                     /* Checksum of the execution trace  */

  u64 exec_us,                        /* Execution time (us)              */
      handicap,                       /* Number of queue cycles behind    */
      depth;                          /* Path depth                       */

  u8* trace_mini;                     /* Trace bytes, if kept             */
  u32 tc_ref;                         /* Trace bytes ref count            */

  struct queue_entry *next,           /* Next element, if any             */
                     *next_100;       /* 100 elements ahead               */

};

有了上方的结构体，这里的queue的添加就很简单。下方实现了一个简单的单链表，然后还有一个next_100的指针，每一次可以跳过100项，增加遍历的速度

每执行一次都会queued_paths++;pending_not_fuzzed++，当添加完毕之后会有一个获取添加表项的时间，同时更新last_path_time的值

static void add_to_queue(u8* fname, u32 len, u8 passed_det) {

  struct queue_entry* q = ck_alloc(sizeof(struct queue_entry));

  q->fname        = fname;
  q->len          = len;
  q->depth        = cur_depth + 1;
  q->passed_det   = passed_det;

  if (q->depth > max_depth) max_depth = q->depth;

  if (queue_top) {

    queue_top->next = q;
    queue_top = q;

  } else q_prev100 = queue = queue_top = q;

  queued_paths++;
  pending_not_fuzzed++;

  cycles_wo_finds = 0;

  /* Set next_100 pointer for every 100th element (index 0, 100, etc) to allow faster iteration. */
  if ((queued_paths - 1) % 100 == 0 && queued_paths > 1) {

    q_prev100->next_100 = q;
    q_prev100 = q;

  }

  last_path_time = get_cur_time();

}

简图表示如下

shuffle_ptrs(void** ptrs, u32 cnt)

作用是将一个数组里的指针顺序随机打乱

load_auto(void)

读入字典文件的一个wrapper，字典长度无误之后会进一步评估字典内容

进入一个循环，循环次数为USE_AUTO_EXTRAS，即50次，定义在config.h中
- 获取字典路径，格式是这样alloc_printf(“%s/.state/auto_extras/auto_%06u”, in_dir, i);
- open文件，将文件读入tmp中，同时保存每次读完文件后的长度，记为len
- 长度如果满足(len >= MIN_AUTO_EXTRA && len <= MAX_AUTO_EXTRA)
  - 调用maybe_add_auto(tmp, len);

maybe_add_auto(u8* mem, u32 len)

该函数主要是评估该字典是否可以为一个有效的字典

一些预定义的边界值

static s8  interesting_8[]  = { INTERESTING_8 };
static s16 interesting_16[] = { INTERESTING_8, INTERESTING_16 };
static s32 interesting_32[] = { INTERESTING_8, INTERESTING_16, INTERESTING_32 };

#define INTERESTING_8 \
  -128,          /* Overflow signed 8-bit when decremented  */ \
  -1,            /*                                         */ \
   0,            /*                                         */ \
   1,            /*                                         */ \
   16,           /* One-off with common buffer size         */ \
   32,           /* One-off with common buffer size         */ \
   64,           /* One-off with common buffer size         */ \
   100,          /* One-off with common buffer size         */ \
   127           /* Overflow signed 8-bit when incremented  */

#define INTERESTING_16 \
  -32768,        /* Overflow signed 16-bit when decremented */ \
  -129,          /* Overflow signed 8-bit                   */ \
   128,          /* Overflow signed 8-bit                   */ \
   255,          /* Overflow unsig 8-bit when incremented   */ \
   256,          /* Overflow unsig 8-bit                    */ \
   512,          /* One-off with common buffer size         */ \
   1000,         /* One-off with common buffer size         */ \
   1024,         /* One-off with common buffer size         */ \
   4096,         /* One-off with common buffer size         */ \
   32767         /* Overflow signed 16-bit when incremented */

#define INTERESTING_32 \
  -2147483648LL, /* Overflow signed 32-bit when decremented */ \
  -100663046,    /* Large negative number (endian-agnostic) */ \
  -32769,        /* Overflow signed 16-bit                  */ \
   32768,        /* Overflow signed 16-bit                  */ \
   65535,        /* Overflow unsig 16-bit when incremented  */ \
   65536,        /* Overflow unsig 16 bit                   */ \
   100663045,    /* Large positive number (endian-agnostic) */ \
   2147483647    /* Overflow signed 32-bit when incremented */

字典的结构体

struct extra_data {
  u8* data;                           /* Dictionary token data            */
  u32 len;                            /* Dictionary token length          */
  u32 hit_cnt;                        /* Use count in the corpus          */
};

static struct extra_data* extras;     /* Extra tokens to fuzz with        */
static u32 extras_cnt;                /* Total number of tokens read      */

static struct extra_data* a_extras;   /* Automatically selected extras    */
static u32 a_extras_cnt;              /* Total number of tokens available */

if(!MAX_AUTO_EXTRAS || !USE_AUTO_EXTRAS)
- 则return
如果字典内容是完全重复的字符串或内容与interesting_16/interesting_32存在相同的内容
- return
接着从extras数组中找出第一个待评估字典长度相等的项，判断内容是否一致，
- 一致则return
设置auto_changed=1
遍历a_extras，找到第一个与len相等且内容完全一致的
- a_extras[i].hit_cnt++;
- 同时goto排序a_extras的逻辑
如果此时的a_extras_cnt小于MAX_AUTO_EXTRAS
- 那么将当前的mem，添加为a_extras新表项，也就是新的字典
如果此时的a_extras_cnt不小于MAX_AUTO_EXTRAS
- 此时会随机淘汰一个位于MAX_AUTO_EXTRAS / 2之后的，也就是hit频率较低的字典，并使用当前的字典替代
这里是排序a_extras的逻辑
- 首先按照hit_cnt大小，从高到低排，也就是按照hit频率排序
- 接着对排在前面的USE_AUTO_EXTRAS项按照长度从短到长排序

pivot_inputs(void)

初始化一些值，还有一个prefix

  struct queue_entry* q = queue;
  u32 id = 0;
  
#ifndef SIMPLE_FILES
#  define CASE_PREFIX "id:"
#else
#  define CASE_PREFIX "id_"
#endif /* ^!SIMPLE_FILES */

进入一个循环
- 获取当前q对应的文件名，赋值给rsl，后续通过操作去掉文件名开头的’/‘
- 如果满足(!strncmp(rsl, CASE_PREFIX, 3) && sscanf(rsl + 3, “%06u”, &orig_id) == 1 && orig_id == id)
  - 设置resuming_fuzz = 1;
  - 分配一个路径alloc_printf(“%s/queue/%s”, out_dir, rsl);，赋值给nfn
  - 调用strchr(rsl + 3, ‘:’)，赋值给src_str
  - 如果满足(src_str && sscanf(src_str + 1, “%06u”, &src_id) == 1)
    - 接着通过src_id遍历到queue，得到s，此时的s就是父级queue
    - 接着将其变异的深度+1
    - 如果s的深度+1之后大于max_depth，则更新max_depth
- 如果不满足(!strncmp(rsl, CASE_PREFIX, 3) && sscanf(rsl + 3, “%06u”, &orig_id) == 1 && orig_id == id)
  - 没定义SIMPLE_FILES
    - 执行strstr(rsl, “,orig:”)，返回值赋值给use_name；如果找到则跳过”,orig:”取其后的实际内容，否则use_name = rsl
    - 分配路径alloc_printf(“%s/queue/id:%06u,orig:%s”, out_dir, id, use_name)，赋值给nfn
  - 定义了SIMPLE_FILES
    - 直接分配alloc_printf(“%s/queue/id_%06u”, out_dir, id)，赋值给nfn
- link_or_copy(q->fname, nfn);
  - 创建一个硬连接，将q->fname硬连接到nfn
  - 如果失败则创建文件，并将q->fname的内容写入到nfn对应的文件中
- 更新q->fname = nfn
- 如果当前q为确定性变异
  - mark_as_det_done(q);
    - 获取当前的文件名，strrchr(q->fname, ‘/‘)，赋值给fn
    - 更新fn为alloc_printf(“%s/queue/.state/deterministic_done/%s”, out_dir, fn + 1)
    - 创建这个文件，设置q->passed_det = 1;
- q = q->next;id++;
如果设置了in_place_resume
- nuke_resume_dir();
  - 删除out_dir/_resume/.state/deterministic_done
  - 删除out_dir/_resume/.state/auto_extras
  - 删除out_dir/_resume/.state/redundant_edges
  - 删除out_dir/_resume/.state/variable_behavior
  - 删除out_dir/_resume/.state
  - 删除out_dir/_resume
  - 如果删除失败，则fatal

load_extras(u8* dir)

如果使用了-x参数指定了字典的话，载入字典

find_timeout(void)

读取fuzzer_stats文件里与exec_timeout相关的参数，通过文件中匹配exec_timeout的特征，获取偏移，然后atoi读exec_timeout的具体值

如果resuming_fuzz为0，则return
如果定义了in_place_resume = 1，fn = alloc_printf(“%s/fuzzer_stats”, out_dir)，反之fn = alloc_printf(“%s/../fuzzer_stats”, in_dir);
接着open fn，然后匹配字符串”exec_timeout : “，将:后面的值转化为int，然后赋值给exec_tmout，并同时设置timeout_given = 3;

detect_file_args(char** argv)

查找是否存在@@占位符，如果存在则替换为out_dir/.cur_input

获取当前工作目录的绝对路径（getcwd），赋值给cwd
获取strstr(argv[i], “@@”)，赋值给aa_loc
如果aa_loc存在
- 如果没有指定out_file
  - 那么out_file = alloc_printf(“%s/.cur_input”, out_dir);
- 如果指定了，但是不是绝对路径的话
  - aa_subst = alloc_printf(“%s/%s”, cwd, out_file);
- 如果指定了，且路径为绝对路径
  - aa_subst = out_file;
- 最后会将@@替换为$cwd/out_dir/.cur_input

setup_stdio_file(void)

针对于没有指定outfile，且也不存在@@的情况。先unlink掉同名的文件out_dir/.cur_input，然后创建outdir/.cur_input文件，同时文件描述符由out_fd保存

check_binary(u8* fname)

检查最后指定的，需要插桩的binary是否存在，还会去检查ELF header这类

perform_dry_run(char** argv)

遍历queue中的case，将case读到内存里，然后进入calibrate_case判断这个case是否有问题

根据calibrate_case函数执行的返回结果，会分为几种情况，根据这几种情况会有一个判断，然后一直判断，直到程序fault或者case全部判断完毕

最后会有一个case的质量检查，如果fail的次数和queued_paths相等，那么会直接fatal；fail的占比高于百分之20，那么会发出警告

涉及到的枚举值

enum {
  /* 00 */ FAULT_NONE,
  /* 01 */ FAULT_TMOUT,
  /* 02 */ FAULT_CRASH,
  /* 03 */ FAULT_ERROR,
  /* 04 */ FAULT_NOINST,
  /* 05 */ FAULT_NOBITS
};

初始化一些变量

1
2
3

struct queue_entry* q = queue;
u32 cal_failures = 0;
u8* skip_crashes = getenv("AFL_SKIP_CRASHES");

进入循环
- 获取q的filename，赋值给fn，接着open，然后将内容写到use_mem中，use_mem就是需要评估的样本内容
- 调用res = calibrate_case(argv, q, use_mem, 0, 1)，其中res是返回结果，状态见上方的枚举值
- 接着进入一个switch
  - res==FAULT_NONE
    - 如果是q == queue，那么也就是第一次执行，会调用check_map_coverage()
    - 如果满足crash_mode≠0，会fatal
  - res==FAULT_TMOUT
    - 如果手动设定了timeout
      - 如果是宽松模式，也即是timeout具体值后面多了一个’+’
        
        设置当前样本的call_failed，q->cal_failed = CAL_CHANCES;
        
        cal_failures++
      - 如果是严格模式
        
        输出一些信息，然后fatal
    - 如果没有手动设定timeout
      - 输出一些信息，然后fatal
  - res==FAULT_CRASH
    - 如果设置了crash_mode
      - break
    - 如果设置了skip_crashes
      - 设置当前样本的call_failed，q->cal_failed = CAL_CHANCES;
      - cal_failures++
    - 接着针对是否设置了mem_limit，产生一些针对性的输出
    - fatal
  - res==FAULT_ERROR
    - fatal
  - res==FAULT_NOINST
    - fatal，因为没有插桩
  - res==FAULT_NOBITS
    - useless_at_start++
    - 如果满足(!in_bitmap && !shuffle_queue)，会输出”No new instrumentation output, test case may be useless.”
- 如果满足q->var_behavior≠0，输出”Instrumentation output varies across runs.”
- q = q->next;
如果cal_failures≠0
- 如果cal_failures的次数与queued_paths相等，说明样本执行全部失败了，则fatal
- cal_failures的比例，超过20%，则输出警告（warn），提示检查设置

calibrate_case(char** argv, struct queue_entry* q, u8* use_mem,u32 handicap, u8 from_queue)

初始化一系列变量

static u8 first_trace[MAP_SIZE];

u8  fault = 0, new_bits = 0, var_detected = 0, hnb = 0,
    first_run = (q->exec_cksum == 0);

u64 start_us, stop_us;

s32 old_sc = stage_cur, old_sm = stage_max;
u32 use_tmout = exec_tmout;
u8* old_sn = stage_name;

if (!from_queue || resuming_fuzz)
- 设置use_tmout
默认q->cal_failed++，认为是最坏的情况，样本执行失败
设置stage_name = “calibration”;
stage_max = fast_cal ? 3 : CAL_CYCLES，这里的stage_max默认一开始是8，当开始第二轮的时候，往往会被设置成40，也就是一个样本正常执行的次数是40
if (dumb_mode != 1 && !no_forkserver && !forksrv_pid)
- init_forkserver(argv);初始化forkserver，其中对forksrv_pid的判断是为了防止重复初始化
如果当前样本的exec_cksum不为0，那么说明这个样本不是第一次执行
- 将trace_bits内容拷贝到first_trace，其中的trace_bits是样本每一次执行后的路径情况
- 调用has_new_bits(virgin_bits);判断是否产生了新的路径
  - 返回0说明，没有新路径
  - 返回1说明，没有发现新的边（tuple），只是某些已知边的命中次数发生了变化
  - 返回2说明，发现了之前从未出现过的新边（tuple），也就是产生了全新的覆盖
- if (hnb > new_bits) new_bits = hnb;设置new_bits，用于后续判读该样本是否产生了新路径
获取开始时间，赋值给start_us
进入for 循环，次数是stage_max
- if (!first_run && !(stage_cur % stats_update_freq))
  - 会调用show_stats()，输出状态
- 接着调用write_to_testcase(use_mem, q->len)，将样本内容写到out_dir/.cur_input中
- 调用fault = run_target(argv, use_tmout);
- 满足if (stop_soon || fault != crash_mode)，也就是ctrl c会退出，或者crash_mode对不上
  - goto abort_calibration;
- 计算hash值，调用hash32(trace_bits, MAP_SIZE, HASH_CONST)，赋值给cksum
- 如果当前执行样本的q->exec_cksum和cksum不等
  - 判断virgin_bits的是否产生新路径
  - 如果q->exec_cksum存在，说明不是第一次执行的时候会进入下面的逻辑
    - 进入循环
      - 如果当前的trace_bits和first_trace不一致，说明可能发生了路径上的变化
        
        设置var_bytes[i] = 1;
        
        stage_max = CAL_CYCLES_LONG，也就是延长了当前样本的执行次数
      - 设置var_detected = 1，说明执行了这个样本，发生了变化
  - 如果q->exec_cksum不存在
    - q->exec_cksum = cksum;
    - 将trace_bits内容拷贝到first_trace
获取结束时间，赋值给stop_us
计算一下执行时间和执行轮数，分别赋值给total_cal_us和total_cal_cycles
进行样本信息的赋值
- q->exec_us=执行时间/执行轮数，q->bitmap_size = count_bytes(trace_bits)，q->handicap = handicap，q->cal_failed = 0。也就是执行到这个地方说明样本并没有执行失败
- 接着增加总的bitmap的大小和数量，对应逻辑是total_bitmap_size += q->bitmap_size;total_bitmap_entries++
执行update_bitmap_score(q);
如果满足(!dumb_mode && first_run && !fault && !new_bits)
- fault = FAULT_NOBITS，这种情况就是说样本正常执行，但是没有产生新的路径
进入abort_calibration标签
- 如果满足(new_bits == 2 && !q->has_new_cov)，说明产生了全新的覆盖率
  - 设置q->has_new_cov = 1;queued_with_cov++;
- 如果检测出了变化，也就是满足var_detected==1
  - 计算变化的数量var_byte_count = count_bytes(var_bytes);
  - 如果满足(!q->var_behavior)
    - 执行mark_as_variable(q);
    - queued_variable++;
- 最后是开头的一些变量的更新，和状态的输出
最后return fault

init_forkserver(char** argv)

初始化基本的变量

#define FORKSRV_FD          198
static struct itimerval it;
int st_pipe[2], ctl_pipe[2];
int status;
s32 rlen;

初始化管道pipe(st_pipe);pipe(ctl_pipe)
执行forksrv_pid = fork()，fork一个子进程，然后子进程pid赋值给forksrv_pid
接着如果满足if (!forksrv_pid)，下面执行的逻辑是子进程执行的
- 设置当前进程的fd数量上限到200
- 接着设置内存的上限
- 执行setsid()，脱离终端控制，防止被挂起
- 将stdout、stderr重定向到dev_null_fd
- 如果定义了outfile
  - 将stdin也重定向到dev_null_fd
- 否则（没有定义outfile）将stdin重定向到out_fd，也就是将种子当成输入
- 接着执行dup2(ctl_pipe[0], FORKSRV_FD);dup2(st_pipe[1], FORKSRV_FD + 1)
  - 子进程通过198这个fd来读取来自父进程的信息，同时通过199这个fd来将信息传递给父进程
- 设置一系列的环境变量，最后执行execv(target_path, argv);
  - 如果正常的话其实会跑到插桩的代码，后续与父进程交互，这个部分后面写
  - 如果失败的话，执行exit，退出
父进程这里也设置了fd
- 父进程通过198来将信息传递给子进程，同时通过199读取来自子进程的消息
设定一个超时的计时器
- 阻塞，直到可以读取来自子进程的消息rlen = read(fsrv_st_fd, &status, 4)，正常来说应该是hello
如果没有超时的话，这个计时器会被清空
判断rlen的长度，等于4的话说明forkserver正常启动了。如果child_timed_out存在，说明子进程执行超时。还存在waitpid(forksrv_pid, &status, 0)判断子进程的返回状态是否正常。
后续还有一些判断，不是特别重要，有时间再补吧

show_stats(void)

has_new_bits(u8* virgin_map)

根据机器字长，设置current和virgin，其中的 >>3/2就是除以8/4

#ifdef WORD_SIZE_64

  u64* current = (u64*)trace_bits;
  u64* virgin  = (u64*)virgin_map;

  u32  i = (MAP_SIZE >> 3);//0x2000

#else

  u32* current = (u32*)trace_bits;
  u32* virgin  = (u32*)virgin_map;

  u32  i = (MAP_SIZE >> 2);//0x4000

#endif /* ^WORD_SIZE_64 */

u8   ret = 0;

进入while (i--)循环
- 当*current不为0且(*current & *virgin)不为0的时候，说明发现了新路径，或者说某一条路径多执行了一次
  - ret<2时
    - 取单字节的current为cur，单字节的virgin为vir，每次判断的idx都是8个增长。也就是将八个字节拆分成8个单字节判断
    - 这里说个例子，判断如果cur[0] && vir[0] == 0xff为1那么说明产生了新路径，ret=2。需要注意的是==的优先级大于 &&，这里踩坑了。因为vir初始化就是0xff，所以==0xff说明还没被执行到，因此cur[0]只要不为0，那么就会产生新路径
    - 如果不为1的话，那么ret=1
    - 执行*virgin &= ~*current，这里将current为1的bit的值取反，然后&操作，这样就可以消除vir上的1位。例子看下面
- current++;
- virgin++;
满足if (ret && virgin_map == virgin_bits)，需要注意的是fuzz时存在多个virgin，分别是virgin_bits、virgin_tmout和virgin_crash，这里的bitmap_changed针对于virgin_bits
- 设置bitmap_changed=1
return ret;

write_to_testcase(void* mem, u32 len)

如果out_file存在的话，将当前样本的内容，写到out_file中；如果不存在的话，通过out_fd写进去

run_target(char** argv, u32 timeout)

初始化一些值

static struct itimerval it;
static u32 prev_timed_out = 0;
static u64 exec_ms = 0;

int status = 0;
u32 tb4;

child_timed_out = 0;

/* After this memset, trace_bits[] are effectively volatile, so we
   must prevent any earlier operations from venturing into that
   territory. */

memset(trace_bits, 0, MAP_SIZE);

如果是dumb_mode或者no_forkserver，这个情况我不讨论，后续有时间再续

如果是正常的forkserver模式
- 首先父进程向fsrv_ctl_fd管道写入上一次程序执行的状态，其中prev_timed_out=0代表上一次执行没有超时。
- 阻塞进程，等待forkserver向fsrv_st_fd管道写入child_pid，父进程等待读取
- 如果child_pid≤0，则fatal
根据传入的timeout参数，设置计时器
父进程等待从fsrv_st_fd管道读入forkserver子进程执行状态
如果子进程已经退出或者死亡，则child_pid=0
通过timer计算出子进程执行的时间exec_ms，接着清空timer，total_execs++;
根据word_size，调用classify_counts((u64*)trace_bits)或者classify_counts((u32*)trace_bits);
由于fuzz启动之前重新注册过信号处理函数，所以当超时的时候会将child_timed_out赋值为1，并kill(child_pid, SIGKILL)，因此这里当timer重置之后，会将这个child_timed_out的结果赋值给prev_timed_out，那么下一次执行的时候这个结果就会被反馈给forkserver了
根据forkserver传递的status来判断返回值类型
- WIFSIGNALED(status)当这个为真时，也就是子进程异常退出，通过调用WTERMSIG(status)获得子进程结束之前的signal，赋值给kill_signal
  - 如果child_timed_out=1且kill_signal == SIGKILL，也就是超时导致了异常退出，那么return FAULT_TMOUT
  - 如果没有超时，那么就意味着crash，所以return FAULT_CRASH;
- 如果使用了asan，且WEXITSTATUS(status) == MSAN_ERROR检测到了使用未初始化的内存
  - kill_signal = 0
  - return FAULT_CRASH;
- 如果满足这个条件(!(timeout > exec_tmout) && (slowest_exec_ms < exec_ms))
  - 这里是统计执行的最慢时间，所以会执行slowest_exec_ms = exec_ms
return FAULT_NONE;

classify_counts(u64* mem)

遍历完mem
- 八个字节为一次，一次分为4组，每组两个bytes，通过调用count_class_lookup16[mem16[i]]来检索count_class_lookup16数组中对应的值，然后再填回mem16[i]

count_bytes(u8* mem)

遍历完mem
- 四个字节为一次，一次分为4组，每组一个字节，将每一个字节都&0xff，值如果不为0的话，则ret++
return ret;

update_bitmap_score(struct queue_entry* q)

初始化变量，这个fav_factor就是后面判断进入top_rated[i]的依据，执行时间*文件长度

`u32 i; u64 fav_factor = q->exec_us * q->len;`

进入for循环，执行MAP_SIZE次数
- 如果当前idx的trace_bits[idx]存在
  - 如果top_rated[i]不为空
    - 如果当前的fav_factor>top_rated[i]的fav_factor，那么continue，说明这个无法加入
    - 如果满足(!--top_rated[i]->tc_ref)，即自减之前top_rated[i]->tc_ref=1，也就是之前是添加到top_rated数组里的，且目前q的fav_factor<top_rated[i]的fav_factor
      - 释放掉原本top_rated[i]->trace_mini，并清空，后续会为这个新的q分配trace_mini
  - 直接把当前的case放到这个数组，top_rated[i] = q
  - q->tc_ref++;
  - 如果当前的q不存在trace_mini的话
    - 先分配对应的内存，然后调用minimize_bits(q->trace_mini, trace_bits)，将trace_bits压缩到q->trace_mini
  - 设置score_changed = 1;

minimize_bits(u8* dst, u8* src)

代码量很少，其实用到一个压缩的算法

这里就是把src里的单个字节映射到dst的bit，因此缩小了8倍的大小。下面的逻辑就是如果*src存在的话，就将dst中对应bit位赋值为1

u32 i = 0;

while (i < MAP_SIZE) {

  if (*(src++)) dst[i >> 3] |= 1 << (i & 7);
  i++;

}

mark_as_variable(struct queue_entry* q)

创建out_dir/queue/.state/variable_behavior/fname的符号链接，同时设置q->var_behavior = 1;

cull_queue(void)

基本值的初始化

struct queue_entry* q;
static u8 temp_v[MAP_SIZE >> 3];
u32 i;

if (dumb_mode || !score_changed) return;

score_changed = 0;

memset(temp_v, 255, MAP_SIZE >> 3);

queued_favored  = 0;
pending_favored = 0;

q = queue;

循环遍历queue，将q->favored都设置成0
进入循环，执行次数是MAP_SIZE
- 如果当前的top_rated[i]存在，且(temp_v[i >> 3] & (1 << (i & 7)))不为0
  - j = MAP_SIZE >> 3;
  - 进入while (j--)循环
    - 如果top_rated[i]->trace_mini[j]存在
      - 执行temp_v[j] &= ~top_rated[i]->trace_mini[j]，意思就是说将trace_mini按位取反，然后与temp_v[j]进行&运算，同时遍历了temp_v数组。循环执行完毕后，temp_v[j]中不存在当前top_rated[i]涉及到的路径
      - 当执行完毕所有的top_rated数组的时候，获得的temp_v[j]是一个不包含所有样本路径的最小子集（但这个没用）。同时由于这个判断的存在(temp_v[i >> 3] & (1 << (i & 7)))，所以这里就是一个top_rated数组去重的过程
  - top_rated[i]->favored = 1;
  - 然后queued_favored++;
  - 如果!top_rated[i]->was_fuzzed，也就是当前样本还没有被fuzz过，那么待fuzz的favored样本数量+1
    - pending_favored++;
设置q=queue，也就是到队列开头
while (q)
- 遍历执行mark_as_redundant(q, !q->favored);

mark_as_redundant(struct queue_entry* q, u8 state)

如果state 和q->fs_redundant相等，就return
q->fs_redundant = state;
为fn分配路径为out_dir/queue/.state/redundant_edges/fname
如果state为1，也就是redundant的情况
- 创建这个fn路径的文件
如果state为0，也就是属于favored的情况
- 删除这个fn路径的文件

show_init_stats

find_start_position(void)

针对于开启了resuming_fuzz的情况，为了获取上一次执行样本的位置

如果resuming_fuzz为0，则return
如果in_place_resume=1
- fn为out_dir/fuzzer_stats
如果in_place_resume=0
- fn为in_dir/../fuzzer_stats
打开这个文件，将内容读到tmp里，接着匹配”cur_path : “字符在这文件内的偏移，然后将”:”后的值转化为int，赋值给ret，最后return ret
这个返回值会赋值给seek_to

write_stats_file(double bitmap_cvg, double stability, double eps)

save_auto(void)

将自动生成的一些字典保存到out_dir下面

如果auto_changed=0，则return
设置auto_changed=0
for循环，次数是MIN(USE_AUTO_EXTRAS, a_extras_cnt)
- 获取fn路径，为alloc_printf(“%s/queue/.state/auto_extras/auto_%06u”, out_dir, i);
- 打开这个文件，将a_extras数组的内容写入

fuzz执行的流程

进入循环while (1)
- 定义skipped_fuzz
- 执行cull_queue()精简top_rated数组
- 如果queue_cur为空的话，其实也就是对应着初始fuzz开始状态
  - queue_cycle++;
  - current_entry = 0;
  - cur_skipped_paths = 0;
  - queue_cur = queue;
  - while (seek_to)
    - 通过seek_to来找到之前fuzz的queue
  - 执行函数show_stats，打印状态
  - 如果queued_paths与prev_queued相等
    - 同时采用字符拼接的策略的话
      - cycles_wo_finds++
    - 否则use_splicing = 1，开启字符拼接
  - 否则cycles_wo_finds = 0
  - 如果是第一轮执行，同时是M/S模式，并且存在环境变量AFL_IMPORT_FIRST
    - 执行sync_fuzzers(use_argv);
- 执行fuzz_one(use_argv)，返回值赋值给skipped_fuzz
- 如果没有停止（!stop_soon）、处于M/S模式（设置了sync_id），并且当前样本没有被跳过（skipped_fuzz为0，即fuzz_one实际执行了fuzz）
  - 如果满足(!(sync_interval_cnt++ % SYNC_INTERVAL))
    - 执行sync_fuzzers(use_argv);
- 执行queue_cur = queue_cur->next;current_entry++，下一个样本和数量++

sync_fuzzers(char** argv)

作用其实是在开启了M/S的情况下，同步不同sync_id目录下的样本

初始化一些变量

DIR* sd;
struct dirent* sd_ent;
u32 sync_cnt = 0;

执行sd = opendir(sync_dir);
进入循环while ((sd_ent = readdir(sd)))，是遍历sync_dir目录下的所有内容
- 如果此时的遍历项是”.”开头或者等于当前的sync_id，说明是隐藏文件或者自己的fuzz目录，因此执行continue跳过
- 接着qd_path=”sync_dir/sd_ent->d_name/queue”，也就是获取到其他sync_id/queue目录下的路径
- 调用qd = opendir(qd_path)，获取内容。同时也是判断当前的目录下是否存在queue，如果不存在的话，说明这个不是另外sync_id的目录，因为目录下还可能存在.cur_input、fuzz_bitmap、fuzzer_stats等等
- qd_synced_path=”out_dir/.synced/sd_ent->d_name”，到了这里也就是说已经获取了其他并行fuzzer的样本目录了，然后这里是获取，为了后续创建.synced的目录
- id_fd = open(qd_synced_path, O_RDWR | O_CREAT, 0600)，创建文件
- 将当前的文件内容读到min_accept，然后赋值next_min_accept = min_accept;
- 执行sprintf(stage_tmp, “sync %u”, ++sync_cnt);
- 进入循环while ((qd_ent = readdir(qd)))
  - 如果当前遍历到的项是隐藏文件，或者格式不满足类似于这种”id:000000,orig:seed”，或者当前获取到的syncing_case<min_accept，说明这个样本之前已经同步过了，所以continue
  - 如果syncing_case >= next_min_accept，则next_min_accept = syncing_case + 1;
  - 分配path=”qd_path/qd_ent->d_name”，然后fd = open(path, O_RDONLY);
  - 接着调用mmap将当前文件内容映射到内存，然后将内容写到.cur_input中，调用run_target跑下这个case，保存执行完毕该样本的状态，用于后续判断是否可以被保存为interesting case

暂时搁置分析的函数

fuzz_one(char** argv)

如果编译时定义了IGNORE_FINDS，且当前样本的depth>1，则return 1，说明这个样本不属于初始样本集，直接跳过
如果pending_favored不为0，则说明top_rated数组里还存在样本没fuzz
- 如果当前样本被fuzz过了，或者当前样本并不是favored。同时UR(100) < SKIP_TO_NEW_PROB满足，则return 1。前方的SKIP_TO_NEW_PROB为99，也就是对于被fuzz过的样本，或者不是favored的样本，有99%的概率直接返回
如果pending_favored为0
- 如果不是dumb_mode且当前样本不是favored且queued_paths大于10
  - 如果不是第一轮，且当前样本没有被fuzz过
    - 75%的概率return 1
  - 如果是第一轮或者被fuzz过了
    - 95%的概率直接return 1
获取当前样本的内容，调用mmap映射到内存，分别赋值给orig_in 和in_buf，同时获取样本内容的长度，赋值为len
如果当前的样本执行失败过
- 如果次数<3,那么会给重新执行的机会，然后判断返回状态，如果还是FAULT_ERROR则fatal
- 如果≥3，同时满足(stop_soon || res != crash_mode)的话，cur_skipped_paths++，然后goto abandon_entry;
如果不是dumb_mode，且当前的样本还没有被trim过
- 调用u8 res = trim_case(argv, queue_cur, in_buf)，其实就是精简下样本
- 如果是FAULT_ERROR，则fatal
- 接着赋值queue_cur->trim_done = 1
- 更新queue_cur→len，更新out_buf
调用calculate_score(queue_cur)，为当前的样本评分，赋值给orig_perf和perf_score
如果设置了跳过确定性变异（skip_deterministic），或者当前样本已经被fuzz过，或者已经被设置了passed_det的标志位
- goto havoc_stage，执行随机性变异
如果满足(master_max && (queue_cur->exec_cksum % master_max) != master_id - 1)
- goto havoc_stage，执行随机性变异
设置doing_det = 1，执行到这里说明当前没发生过执行随机性变异，下方会先执行确定性变异，然后后续的变异流程以后再分析。。。。。（待填坑

样本的确定性变异/随机性变异/字符拼接

由于写ast fuzz其实用不到，因为会破坏文法，所以这一部分暂时不分析，以后有需要再分析

trim_case(char** argv, struct queue_entry* q, u8* in_buf)

calculate_score(struct queue_entry* q)

common_fuzz_stuff(char** argv, u8* out_buf, u32 len)

fuzzer与forkserver间的通信

我这里采用的思路是使用afl-gcc编译一个，然后把编译后的文件拉到ida里直接看，下方分析的是64位的程序

一些重要的值

__afl_area_ptr
- 共享内存的起始地址
__afl_prev_loc
- 上一次执行，边的位置
__afl_fork_pid
- forkserver fork出来的子进程的pid
__afl_temp
- 存放fuzzer与forkserver之间通信的临时数据
__afl_setup_failure
- 获取__AFL_SHM_ID失败或者shmat挂载共享内存失败时，这个值会被自增为非0；如果为0则说明初始化没有失败过
__afl_global_area_ptr
- 存放共享内存的起始地址

_afl_maybe_log(unsigned int cur_loc)

这里是完整的逆向完毕的代码

// AFL instrumentation function - handles shared memory setup and coverage tracking
void __fastcall _afl_maybe_log(unsigned int cur_loc)
{
  __int64 v1; // rcx
  __int64 v2; // rdx
  __int64 v3; // rcx
  char *shm_id_str; // rax
  int v5; // eax
  void *area_ptr; // rax
  int edge_id; // edi
  __int64 child_pid; // rax
  __int64 v9; // rax
  __int64 shm_id; // [rsp-10h] [rbp-180h]
  __int64 shm_ptr; // [rsp+18h] [rbp-158h]

  v2 = _afl_area_ptr;
  if ( _afl_area_ptr )                          // Check if AFL shared memory area is already initialized
  {
__afl_store:
    v3 = _afl_prev_loc ^ v1;                    // Calculate edge ID: prev_loc XOR cur_loc for coverage tracking
    _afl_prev_loc ^= v3;                        // Update prev_loc for next basic block
    _afl_prev_loc = _afl_prev_loc >> 1;         // Shift prev_loc right by 1 to distinguish A->B from B->A
    ++*(v2 + v3);                               // Increment hit count for this edge in shared memory bitmap
    return;
  }
  if ( !_afl_setup_failure )                    // If setup failed before, return early
  {
    v2 = _afl_global_area_ptr;
    if ( _afl_global_area_ptr )                 // Check if another thread already initialized the shared memory
    {
      _afl_area_ptr = _afl_global_area_ptr;     // Reuse already initialized global area pointer
    }
    else
    {
      shm_ptr = v1;
      shm_id_str = getenv("__AFL_SHM_ID");      // Get shared memory ID from environment variable __AFL_SHM_ID
      if ( !shm_id_str || (v5 = atoi(shm_id_str), area_ptr = shmat(v5, 0, 0), area_ptr == -1LL) )// Attach to AFL shared memory using shmat()
      {
        ++_afl_setup_failure;                   // Mark setup as failed if shmat fails
        return;
      }
      _afl_area_ptr = area_ptr;
      _afl_global_area_ptr = area_ptr;
      shm_id = area_ptr;
      if ( write(199, &_afl_temp, 4u) == 4 )    // AFL Fork Server: Write to pipe 199 to signal ready
      {
        while ( 1 )
        {
          edge_id = 198;                        // AFL_FORKSRV_FD (198) - Fork server control pipe
          if ( read(198, &_afl_temp, 4u) != 4 )
            break;                              // Fork server loop: Read from pipe 198 to get fuzz command
          LODWORD(child_pid) = fork();          // Fork a child process for each fuzzing iteration
          if ( child_pid < 0 )
            break;
          if ( !child_pid )
            goto __afl_fork_resume;             // Child process continues to __afl_fork_resume
          _afl_fork_pid = child_pid;
          write(199, &_afl_fork_pid, 4u);       // Parent: Send child PID to fuzzer via pipe 199
          edge_id = _afl_fork_pid;
          LODWORD(v9) = waitpid(_afl_fork_pid, &_afl_temp, 0);// Parent: Wait for child process to finish
          if ( v9 <= 0 )
            break;
          write(199, &_afl_temp, 4u);           // Parent: Send child exit status to fuzzer via pipe 199
        }
        _exit(edge_id);
      }
__afl_fork_resume:
      close(198);                               // Child: Close fork server pipes (fd 198, 199)
      close(199);
      v2 = shm_id;
      v1 = shm_ptr;
    }
    goto __afl_store;
  }
}

_afl_area_ptr如果不为空的话，说明此时共享内存已经被初始化
- __afl_store的label，下方的逻辑经过优化，下方的trace_bits其实就是共享内存
  - edge_id = _afl_prev_loc ^ [compile_time_random]
  - trace_bits[edge_id]++
  - _afl_prev_loc = [compile_time_random] >> 1
_afl_area_ptr如果为空，这说明还没有初始化共享内存
- 如果_afl_setup_failure为0，也就是之前没有出现过共享内存初始化失败的情况
  - v2 = _afl_global_area_ptr;
  - 如果_afl_global_area_ptr不为空，说明共享内存已经初始化，所以下面需要给_afl_area_ptr赋值
    - _afl_area_ptr = _afl_global_area_ptr;
  - 如果_afl_global_area_ptr为空
    - 获取环境变量__AFL_SHM_ID的值，赋值给shm_id_str
    - 接着通过这个shm_id将共享内存attach到进程空间中，成功的话会将地址的返回值赋值给area_ptr
      - 如果attach失败了
        
        ++_afl_setup_failure，然后return
    - 接着_afl_area_ptr和_afl_global_area_ptr都被赋值为area_ptr，也就是上方的attach的共享内存的地址
    - 执行write(199, &_afl_temp, 4u) == 4，向fuzzer传递4字节信息，执行成功进入下方逻辑
      - 进入while ( 1 )循环
        
        读取来自父进程（fuzzer）的消息，其实就是prev_timed_out的值
        
        fork子进程，保存其child_pid
        
        如果child_pid < 0说明fork失败，那么就break
        
        如果child_pid=0，就goto __afl_fork_resume，这里其实对应的是子进程的逻辑
        
        进入到这一步说明是forkserver的逻辑，因为上方子进程已经goto __afl_fork_resume了。接着forkserver将子进程的child_pid传递给fuzzer，forkserver等待子进程执行完毕后，将执行完毕的信息存储在_afl_temp，后续通过管道发送给fuzzer，让fuzzer判断子进程是否执行成功，后续forkserver会一直持续这个循环，不断地fork子进程执行，返回pid，判断状态
      - exit
  - 这里是__afl_fork_resume的label
    - 关闭管道，赋值v2 = shm_id;v1 = shm_ptr;
    - goto __afl_store;

compile_time_random的由来

插桩代码中存在着一段逻辑，这里会将outf的内容插入到每一个基本块

`fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32, R(MAP_SIZE));`

跟进trampoline_fmt_64，这里主要就是调用了__afl_maybe_log函数，然后存在这样一段汇编”movq $0x%08x, %%rcx\n”，其实这里就是上方的R(MAP_SIZE)，R是一个随机数的宏，定义是这样#define R(x) (random() % (x))

assembly

static const u8* trampoline_fmt_64 =

  "\n"
  "/* --- AFL TRAMPOLINE (64-BIT) --- */\n"
  "\n"
  ".align 4\n"
  "\n"
  "leaq -(128+24)(%%rsp), %%rsp\n"
  "movq %%rdx,  0(%%rsp)\n"
  "movq %%rcx,  8(%%rsp)\n"
  "movq %%rax, 16(%%rsp)\n"
  "movq $0x%08x, %%rcx\n"
  "call __afl_maybe_log\n"
  "movq 16(%%rsp), %%rax\n"
  "movq  8(%%rsp), %%rcx\n"
  "movq  0(%%rsp), %%rdx\n"
  "leaq (128+24)(%%rsp), %%rsp\n"
  "\n"
  "/* --- END --- */\n"
  "\n";

ref

https://eternalsakura13.com/2020/08/23/afl/

https://dwfault.github.io/2019/12/20/afl-fuzz%E6%9A%B4%E5%8A%9B%E6%95%88%E7%8E%87%E6%B5%81%E5%AE%9E%E8%B7%B5/

http://rk700.github.io/2017/12/28/afl-internals/

Fuzz

#AFL #Fuzz #llvm

AFL源码分析

https://flyyy.top/2025/11/10/AFL源码分析/

Author

flyyy

Posted on

November 10, 2025

Licensed under

libprotobuf-mutator的简单学习 Previous

第八届“强网”拟态防御国际精英挑战赛 - WIN！致敬mt 复现 Next

AFL源码分析

AFL源码分析

目录

基本信息

Makefile

afl-gcc.c

find_as(u8* argv0)

edit_params(u32 argc, char** argv)

afl-as.c

main(int argc, char** argv)

edit_params(int argc, char** argv)

add_instrumentation(void)

简单的调试

afl-clang-fast.c

find_obj(u8* argv0)

edit_params(u32 argc, char** argv)

main(int argc, char** argv)

afl-llvm-pass.so.cc

afl-llvm-rt.o.c

afl-fuzz.c

参数解析

setup_signal_handlers(void)

check_asan_opts(void)

fix_up_sync(void)

save_cmdline(u32 argc, char** argv)

fix_up_banner(u8* name)

check_if_tty(void)

get_core_count(void)

bind_to_free_cpu(void)

check_crash_handling(void)

check_cpu_governor(void)

setup_post(void)

setup_shm(void)

init_count_class16(void)

setup_dirs_fds(void)

read_testcases(void)

add_to_queue(u8* fname, u32 len, u8 passed_det)

shuffle_ptrs(void** ptrs, u32 cnt)

load_auto(void)

maybe_add_auto(u8* mem, u32 len)

pivot_inputs(void)

load_extras(u8* dir)

find_timeout(void)

detect_file_args(char** argv)

setup_stdio_file(void)

check_binary(u8* fname)

perform_dry_run(char** argv)

calibrate_case(char** argv, struct queue_entry* q, u8* use_mem,u32 handicap, u8 from_queue)

init_forkserver(char** argv)

show_stats(void)

has_new_bits(u8* virgin_map)

write_to_testcase(void* mem, u32 len)

run_target(char** argv, u32 timeout)

classify_counts(u64* mem)

count_bytes(u8* mem)

update_bitmap_score(struct queue_entry* q)

minimize_bits(u8* dst, u8* src)

mark_as_variable(struct queue_entry* q)

cull_queue(void)

mark_as_redundant(struct queue_entry* q, u8 state)

show_init_stats

find_start_position(void)

write_stats_file(double bitmap_cvg, double stability, double eps)

save_auto(void)

fuzz执行的流程

sync_fuzzers(char** argv)

暂时搁置分析的函数

fuzz_one(char** argv)

样本的确定性变异/随机性变异/字符拼接

trim_case(char** argv, struct queue_entry* q, u8* in_buf)

calculate_score(struct queue_entry* q)

common_fuzz_stuff(char** argv, u8* out_buf, u32 len)

fuzzer与forkserver间的通信

一些重要的值

_afl_maybe_log(unsigned int cur_loc)

compile_time_random的由来

ref