/vld

Forked from https://github.com/derickr/vld.git, add json dump support

Primary Language: C · License: BSD 2-Clause "Simplified" License (BSD-2-Clause)

VLD With JSON Support

此仓库fork自derickr/vld,在尽量不改变已有代码的前提下,添加了json输出支持。

原README

移步至README.rst

起因

为了对PHP源码做XSS检测,需要提取vld输出信息做分析。vld的原版信息输出格式不方便做提取,故自行为其添加json输出。

JSON输出

现有test.php内容如下

<!DOCTYPE html>
<html>
<head/>
<body>
<?php

function tainte($src)
{
    $dst = $src + 0;
    return "<div id='". $dst."'>content</div>";
}

$array = array();
$array[] = 'safe' ;
$array[] = $_GET['userData'] ;
$array[] = 'safe' ;
$tainted = $array[1] ;

$tainted = tainte($tainted);

echo $tainted;

?>
<h1>Hello World!</h1>
</body>
</html>

执行 php -dvld.active=1 -dvld.execute=0 -dvld.dump_json=1 -dvld.format=1 test.php 得到以下输出。

[
{
     "class": null,
     "filename": "/home/dev/test.php",
     "function name": null,
     "number of ops": 20,
     "compiled vars": ["array", "tainted"],
     "ops": {
          "line": [1, 7, 13, 14, null, 15, null, null, null, 16, null, 17, null, 19, null, null, null, 21, 24, 27],
          "#": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
          "*": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null],
          "E": ["E", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null],
          "I": [">", null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null],
          "O": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, ">"],
          "op_code": [40, 0, 38, 147, 137, 80, 81, 147, 137, 147, 137, 81, 38, 61, 117, 60, 38, 40, 40, 62],
          "op": ["ECHO", "NOP", "ASSIGN", "ASSIGN_DIM", "OP_DATA", "FETCH_R", "FETCH_DIM_R", "ASSIGN_DIM", "OP_DATA", "ASSIGN_DIM", "OP_DATA", "FETCH_DIM_R", "ASSIGN", "INIT_FCALL", "SEND_VAR", "DO_FCALL", "ASSIGN", "ECHO", "ECHO", "RETURN"],
          "fetch": ["", "", "", "", "", "global", "", "", "", "", "", "", "", "", "", "", "", "", "", ""],
          "ext": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, 0, null, null, null, null],
          "return_type": [null, null, null, null, null, "IS_TMP_VAR", "IS_TMP_VAR", null, null, null, null, "IS_TMP_VAR", null, null, null, "IS_VAR", null, null, null, null],
          "return": [null, null, null, null, null, "~5", "~6", null, null, null, null, "~8", null, null, null, "$10", null, null, null, null],
          "op1_type": ["IS_CONST (40)", null, "IS_CV", "IS_CV", "IS_CONST (34)", "IS_CONST (33)", "IS_TMP_VAR", "IS_CV", "IS_TMP_VAR", "IS_CV", "IS_CONST (25)", "IS_CV", "IS_CV", "IS_UNUSED", "IS_CV", "IS_UNUSED", "IS_CV", "IS_CV", "IS_CONST (12)", "IS_CONST (11)"],
          "op1": ["%3C%21DOCTYPE+html%3E%0A%3Chtml%3E%0A%3Chead%2F%3E%0A%3Cbody%3E%0A", null, "!0", "!0", "safe", "_GET", "~5", "!0", "~6", "!0", "safe", "!0", "!1", null, "!1", null, "!1", "!1", "%3Ch1%3EHello+World%21%3C%2Fh1%3E%0A%3C%2Fbody%3E%0A%3C%2Fhtml%3E%0A", 1],
          "op2_type": [null, null, "IS_CONST (37)", "IS_UNUSED", "IS_UNUSED", null, "IS_CONST (32)", "IS_UNUSED", "IS_UNUSED", "IS_UNUSED", "IS_UNUSED", "IS_CONST (24)", "IS_TMP_VAR", "IS_CONST (21)", "IS_UNUSED", null, "IS_VAR", null, null, null],
          "op2": [null, null, "<array>", null, null, null, "userData", null, null, null, null, 1, "~8", "tainte", null, null, "$10", null, null, null],
          "ext_op_type": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null],
          "ext_op": [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null]
     },
     "path": [[0]],
     "branch": {
          "sline": [1],
          "eline": [27],
          "sop": [0],
          "eop": [19],
          "outs": [[-2]]
     }
},
{
     "class": null,
     "filename": "/home/dev/test.php",
     "function name": "tainte",
     "number of ops": 7,
     "compiled vars": ["src", "dst"],
     "ops": {
          "line": [7, 9, null, 10, null, null, 11],
          "#": [0, 1, 2, 3, 4, 5, 6],
          "*": [null, null, null, null, null, null, "*"],
          "E": ["E", null, null, null, null, null, null],
          "I": [">", null, null, null, null, null, null],
          "O": [null, null, null, null, null, ">", ">"],
          "op_code": [63, 1, 38, 8, 8, 62, 62],
          "op": ["RECV", "ADD", "ASSIGN", "CONCAT", "CONCAT", "RETURN", "RETURN"],
          "fetch": ["", "", "", "", "", "", ""],
          "ext": [null, null, null, null, null, null, null],
          "return_type": ["IS_CV", "IS_TMP_VAR", null, "IS_TMP_VAR", "IS_TMP_VAR", null, null],
          "return": ["!0", "~2", null, "~4", "~5", null, null],
          "op1_type": ["IS_UNUSED", "IS_CV", "IS_CV", "IS_CONST (9)", "IS_TMP_VAR", "IS_TMP_VAR", "IS_CONST (5)"],
          "op1": [null, "!0", "!1", "%3Cdiv+id%3D%27", "~4", "~5", null],
          "op2_type": [null, "IS_CONST (12)", "IS_TMP_VAR", "IS_CV", "IS_CONST (8)", null, null],
          "op2": [null, 0, "~2", "!1", "%27%3Econtent%3C%2Fdiv%3E", null, null],
          "ext_op_type": [null, null, null, null, null, null, null],
          "ext_op": [null, null, null, null, null, null, null]
     },
     "path": [[0]],
     "branch": {
          "sline": [7],
          "eline": [11],
          "sop": [0],
          "eop": [6],
          "outs": [[]]
     }
}]

相比原版纯文本输出,对编程调用更为友好。

脚本调用

现假设我们需要对一批(10000+)独立的php脚本进行分析,且工作目录结构如下图所示。

pwd
|--samples
   |--good
   |  |--g0001.php
   |  |--g0002.php
   |  |--...
   |--bad
      |--b0001.php
      |--b0002.php
      |--...

本仓库的utils子目录提供了一个用于快速生成同构路径信息的实用程序(chloc),请自行编译并复制至系统PATH中的目录。

$ gcc utils/chloc.c -o utils/chloc && cp utils/chloc /usr/local/bin/
# 复制文件至/usr/local/bin/需要root权限或使用sudo提权

然后,手动创建同结构的子目录(opcodes)用于存放vld的输出(如下图所示)。

pwd
|--samples
|--opcodes
   |--good
   |--bad

使用find和awk批量构建vld调用命令,具体命令如下。

$ export vld="php -dvld.active=1 -dvld.execute=0 -dvld.dump_json=1 -dvld.format=1 -dvld.verbosity=3"
# 通过环境变量向awk传递vld命令
$ find . -wholename "./samples/good/*.php" -or -wholename "./samples/bad/*.php"\
  |awk '{cmd="chloc . ./opcodes .json "$1;
  cmd|getline dst;
  print vld, $1, ">",dst;
  close(cmd);
  print "echo -ne \r No.",NR," "}' vld="$vld"|bash && \
  echo -e "\ndone"

以上命令可以在指定的子目录生成对应的json格式的vld分析报告,并附带进度显示。

TODO

  • 补上原版输出中的branch info内容。
  • 添加对class_table和function_table的支持。
  • 提供调用脚本demo(项目于2020年7月下旬开始了重构,原脚本作废)
  • 对大体积json输出做内存优化,防内存错误。
  • 等待新需求出现(欢迎提交issues)。