Ghidra 的 Headless 模式可以在命令行下跑自动化分析,配合 GhidraScript 可以批量提取二进制文件的函数信息、字符串、导入表等,然后输出结构化报告。记录一下实际使用的方法。
Ghidra Headless 模式
Ghidra 提供了 analyzeHeadless 命令行工具,不需要打开 GUI 就能导入和分析二进制文件:
# Basic usage. Arguments, in order:
#   /path/to/project_dir          project directory
#   MyProject                     project name
#   -import /path/to/binary       binary to import
#   -postScript my_script.py      script to run after analysis
#   -scriptPath /path/to/scripts  script search path
#   -deleteProject                delete the project afterwards (batch runs)
#
# The comments live up here on purpose: a trailing backslash only continues
# the line when it is the very last character, so "\ # comment" would end
# the command early.
"$GHIDRA_HOME/support/analyzeHeadless" \
    /path/to/project_dir \
    MyProject \
    -import /path/to/binary \
    -postScript my_script.py \
    -scriptPath /path/to/scripts \
    -deleteProject
核心参数说明:
- -import:导入二进制文件,自动完成反汇编和反编译
- -process:处理已导入的文件(不重新导入)
- -preScript/-postScript:分析前/后执行脚本
- -deleteProject:一次性分析场景用,避免项目堆积
- -analysisTimeoutPerFile:设置单个文件的分析超时(秒),避免大文件卡住
GhidraScript 编写
GhidraScript 可以用 Java 或 Python(Jython)写。Python 版更方便,直接用 Ghidra 提供的 flat API:
# extract_info.py - 提取函数信息
# @category Analysis
# @runtime Jython
import json
import os
def extract_functions():
    """Collect basic metadata for every function in the current program.

    Returns:
        A list with one dict per function yielded by
        ``getFunctions(True)``, holding the keys ``name``, ``entry``,
        ``size``, ``param_count``, ``is_thunk`` and ``calling_convention``.
    """
    def describe(fn):
        # Flatten one function object into JSON-serializable values.
        return {
            "name": fn.getName(),
            "entry": str(fn.getEntryPoint()),
            "size": fn.getBody().getNumAddresses(),
            "param_count": fn.getParameterCount(),
            "is_thunk": fn.isThunk(),
            "calling_convention": fn.getCallingConventionName(),
        }

    manager = currentProgram.getFunctionManager()
    return [describe(fn) for fn in manager.getFunctions(True)]
def extract_strings():
    """Collect the defined strings that are longer than three characters.

    Returns:
        A list of dicts with the keys ``address``, ``value`` and ``length``,
        one per defined string whose text has more than 3 characters.
    """
    from ghidra.program.util import DefinedDataIterator

    strings = []
    for data in DefinedDataIterator.definedStrings(currentProgram):
        val = data.getValue()
        if not val:
            continue
        # Convert exactly once, and to text rather than bytes: under
        # Jython 2.7 str() on a non-ASCII value raises UnicodeEncodeError,
        # which would abort the whole report. u"%s" works on Python 3 too.
        text = u"%s" % (val,)
        if len(text) <= 3:  # drop strings that are too short to be useful
            continue
        strings.append({
            "address": str(data.getAddress()),
            "value": text,
            "length": len(text),
        })
    return strings
def extract_imports():
    """Collect the program's external symbols (its import table).

    Returns:
        A list of dicts with the keys ``name``, ``library`` and ``address``,
        one per symbol returned by the symbol table's
        ``getExternalSymbols()``.
    """
    symbol_table = currentProgram.getSymbolTable()
    return [
        {
            "name": symbol.getName(),
            # The parent namespace is stringified and reported as the library.
            "library": str(symbol.getParentNamespace()),
            "address": str(symbol.getAddress()),
        }
        for symbol in symbol_table.getExternalSymbols()
    ]
def extract_sections():
    """Collect name, start address, size and permissions of each memory block.

    Returns:
        A list of dicts with the keys ``name``, ``start``, ``size`` and
        ``permissions`` (itself a dict of ``read``/``write``/``execute``
        flags), one per block returned by the program memory's
        ``getBlocks()``.
    """
    def permissions_of(blk):
        # Group the three access flags under a single nested mapping.
        return {
            "read": blk.isRead(),
            "write": blk.isWrite(),
            "execute": blk.isExecute(),
        }

    result = []
    for blk in currentProgram.getMemory().getBlocks():
        entry = {
            "name": blk.getName(),
            "start": str(blk.getStart()),
            "size": blk.getSize(),
            "permissions": permissions_of(blk),
        }
        result.append(entry)
    return result
# Main logic: assemble the report for the program that is currently open.
program_name = currentProgram.getName()
report = {}
report["file"] = program_name
report["language"] = str(currentProgram.getLanguageID())
report["compiler"] = str(currentProgram.getCompilerSpec().getCompilerSpecID())
report["image_base"] = str(currentProgram.getImageBase())
report["functions"] = extract_functions()
report["strings"] = extract_strings()
report["imports"] = extract_imports()
report["sections"] = extract_sections()

# Write the JSON report into REPORT_DIR (falling back to a /tmp default),
# creating the directory first when it does not exist yet.
output_dir = os.environ.get("REPORT_DIR", "/tmp/ghidra_reports")
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
output_path = os.path.join(output_dir, program_name + ".json")
with open(output_path, "w") as f:
    f.write(json.dumps(report, indent=2))

# Short console summary of what was written.
print("[+] Report saved to: " + output_path)
print("[+] Functions: %d, Strings: %d, Imports: %d" % tuple(
    len(report[key]) for key in ("functions", "strings", "imports")
))
批量分析脚本
用 Shell 脚本驱动 Headless 模式批量分析一个目录下的所有二进制文件:
#!/bin/bash
# batch_analyze.sh - batch Ghidra analysis
#
# Usage: batch_analyze.sh <binary_dir> [report_dir]
#
# Runs analyzeHeadless with extract_info.py once for every regular file in
# <binary_dir>. Each run writes its log to <report_dir>/<file>.log, and
# REPORT_DIR is exported so the post-script can read it from the environment.

# Honour an existing GHIDRA_HOME; fall back to the default install path.
GHIDRA_HOME="${GHIDRA_HOME:-/opt/ghidra}"
BINARY_DIR="$1"
REPORT_DIR="${2:-/tmp/ghidra_reports}"
# Quote $0 so a script path containing spaces still resolves.
SCRIPT_PATH="$(dirname "$0")/scripts"
PROJECT_DIR="/tmp/ghidra_projects"

# Without this guard an empty $1 turns the glob below into "/*" and the
# loop would try to analyze every file in the filesystem root.
if [ -z "$BINARY_DIR" ] || [ ! -d "$BINARY_DIR" ]; then
    echo "Usage: $0 <binary_dir> [report_dir]" >&2
    exit 1
fi

mkdir -p "$REPORT_DIR" "$PROJECT_DIR"
export REPORT_DIR

for binary in "$BINARY_DIR"/*; do
    # Skip subdirectories and anything else that is not a regular file.
    [ -f "$binary" ] || continue
    filename=$(basename "$binary")
    echo "=== Analyzing: $filename ==="
    # Only the last few lines of output are shown; the rest goes to -log.
    "$GHIDRA_HOME/support/analyzeHeadless" \
        "$PROJECT_DIR" \
        "BatchProject" \
        -import "$binary" \
        -postScript extract_info.py \
        -scriptPath "$SCRIPT_PATH" \
        -deleteProject \
        -analysisTimeoutPerFile 300 \
        -log "$REPORT_DIR/${filename}.log" \
        2>&1 | tail -5
    echo ""
done
echo "=== All reports saved to: $REPORT_DIR ==="
# 生成汇总
python3 - <<'PYEOF'
import glob
import json
import os

# Build summary.json: one entry per per-binary report found in REPORT_DIR.
report_dir = os.environ["REPORT_DIR"]
summary_name = "summary.json"
summary = []
for f in sorted(glob.glob(os.path.join(report_dir, "*.json"))):
    # A summary left over from an earlier run also matches *.json, but it is
    # a list rather than a report dict and would fail on data["file"].
    if os.path.basename(f) == summary_name:
        continue
    with open(f) as fh:
        data = json.load(fh)
    summary.append({
        "file": data["file"],
        "functions": len(data["functions"]),
        "strings": len(data["strings"]),
        "imports": len(data["imports"]),
    })
with open(os.path.join(report_dir, summary_name), "w") as fh:
    json.dump(summary, fh, indent=2)
print("Summary: %d files analyzed" % len(summary))
PYEOF
进阶用法
提取反编译伪代码(需要更多时间,但信息量大):
# decompile_all.py
# Decompile every non-thunk function of the current program to C pseudocode.
from ghidra.app.decompiler import DecompInterface

decomp = DecompInterface()
# openProgram() reports failure through its return value.
if not decomp.openProgram(currentProgram):
    raise RuntimeError("decompiler failed to open program: %s"
                       % decomp.getLastMessage())
fm = currentProgram.getFunctionManager()
try:
    for func in fm.getFunctions(True):
        if func.isThunk():
            continue
        # Second argument is the per-function decompile timeout (seconds).
        result = decomp.decompileFunction(func, 30, monitor)
        # decompileCompleted() is false when decompilation failed or timed
        # out; in that case there is no decompiled function to read.
        if result.decompileCompleted():
            code = result.getDecompiledFunction().getC()
            # save or process the pseudocode here...
finally:
    # Release the decompiler process even if the loop is interrupted.
    decomp.dispose()
交叉引用分析——找到所有调用某个危险函数的位置:
# find_dangerous_calls.py
# Print every call site that references one of the listed risky functions.
from ghidra.program.model.symbol import RefType

dangerous = ["strcpy", "sprintf", "gets", "strcat"]
fm = currentProgram.getFunctionManager()
for func_name in dangerous:
    for func in getGlobalFunctions(func_name):
        # Keep only the references whose type is a call.
        callers = []
        for candidate in getReferencesTo(func.getEntryPoint()):
            if candidate.getReferenceType().isCall():
                callers.append(candidate)
        if not callers:
            continue
        print("[!] %s called from %d locations:" % (func_name, len(callers)))
        for ref in callers:
            site = ref.getFromAddress()
            caller_func = fm.getFunctionContaining(site)
            # The call site may lie outside any defined function.
            if caller_func:
                caller_name = caller_func.getName()
            else:
                caller_name = "unknown"
            print(" %s @ %s" % (caller_name, site))
实际应用场景
- 恶意软件批量分析:拿到一批样本后快速提取函数数量、字符串、导入表,做初步分类
- 固件安全审计:提取固件中所有 ELF 文件,批量分析导入表查找危险函数
- 版本对比:同一软件的不同版本分别提取函数列表,diff 找变化
- 漏洞模式匹配:基于已知漏洞模式写脚本,批量扫描代码库
Headless 模式配合 CI/CD 也很方便,可以在代码提交时自动对编译产物做安全扫描。