Can Phasar be used to identify control-dependent instructions
Mohannadcse opened this issue · 6 comments
I'm using phasar taint analysis to identify data-dependent and control-dependent instructions on argv
.
psr::IDEExtendedTaintAnalysis<1, false> TaintAnalysis(&IR, &T, &I, &P,
Config);
TaintAnalysis.setIFDSIDESolverConfig(SolverConfig);
std::stringstream SolverConfigStr;
SolverConfigStr << "Using solver config: "
<< TaintAnalysis.getIFDSIDESolverConfig() << '\n';
llvm::outs() << SolverConfigStr.str();
psr::IDESolver Solver(TaintAnalysis);
llvm::outs() << "Solving data-flow analysis ...\n";
Solver.solve();
llvm::outs() << "Data-flow analysis has been solved.\n";
auto SolverRes = Solver.getSolverResults();
auto AllResEntries = SolverRes.getAllResultEntries();
for (auto &Res : AllResEntries) {
const llvm::Instruction *Inst = Res.getRowKey();
auto ResAtInst = SolverRes.resultsAt(Inst);
for ([[maybe_unused]] const auto &Op : Inst->operands()) {
for (auto &[Fact, Value] : ResAtInst) {
llvm::Value *PotentialGepPointerOp = nullptr;
if (auto *Gep = llvm::dyn_cast<llvm::GetElementPtrInst>(Op)) {
PotentialGepPointerOp = Gep->getPointerOperand();
}
if (Op == Fact->base() || (PotentialGepPointerOp &&
PotentialGepPointerOp == Fact->base())) {
NeckCandidates.push_back(
const_cast<llvm::Instruction *>(Inst)); // NOLINT ;-)
}
}
}
The IDESolver
results are as follows:
***************************************************************
* Raw IDESolver results *
***************************************************************
============ Results for function '__psrCRuntimeGlobalCtorsModel' ============
N: ret void | ID: -1
--------------------
D: (<ZERO>; Offsets={ } #0) | V: Top
N: call void @__psrCRuntimeGlobalDtorsModel() | ID: -1
------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Top
============ Results for function 'main' ============
N: %3 = alloca i32, align 4, !psr.id !20 | ID: 124
--------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %4 = alloca i32, align 4, !psr.id !21 | ID: 125
--------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %5 = alloca i8**, align 8, !psr.id !22 | ID: 126
---------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %6 = alloca i32, align 4, !psr.id !23 | ID: 127
--------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %7 = alloca i32, align 4, !psr.id !24 | ID: 128
--------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %8 = alloca i32, align 4, !psr.id !25 | ID: 129
--------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %9 = alloca i32, align 4, !psr.id !26 | ID: 130
--------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %10 = alloca i32, align 4, !psr.id !27 | ID: 131
---------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %11 = alloca [1024 x i8], align 16, !psr.id !28 | ID: 132
------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: store i32 0, i32* %3, align 4, !psr.id !29 | ID: 133
-------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: store i32 %0, i32* %4, align 4, !psr.id !30 | ID: 134
--------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: store i8** %1, i8*** %5, align 8, !psr.id !34 | ID: 136
----------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: store i32 0, i32* %6, align 4, !dbg !39, !psr.id !41 | ID: 139
-----------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
N: store i32 0, i32* %7, align 4, !dbg !43, !psr.id !45 | ID: 141
-----------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: store i32 0, i32* %8, align 4, !dbg !47, !psr.id !49 | ID: 143
-----------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
N: store i32 0, i32* %9, align 4, !dbg !51, !psr.id !53 | ID: 145
-----------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: store i32 1, i32* %10, align 4, !dbg !56, !psr.id !58 | ID: 147
------------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
N: br label %12, !dbg !59, !psr.id !60 | ID: 148
------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %13 = load i32, i32* %10, align 4, !dbg !61, !psr.id !63 | ID: 149
---------------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
N: %14 = load i32, i32* %4, align 4, !dbg !64, !psr.id !65 | ID: 150
--------------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %15 = icmp slt i32 %13, %14, !dbg !66, !psr.id !67 | ID: 151
---------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: br i1 %15, label %16, label %51, !dbg !68, !psr.id !69 | ID: 152
-------------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %17 = load i8**, i8*** %5, align 8, !dbg !70, !psr.id !73 | ID: 153
----------------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
N: %18 = load i32, i32* %10, align 4, !dbg !74, !psr.id !75 | ID: 154
---------------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %19 = sext i32 %18 to i64, !dbg !70, !psr.id !76 | ID: 155
-------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %20 = getelementptr inbounds i8*, i8** %17, i64 %19, !dbg !70, !psr.id !77 | ID: 156
---------------------------------------------------------------------------------------
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %21 = load i8*, i8** %20, align 8, !dbg !70, !psr.id !78 | ID: 157
---------------------------------------------------------------------
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %22 = getelementptr inbounds i8, i8* %21, i64 0, !dbg !70, !psr.id !79 | ID: 158
-----------------------------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %23 = load i8, i8* %22, align 1, !dbg !70, !psr.id !80 | ID: 159
-------------------------------------------------------------------
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %24 = sext i8 %23 to i32, !dbg !70, !psr.id !81 | ID: 160
------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %25 = icmp eq i32 %24, 45, !dbg !82, !psr.id !83 | ID: 161
-------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: br i1 %25, label %26, label %45, !dbg !84, !psr.id !85 | ID: 162
-------------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
N: %27 = load i8**, i8*** %5, align 8, !dbg !86, !psr.id !88 | ID: 163
----------------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %28 = load i32, i32* %10, align 4, !dbg !89, !psr.id !90 | ID: 164
---------------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %29 = sext i32 %28 to i64, !dbg !86, !psr.id !91 | ID: 165
-------------------------------------------------------------
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
how to interpret IDESolver
raw results to identify control-dependent instructions. For example, N: %17 = load i8**, i8*** %5, align 8, !dbg !70, !psr.id !73 | ID: 153
uses %5 = alloca i8**, align 8, !psr.id !22 | ID: 126
, which uses argv
, but how can I determine that %25 = icmp eq i32 %24, 45, !dbg !82, !psr.id !83 | ID: 161
is also depends on %17
?
While %88 = icmp ne i8* %87, null, !dbg !209, !psr.id !212 | ID: 231
isn't dependent on argv
, however, its dataflow facts look similar to %25
N: br label %86, !dbg !206, !psr.id !208 | ID: 229
--------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: %87 = getelementptr inbounds [1024 x i8], [1024 x i8]* %11, i64 0, i64 0, !dbg !209, !psr.id !211 | ID: 230
--------------------------------------------------------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
N: %88 = icmp ne i8* %87, null, !dbg !209, !psr.id !212 | ID: 231
-----------------------------------------------------------------
D: (<ZERO>; Offsets={ } #0) | V: Bottom
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
N: br i1 %88, label %89, label %91, !dbg !213, !psr.id !214 | ID: 232
---------------------------------------------------------------------
D: (i8** %1 | ID: main.1; Offsets={ 0 } #0) | V: NotSanitized
D: (i8*** %5 | ID: 126; Offsets={ 0 } #0) | V: NotSanitized
D: (<ZERO>; Offsets={ } #0) | V: Bottom
Here is my program
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int main(int argc, char **argv) {
int count_chars = 0, count_lines = 0;
int total_chars = 0, total_lines = 0;
for (int i = 1; i < argc; i++) {
if (argv[i][0] == '-') { // Check for `-`
switch (argv[i][1]) {
case 'c':
count_chars = 1;
break;
case 'l':
count_lines = 1;
break;
default:
printf("wrong flag %s\n", argv[i]);
exit(0);
}
} else {
printf("Provide correct args");
}
}
char buffer[1024];
while (fgets(buffer, 1024, stdin)) {
if (count_chars)
total_chars += strlen(buffer);
if (count_lines)
total_lines++;
}
if (count_chars)
printf("Total chars = %d\n", total_chars);
if (count_lines)
printf("Total lines = %d\n", total_lines);
if (buffer) {
printf("dump\n");
}
return 0;
}
Stmt if (argv[i][0] == '-')
corresponds to %15
and if (buffer)
corresponds to %88
Hi Mohannad,
I am not sure whether I understand your question correctly. How do you mean your notion of "control dependent"? Are you referring to the problem "which values influence which controlflow decision"? The examples you referred to look like dataflow properties though.
For example, N: %17 = load i8**, i8*** %5, align 8, !dbg !70, !psr.id !73 | ID: 153 uses %5 = alloca i8**, align 8, !psr.id !22 | ID: 126
The extended taint analysis is optimized particularly for identifying flows that reach a sink, choosing to not generate facts at loads but instead following back def-use chains at sinks, with the idea of reducing the size of the ESG and also to simplify the implementation of the field-sensitivity. With that, unfortunately you don't see the dataflow facts at the individual statements.
There is another taint analysis, the IFDS taint analysis, in which this kind of optimization is not present and it should report the dataflow that you asked for. The same applies to the other flow you asked for in the same post. Downside of that analysis is the lack of field sensitivity, but at least in the example you provided it does not seem to matter that much.
Generally, the report of the IFDS/IDE solver lists for each program statement which dataflow facts hold before executing that statement. In the taint analysis, it should list the variables that are tainted there. It does not describe control dependency.
We have another analysis in phasar, the InstInteractionAnalysis, which describes interactions between instructions, but not just for one particular seed, but for all instructions. I am not sure to what extent this might be helpful for your purposes though.
Hope that helps.
Best regards
Martin
Let's close this,
Thanks a lot @MMory
@MMory I used IFDSSolver
as we discussed and dumped the facts. Now, focusing on
Stmt if (argv[i][0] == '-') corresponds to %15 and if (buffer) corresponds to %88
Here are the corresponding facts to the instructions%15
and%88
, how to figure out that%15
should be tainted while%88
shouldn't?
N: %15 = icmp slt i32 %13, %14, !dbg !66, !psr.id !67 | ID: 151
---------------------------------------------------------------
D: %27 = load i8**, i8*** %5, align 8, !dbg !86, !psr.id !88 | ID: 163 | V: TOP
D: %23 = load i8, i8* %22, align 1, !dbg !70, !psr.id !80 | ID: 159 | V: TOP
D: %31 = load i8*, i8** %30, align 8, !dbg !86, !psr.id !93 | ID: 167 | V: TOP
D: %24 = sext i8 %23 to i32, !dbg !70, !psr.id !81 | ID: 160 | V: TOP
D: br i1 %25, label %26, label %45, !dbg !84, !psr.id !85 | ID: 162 | V: TOP
D: %21 = load i8*, i8** %20, align 8, !dbg !70, !psr.id !78 | ID: 157 | V: TOP
D: @zero_value = constant i2 0, align 4 | ID: -1 | V: BOTTOM
D: %34 = sext i8 %33 to i32, !dbg !86, !psr.id !96 | ID: 170 | V: TOP
D: %33 = load i8, i8* %32, align 1, !dbg !86, !psr.id !95 | ID: 169 | V: TOP
D: %32 = getelementptr inbounds i8, i8* %31, i64 1, !dbg !86, !psr.id !94 | ID: 168 | V: TOP
D: %25 = icmp eq i32 %24, 45, !dbg !82, !psr.id !83 | ID: 161 | V: TOP
D: %30 = getelementptr inbounds i8*, i8** %27, i64 %29, !dbg !86, !psr.id !92 | ID: 166 | V: TOP
D: %22 = getelementptr inbounds i8, i8* %21, i64 0, !dbg !70, !psr.id !79 | ID: 158 | V: TOP
D: i8** %1 | ID: main.1 | V: TOP
D: %20 = getelementptr inbounds i8*, i8** %17, i64 %19, !dbg !70, !psr.id !77 | ID: 156 | V: TOP
D: switch i32 %34, label %37 [
i32 99, label %35
i32 108, label %36
], !dbg !97, !psr.id !98 | ID: 171 | V: TOP
D: %17 = load i8**, i8*** %5, align 8, !dbg !70, !psr.id !73 | ID: 153 | V: TOP
D: %5 = alloca i8**, align 8, !psr.id !22 | ID: 126 | V: TOP
N: %88 = icmp ne i8* %87, null, !dbg !209, !psr.id !212 | ID: 231
-----------------------------------------------------------------
D: %22 = getelementptr inbounds i8, i8* %21, i64 0, !dbg !70, !psr.id !79 | ID: 158 | V: TOP
D: %17 = load i8**, i8*** %5, align 8, !dbg !70, !psr.id !73 | ID: 153 | V: TOP
D: %27 = load i8**, i8*** %5, align 8, !dbg !86, !psr.id !88 | ID: 163 | V: TOP
D: %21 = load i8*, i8** %20, align 8, !dbg !70, !psr.id !78 | ID: 157 | V: TOP
D: %23 = load i8, i8* %22, align 1, !dbg !70, !psr.id !80 | ID: 159 | V: TOP
D: %31 = load i8*, i8** %30, align 8, !dbg !86, !psr.id !93 | ID: 167 | V: TOP
D: %24 = sext i8 %23 to i32, !dbg !70, !psr.id !81 | ID: 160 | V: TOP
D: br i1 %25, label %26, label %45, !dbg !84, !psr.id !85 | ID: 162 | V: TOP
D: i8** %1 | ID: main.1 | V: TOP
D: %20 = getelementptr inbounds i8*, i8** %17, i64 %19, !dbg !70, !psr.id !77 | ID: 156 | V: TOP
D: switch i32 %34, label %37 [
i32 99, label %35
i32 108, label %36
], !dbg !97, !psr.id !98 | ID: 171 | V: TOP
D: %5 = alloca i8**, align 8, !psr.id !22 | ID: 126 | V: TOP
D: %30 = getelementptr inbounds i8*, i8** %27, i64 %29, !dbg !86, !psr.id !92 | ID: 166 | V: TOP
D: %25 = icmp eq i32 %24, 45, !dbg !82, !psr.id !83 | ID: 161 | V: TOP
D: %32 = getelementptr inbounds i8, i8* %31, i64 1, !dbg !86, !psr.id !94 | ID: 168 | V: TOP
D: %33 = load i8, i8* %32, align 1, !dbg !86, !psr.id !95 | ID: 169 | V: TOP
D: %34 = sext i8 %33 to i32, !dbg !86, !psr.id !96 | ID: 170 | V: TOP
D: @zero_value = constant i2 0, align 4 | ID: -1 | V: BOTTOM