I am trying to track how many times the `get` operation is performed on the returned object. However, my current CodeQL query is unable to correctly distinguish between different numbers of `get` operations. The functions below demonstrate my expected flow states:

```python
def test1(obj, key):  # Source: Key, SourceKeyFlowState
    for _ in range(2):
        obj = obj.get(key)
        # if key has SourceKeyFlowState, obj.get(key) -> ObjectLayerOneFlowState
        # if key has SourceKeyFlowState and obj has ObjectLayerOneFlowState, obj.get(key) -> ObjectLayerTwoFlowState
    return obj  # Sink: obj, ObjectLayerTwoFlowState


def test2(obj, key):  # Source: Key, SourceKeyFlowState
    for _ in range(1):
        obj = obj.get(key)
        # if key has SourceKeyFlowState, obj.get(key) -> ObjectLayerOneFlowState
        # if key has SourceKeyFlowState and obj has ObjectLayerOneFlowState, obj.get(key) -> ObjectLayerTwoFlowState
    return obj  # Sink: obj, ObjectLayerOneFlowState
```
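Expected taint flows:

- test1: SourceKeyFlowState → ObjectFlowState-MoreThanOne
- test2: SourceKeyFlowState → ObjectFlowState-One

(Here `ObjectFlowState-One` and `ObjectFlowState-MoreThanOne` are the query's names for the `ObjectLayerOneFlowState` and `ObjectLayerTwoFlowState` annotations in the example functions.)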
However, my current CodeQL query does not correctly distinguish between these cases and instead selects all four possible flows.

```ql
import python
import semmle.python.ApiGraphs
import codeql.dataflow.DataFlow
import semmle.python.dataflow.new.DataFlow
import semmle.python.dataflow.new.TaintTracking
import semmle.python.dataflow.new.internal.DataFlowPublic
import semmle.python.dataflow.new.internal.TaintTrackingPublic

module TempTest {
  abstract class FlowState extends string {
    bindingset[this]
    FlowState() { any() }
  }

  class SourceKeyFlowState extends TempTest::FlowState {
    SourceKeyFlowState() { this = "SourceKeyFlowState" }
  }

  class Occurrence extends string {
    Occurrence() { this = "One" or this = "MoreThanOne" }
  }

  class ObjectFlowState extends TempTest::FlowState {
    Occurrence occur;

    ObjectFlowState() { this = "ObjectFlowState" + "-" + occur }
  }

  module TrackingGetOperationConfiguration implements DataFlow::StateConfigSig {
    class FlowState = TempTest::FlowState;

    predicate isSource(DataFlow::Node source, FlowState state) {
      exists (Function func, Parameter param |
        func.getArg(1) = param and
        source.asExpr() = param and
        func.getName().matches("test%")
      ) and (
        state instanceof SourceKeyFlowState
      )
    }

    predicate isSink(DataFlow::Node sink, FlowState state) {
      exists (Function func, Return ret |
        ret.getScope() = func.getEvaluatingScope() and
        ret.contains(sink.asExpr()) and
        func.getName().matches("test%")
      ) and (
        state instanceof ObjectFlowState
      )
    }

    predicate isAdditionalFlowStep(DataFlow::Node fromNode, FlowState fromState, DataFlow::Node toNode, FlowState toState) {
      exists( MethodCallNode callNode, Call call |
        callNode.asExpr() = call and
        callNode.getMethodName() = "get" and
        callNode.asExpr() = toNode.asExpr() and
        call.getArg(0) = fromNode.asExpr()
      ) and
      fromState instanceof SourceKeyFlowState and
      toState instanceof ObjectFlowState
    }
  }

  module TrackingGetOperationFlow = DataFlow::GlobalWithState<TrackingGetOperationConfiguration>;

  module Flow = TrackingGetOperationFlow; // For shortening the name

  predicate run(Flow::PathNode source, Flow::PathNode sink, FlowState state) {
    Flow::flowPath(source, sink) and
    sink.getState() = state
  }
}
```
I think the key problem here is that in the `isAdditionalFlowStep` predicate, I cannot select any path node besides `fromNode` and `toNode` and use its flow state information to determine the flow state of `toNode`. In my case, the taint propagation step should take into account both the flow state of the key (`fromNode`) and the flow state of the base object to correctly determine the flow state of `toNode`. Could anyone provide suggestions on how to fix this issue? Any insights would be greatly appreciated! |