diff --git a/python/ql/lib/LegacyPointsTo.qll b/python/ql/lib/LegacyPointsTo.qll index 45f4a80e8d1c..ffea2d93b66c 100644 --- a/python/ql/lib/LegacyPointsTo.qll +++ b/python/ql/lib/LegacyPointsTo.qll @@ -430,3 +430,179 @@ private predicate exits_early(BasicBlock b) { f.getACall().getBasicBlock() = b ) } + +/** The metrics for a function that require points-to analysis */ +class FunctionMetricsWithPointsTo extends FunctionMetrics { + /** + * Gets the cyclomatic complexity of the function: + * The number of linearly independent paths through the source code. + * Computed as E - N + 2P, + * where + * E = the number of edges of the graph. + * N = the number of nodes of the graph. + * P = the number of connected components, which for a single function is 1. + */ + int getCyclomaticComplexity() { + exists(int e, int n | + n = count(BasicBlockWithPointsTo b | b = this.getABasicBlock() and b.likelyReachable()) and + e = + count(BasicBlockWithPointsTo b1, BasicBlockWithPointsTo b2 | + b1 = this.getABasicBlock() and + b1.likelyReachable() and + b2 = this.getABasicBlock() and + b2.likelyReachable() and + b2 = b1.getASuccessor() and + not b1.unlikelySuccessor(b2) + ) + | + result = e - n + 2 + ) + } + + private BasicBlock getABasicBlock() { + result = this.getEntryNode().getBasicBlock() + or + exists(BasicBlock mid | mid = this.getABasicBlock() and result = mid.getASuccessor()) + } + + /** + * Dependency of Callables + * One callable "this" depends on another callable "result" + * if "this" makes some call to a method that may end up being "result". + */ + FunctionMetricsWithPointsTo getADependency() { + result != this and + not non_coupling_method(result) and + exists(Call call | call.getScope() = this | + exists(FunctionObject callee | callee.getFunction() = result | + call.getAFlowNode().getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee) + ) + or + exists(Attribute a | call.getFunc() = a | + unique_root_method(result, a.getName()) + or + exists(Name n | a.getObject() = n and n.getId() = "self" | + result.getScope() = this.getScope() and + result.getName() = a.getName() + ) + ) + ) + } + + /** + * Afferent Coupling + * the number of callables that depend on this method. + * This is sometimes called the "fan-in" of a method. + */ + int getAfferentCoupling() { + result = count(FunctionMetricsWithPointsTo m | m.getADependency() = this) + } + + /** + * Efferent Coupling + * the number of methods that this method depends on + * This is sometimes called the "fan-out" of a method. + */ + int getEfferentCoupling() { + result = count(FunctionMetricsWithPointsTo m | this.getADependency() = m) + } + + override string getAQlClass() { result = "FunctionMetrics" } +} + +/** The metrics for a class that require points-to analysis */ +class ClassMetricsWithPointsTo extends ClassMetrics { + private predicate dependsOn(Class other) { + other != this and + ( + exists(FunctionMetricsWithPointsTo f1, FunctionMetricsWithPointsTo f2 | + f1.getADependency() = f2 + | + f1.getScope() = this and f2.getScope() = other + ) + or + exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | + c.getFunc().(ExprWithPointsTo).refersTo(cls) and + cls.getPyClass() = other + ) + ) + } + + /** + * Gets the afferent coupling of a class -- the number of classes that + * directly depend on it. + */ + int getAfferentCoupling() { result = count(ClassMetricsWithPointsTo t | t.dependsOn(this)) } + + /** + * Gets the efferent coupling of a class -- the number of classes that + * it directly depends on. + */ + int getEfferentCoupling() { result = count(ClassMetricsWithPointsTo t | this.dependsOn(t)) } + + /** Gets the depth of inheritance of the class. */ + int getInheritanceDepth() { + exists(ClassObject cls | cls.getPyClass() = this | result = max(classInheritanceDepth(cls))) + } + + override string getAQlClass() { result = "ClassMetrics" } +} + +private int classInheritanceDepth(ClassObject cls) { + /* Prevent run-away recursion in case of circular inheritance */ + not cls.getASuperType() = cls and + ( + exists(ClassObject sup | cls.getABaseType() = sup | result = classInheritanceDepth(sup) + 1) + or + not exists(cls.getABaseType()) and + ( + major_version() = 2 and result = 0 + or + major_version() > 2 and result = 1 + ) + ) +} + +/** The metrics for a module that require points-to analysis */ +class ModuleMetricsWithPointsTo extends ModuleMetrics { + /** + * Gets the afferent coupling of a module -- the number of modules that + * directly depend on it. + */ + int getAfferentCoupling() { result = count(ModuleMetricsWithPointsTo t | t.dependsOn(this)) } + + /** + * Gets the efferent coupling of a module -- the number of modules that + * it directly depends on. + */ + int getEfferentCoupling() { result = count(ModuleMetricsWithPointsTo t | this.dependsOn(t)) } + + private predicate dependsOn(Module other) { + other != this and + ( + exists(FunctionMetricsWithPointsTo f1, FunctionMetricsWithPointsTo f2 | + f1.getADependency() = f2 + | + f1.getEnclosingModule() = this and f2.getEnclosingModule() = other + ) + or + exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | + c.getFunc().(ExprWithPointsTo).refersTo(cls) and + cls.getPyClass().getEnclosingModule() = other + ) + ) + } + + override string getAQlClass() { result = "ModuleMetrics" } +} + +/** Helpers for coupling */ +predicate unique_root_method(Function func, string name) { + name = func.getName() and + not exists(FunctionObject f, FunctionObject other | + f.getFunction() = func and + other.getName() = name + | + not other.overrides(f) + ) +} diff --git a/python/ql/lib/change-notes/2026-02-18-remove-points-to-from-metrics.md b/python/ql/lib/change-notes/2026-02-18-remove-points-to-from-metrics.md new file mode 100644 index 000000000000..ed64206bd3b3 --- /dev/null +++ b/python/ql/lib/change-notes/2026-02-18-remove-points-to-from-metrics.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- + +- The `Metrics` library no longer contains code that depends on the points-to analysis. The removed functionality has instead been moved to the `LegacyPointsTo` module, to classes like `ModuleMetricsWithPointsTo` etc. If you depend on any of these classes, you must now remember to import `LegacyPointsTo`, and use the appropriate types in order to use the points-to-based functionality. diff --git a/python/ql/lib/python.qll b/python/ql/lib/python.qll index 54306408a333..d127e297dbb4 100644 --- a/python/ql/lib/python.qll +++ b/python/ql/lib/python.qll @@ -14,7 +14,7 @@ import semmle.python.Patterns import semmle.python.Keywords import semmle.python.Comprehensions import semmle.python.Flow -private import semmle.python.Metrics +import semmle.python.Metrics import semmle.python.Constants import semmle.python.Scope import semmle.python.Comment diff --git a/python/ql/lib/semmle/python/Metrics.qll b/python/ql/lib/semmle/python/Metrics.qll index 4959a06317ff..26560bad25c9 100644 --- a/python/ql/lib/semmle/python/Metrics.qll +++ b/python/ql/lib/semmle/python/Metrics.qll @@ -1,5 +1,5 @@ import python -private import LegacyPointsTo +private import semmle.python.SelfAttribute /** The metrics for a function */ class FunctionMetrics extends Function { @@ -18,76 +18,6 @@ class FunctionMetrics extends Function { /** Gets the number of lines of docstring in the function */ int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) } - /** - * Gets the cyclomatic complexity of the function: - * The number of linearly independent paths through the source code. - * Computed as E - N + 2P, - * where - * E = the number of edges of the graph. - * N = the number of nodes of the graph. - * P = the number of connected components, which for a single function is 1. - */ - int getCyclomaticComplexity() { - exists(int e, int n | - n = count(BasicBlockWithPointsTo b | b = this.getABasicBlock() and b.likelyReachable()) and - e = - count(BasicBlockWithPointsTo b1, BasicBlockWithPointsTo b2 | - b1 = this.getABasicBlock() and - b1.likelyReachable() and - b2 = this.getABasicBlock() and - b2.likelyReachable() and - b2 = b1.getASuccessor() and - not b1.unlikelySuccessor(b2) - ) - | - result = e - n + 2 - ) - } - - private BasicBlock getABasicBlock() { - result = this.getEntryNode().getBasicBlock() - or - exists(BasicBlock mid | mid = this.getABasicBlock() and result = mid.getASuccessor()) - } - - /** - * Dependency of Callables - * One callable "this" depends on another callable "result" - * if "this" makes some call to a method that may end up being "result". - */ - FunctionMetrics getADependency() { - result != this and - not non_coupling_method(result) and - exists(Call call | call.getScope() = this | - exists(FunctionObject callee | callee.getFunction() = result | - call.getAFlowNode().getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee) - ) - or - exists(Attribute a | call.getFunc() = a | - unique_root_method(result, a.getName()) - or - exists(Name n | a.getObject() = n and n.getId() = "self" | - result.getScope() = this.getScope() and - result.getName() = a.getName() - ) - ) - ) - } - - /** - * Afferent Coupling - * the number of callables that depend on this method. - * This is sometimes called the "fan-in" of a method. - */ - int getAfferentCoupling() { result = count(FunctionMetrics m | m.getADependency() = this) } - - /** - * Efferent Coupling - * the number of methods that this method depends on - * This is sometimes called the "fan-out" of a method. - */ - int getEfferentCoupling() { result = count(FunctionMetrics m | this.getADependency() = m) } - int getNumberOfParametersWithoutDefault() { result = this.getPositionalParameterCount() - @@ -116,36 +46,6 @@ class ClassMetrics extends Class { /** Gets the number of lines of docstrings in the class */ int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) } - private predicate dependsOn(Class other) { - other != this and - ( - exists(FunctionMetrics f1, FunctionMetrics f2 | f1.getADependency() = f2 | - f1.getScope() = this and f2.getScope() = other - ) - or - exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | - c.getFunc().(ExprWithPointsTo).refersTo(cls) and - cls.getPyClass() = other - ) - ) - } - - /** - * Gets the afferent coupling of a class -- the number of classes that - * directly depend on it. - */ - int getAfferentCoupling() { result = count(ClassMetrics t | t.dependsOn(this)) } - - /** - * Gets the efferent coupling of a class -- the number of classes that - * it directly depends on. - */ - int getEfferentCoupling() { result = count(ClassMetrics t | this.dependsOn(t)) } - - int getInheritanceDepth() { - exists(ClassObject cls | cls.getPyClass() = this | result = max(classInheritanceDepth(cls))) - } - /* -------- CHIDAMBER AND KEMERER LACK OF COHESION IN METHODS ------------ */ /* * The aim of this metric is to try and determine whether a class @@ -245,21 +145,6 @@ class ClassMetrics extends Class { int getLackOfCohesionHM() { result = count(int line | this.unionSubgraph(_, line)) } } -private int classInheritanceDepth(ClassObject cls) { - /* Prevent run-away recursion in case of circular inheritance */ - not cls.getASuperType() = cls and - ( - exists(ClassObject sup | cls.getABaseType() = sup | result = classInheritanceDepth(sup) + 1) - or - not exists(cls.getABaseType()) and - ( - major_version() = 2 and result = 0 - or - major_version() > 2 and result = 1 - ) - ) -} - class ModuleMetrics extends Module { /** Gets the total number of lines (including blank lines) in the module */ int getNumberOfLines() { py_alllines(this, result) } @@ -272,43 +157,6 @@ class ModuleMetrics extends Module { /** Gets the number of lines of docstrings in the module */ int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) } - - /** - * Gets the afferent coupling of a class -- the number of classes that - * directly depend on it. - */ - int getAfferentCoupling() { result = count(ModuleMetrics t | t.dependsOn(this)) } - - /** - * Gets the efferent coupling of a class -- the number of classes that - * it directly depends on. - */ - int getEfferentCoupling() { result = count(ModuleMetrics t | this.dependsOn(t)) } - - private predicate dependsOn(Module other) { - other != this and - ( - exists(FunctionMetrics f1, FunctionMetrics f2 | f1.getADependency() = f2 | - f1.getEnclosingModule() = this and f2.getEnclosingModule() = other - ) - or - exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | - c.getFunc().(ExprWithPointsTo).refersTo(cls) and - cls.getPyClass().getEnclosingModule() = other - ) - ) - } -} - -/** Helpers for coupling */ -predicate unique_root_method(Function func, string name) { - name = func.getName() and - not exists(FunctionObject f, FunctionObject other | - f.getFunction() = func and - other.getName() = name - | - not other.overrides(f) - ) } predicate non_coupling_method(Function f) { diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll index 9ed9e7d7a2b0..1620d1b9b26b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll @@ -30,7 +30,9 @@ module Builtins { "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError", "UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError", // Added for compatibility - "exec" + "exec", + // Added by the `site` module (available by default unless `-S` is used) + "copyright", "credits", "exit", "quit" ] or // Built-in constants shared between Python 2 and 3 @@ -49,8 +51,8 @@ module Builtins { or // Python 2 only result in [ - "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr", - "unicode", "xrange" + "apply", "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", + "unichr", "unicode", "xrange" ] } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index b04b83be83ec..5fd59eb12184 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1921,3 +1921,138 @@ private module OutNodes { * `kind`. */ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) } + +/** + * Provides predicates for approximating type properties of user-defined classes + * based on their structure (method declarations, base classes). + * + * This module should _not_ be used in the call graph computation itself, as parts of it may depend + * on layers that themselves build upon the call graph (e.g. API graphs). + */ +module DuckTyping { + private import semmle.python.ApiGraphs + + /** + * Holds if `cls` or any of its resolved superclasses declares a method with the given `name`. + */ + predicate hasMethod(Class cls, string name) { + cls.getAMethod().getName() = name + or + hasMethod(getADirectSuperclass(cls), name) + } + + /** + * Holds if `cls` has a base class that cannot be resolved to a user-defined class + * and is not just `object`, meaning it may inherit methods from an unknown class. + */ + predicate hasUnresolvedBase(Class cls) { + exists(Expr base | base = cls.getABase() | + not base = classTracker(_).asExpr() and + not base = API::builtin("object").getAValueReachableFromSource().asExpr() + ) + } + + /** + * Holds if `cls` supports the container protocol, i.e. it declares + * `__contains__`, `__iter__`, or `__getitem__`. + */ + predicate isContainer(Class cls) { + hasMethod(cls, "__contains__") or + hasMethod(cls, "__iter__") or + hasMethod(cls, "__getitem__") + } + + /** + * Holds if `cls` supports the iterable protocol, i.e. it declares + * `__iter__` or `__getitem__`. + */ + predicate isIterable(Class cls) { + hasMethod(cls, "__iter__") or + hasMethod(cls, "__getitem__") + } + + /** + * Holds if `cls` supports the iterator protocol, i.e. it declares + * both `__iter__` and `__next__`. + */ + predicate isIterator(Class cls) { + hasMethod(cls, "__iter__") and + hasMethod(cls, "__next__") + } + + /** + * Holds if `cls` supports the context manager protocol, i.e. it declares + * both `__enter__` and `__exit__`. + */ + predicate isContextManager(Class cls) { + hasMethod(cls, "__enter__") and + hasMethod(cls, "__exit__") + } + + /** + * Holds if `cls` supports the descriptor protocol, i.e. it declares + * `__get__`, `__set__`, or `__delete__`. + */ + predicate isDescriptor(Class cls) { + hasMethod(cls, "__get__") or + hasMethod(cls, "__set__") or + hasMethod(cls, "__delete__") + } + + /** + * Holds if `cls` directly assigns to an attribute named `name` in its class body. + * This covers attribute assignments like `x = value`, but not method definitions. + */ + predicate declaresAttribute(Class cls, string name) { exists(getAnAttributeValue(cls, name)) } + + /** + * Gets the value expression assigned to attribute `name` directly in the class body of `cls`. + */ + Expr getAnAttributeValue(Class cls, string name) { + exists(Assign a | + a.getScope() = cls and + a.getATarget().(Name).getId() = name and + result = a.getValue() + ) + } + + /** + * Holds if `cls` is callable, i.e. it declares `__call__`. + */ + predicate isCallable(Class cls) { hasMethod(cls, "__call__") } + + /** + * Holds if `cls` supports the mapping protocol, i.e. it declares + * `__getitem__` and `__keys__`, or `__getitem__` and `__iter__`. + */ + predicate isMapping(Class cls) { + hasMethod(cls, "__getitem__") and + (hasMethod(cls, "keys") or hasMethod(cls, "__iter__")) + } + + /** + * Holds if `cls` is a new-style class. In Python 3, all classes are new-style. + * In Python 2, a class is new-style if it (transitively) inherits from `object`, + * or has a declared `__metaclass__`, or has an unresolved base class. + */ + predicate isNewStyle(Class cls) { + major_version() = 3 + or + major_version() = 2 and + ( + cls.getABase() = API::builtin("object").getAValueReachableFromSource().asExpr() + or + isNewStyle(getADirectSuperclass(cls)) + or + hasUnresolvedBase(cls) + or + exists(cls.getMetaClass()) + or + // Module-level __metaclass__ = type makes all classes in the module new-style + exists(Assign a | + a.getScope() = cls.getEnclosingModule() and + a.getATarget().(Name).getId() = "__metaclass__" + ) + ) + } +} diff --git a/python/ql/src/Classes/InconsistentMRO.ql b/python/ql/src/Classes/InconsistentMRO.ql index aa319d56114e..73f6bf9240b8 100644 --- a/python/ql/src/Classes/InconsistentMRO.ql +++ b/python/ql/src/Classes/InconsistentMRO.ql @@ -12,19 +12,24 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -ClassObject left_base(ClassObject type, ClassObject base) { - exists(int i | i > 0 and type.getBaseType(i) = base and result = type.getBaseType(i - 1)) +/** + * Gets the `i`th base class of `cls`, if it can be resolved to a user-defined class. + */ +Class getBaseType(Class cls, int i) { cls.getBase(i) = classTracker(result).asExpr() } + +Class left_base(Class type, Class base) { + exists(int i | i > 0 and getBaseType(type, i) = base and result = getBaseType(type, i - 1)) } -predicate invalid_mro(ClassObject t, ClassObject left, ClassObject right) { - t.isNewStyle() and +predicate invalid_mro(Class t, Class left, Class right) { + DuckTyping::isNewStyle(t) and left = left_base(t, right) and - left = right.getAnImproperSuperType() + left = getADirectSuperclass*(right) } -from ClassObject t, ClassObject left, ClassObject right +from Class t, Class left, Class right where invalid_mro(t, left, right) select t, "Construction of class " + t.getName() + diff --git a/python/ql/src/Classes/PropertyInOldStyleClass.ql b/python/ql/src/Classes/PropertyInOldStyleClass.ql index 2fd7b1d14cf2..80a5ef06bbee 100644 --- a/python/ql/src/Classes/PropertyInOldStyleClass.ql +++ b/python/ql/src/Classes/PropertyInOldStyleClass.ql @@ -11,10 +11,13 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from PropertyObject prop, ClassObject cls -where cls.declaredAttribute(_) = prop and not cls.failedInference() and not cls.isNewStyle() +from Function prop, Class cls +where + prop.getScope() = cls and + prop.getADecorator().(Name).getId() = "property" and + not DuckTyping::isNewStyle(cls) select prop, "Property " + prop.getName() + " will not work properly, as class " + cls.getName() + " is an old-style class." diff --git a/python/ql/src/Classes/ShouldBeContextManager.ql b/python/ql/src/Classes/ShouldBeContextManager.ql index 6aec0f0e0ab0..9a50d841a740 100644 --- a/python/ql/src/Classes/ShouldBeContextManager.ql +++ b/python/ql/src/Classes/ShouldBeContextManager.ql @@ -14,10 +14,12 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from ClassValue c -where not c.isBuiltin() and not c.isContextManager() and exists(c.declaredAttribute("__del__")) +from Class c +where + not DuckTyping::isContextManager(c) and + DuckTyping::hasMethod(c, "__del__") select c, "Class " + c.getName() + " implements __del__ (presumably to release some resource). Consider making it a context manager." diff --git a/python/ql/src/Classes/SlotsInOldStyleClass.ql b/python/ql/src/Classes/SlotsInOldStyleClass.ql index 2f91a88cf64e..70a7997113e6 100644 --- a/python/ql/src/Classes/SlotsInOldStyleClass.ql +++ b/python/ql/src/Classes/SlotsInOldStyleClass.ql @@ -12,9 +12,11 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from ClassObject c -where not c.isNewStyle() and c.declaresAttribute("__slots__") and not c.failedInference() +from Class c +where + not DuckTyping::isNewStyle(c) and + DuckTyping::declaresAttribute(c, "__slots__") select c, "Using '__slots__' in an old style class just creates a class attribute called '__slots__'." diff --git a/python/ql/src/Classes/SuperInOldStyleClass.ql b/python/ql/src/Classes/SuperInOldStyleClass.ql index a6a272b1b3b7..bc6541052de0 100644 --- a/python/ql/src/Classes/SuperInOldStyleClass.ql +++ b/python/ql/src/Classes/SuperInOldStyleClass.ql @@ -11,14 +11,13 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate uses_of_super_in_old_style_class(Call s) { - exists(Function f, ClassObject c | + exists(Function f, Class c | s.getScope() = f and - f.getScope() = c.getPyClass() and - not c.failedInference() and - not c.isNewStyle() and + f.getScope() = c and + not DuckTyping::isNewStyle(c) and s.getFunc().(Name).getId() = "super" ) } diff --git a/python/ql/src/Classes/UselessClass.ql b/python/ql/src/Classes/UselessClass.ql index 740c74bf96d9..229e42fd292f 100644 --- a/python/ql/src/Classes/UselessClass.ql +++ b/python/ql/src/Classes/UselessClass.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate fewer_than_two_public_methods(Class cls, int methods) { (methods = 0 or methods = 1) and @@ -25,13 +25,8 @@ predicate does_not_define_special_method(Class cls) { } predicate no_inheritance(Class c) { - not exists(ClassValue cls, ClassValue other | - cls.getScope() = c and - other != ClassValue::object() - | - other.getABaseType() = cls or - cls.getABaseType() = other - ) and + not exists(getADirectSubclass(c)) and + not exists(getADirectSuperclass(c)) and not exists(Expr base | base = c.getABase() | not base instanceof Name or base.(Name).getId() != "object" ) diff --git a/python/ql/src/Expressions/ContainsNonContainer.ql b/python/ql/src/Expressions/ContainsNonContainer.ql index de8c38795675..a1b989020586 100644 --- a/python/ql/src/Expressions/ContainsNonContainer.ql +++ b/python/ql/src/Expressions/ContainsNonContainer.ql @@ -12,25 +12,21 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch -predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) { - exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() | +predicate rhs_in_expr(Expr rhs, Compare cmp) { + exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs | op instanceof In or op instanceof NotIn ) } -from - ControlFlowNodeWithPointsTo non_seq, Compare cmp, Value v, ClassValue cls, ControlFlowNode origin +from Compare cmp, DataFlow::LocalSourceNode origin, DataFlow::Node rhs, Class cls where - rhs_in_expr(non_seq, cmp) and - non_seq.pointsTo(_, v, origin) and - v.getClass() = cls and - not Types::failedInference(cls, _) and - not cls.hasAttribute("__contains__") and - not cls.hasAttribute("__iter__") and - not cls.hasAttribute("__getitem__") and - not cls = ClassValue::nonetype() and - not cls = Value::named("types.MappingProxyType") + origin = classInstanceTracker(cls) and + origin.flowsTo(rhs) and + not DuckTyping::isContainer(cls) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and + rhs_in_expr(rhs.asExpr(), cmp) select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin, "target", cls, cls.getName() diff --git a/python/ql/src/Expressions/HashedButNoHash.ql b/python/ql/src/Expressions/HashedButNoHash.ql index 705806bf3605..93edc7c9c6cc 100644 --- a/python/ql/src/Expressions/HashedButNoHash.ql +++ b/python/ql/src/Expressions/HashedButNoHash.ql @@ -12,76 +12,97 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.ApiGraphs -/* - * This assumes that any indexing operation where the value is not a sequence or numpy array involves hashing. - * For sequences, the index must be an int, which are hashable, so we don't need to treat them specially. - * For numpy arrays, the index may be a list, which are not hashable and needs to be treated specially. +/** + * Holds if `cls` explicitly sets `__hash__` to `None`, making instances unhashable. */ - -predicate numpy_array_type(ClassValue na) { - exists(ModuleValue np | np.getName() = "numpy" or np.getName() = "numpy.core" | - na.getASuperType() = np.attr("ndarray") - ) +predicate setsHashToNone(Class cls) { + DuckTyping::getAnAttributeValue(cls, "__hash__") instanceof None } -predicate has_custom_getitem(Value v) { - v.getClass().lookup("__getitem__") instanceof PythonFunctionValue +/** + * Holds if `cls` is a user-defined class whose instances are unhashable. + * A new-style class without `__hash__` is unhashable, as is one that explicitly + * sets `__hash__ = None`. + */ +predicate isUnhashableUserClass(Class cls) { + DuckTyping::isNewStyle(cls) and + not DuckTyping::hasMethod(cls, "__hash__") and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) or - numpy_array_type(v.getClass()) + setsHashToNone(cls) } -predicate explicitly_hashed(ControlFlowNode f) { - exists(CallNode c, GlobalVariable hash | - c.getArg(0) = f and c.getFunction().(NameNode).uses(hash) and hash.getId() = "hash" +/** + * Gets the name of a builtin type whose instances are unhashable. + */ +string getUnhashableBuiltinName() { result = ["list", "set", "dict", "bytearray"] } + +/** + * Holds if `origin` is a local source node tracking an unhashable instance that + * flows to `node`, with `clsName` describing the class for the alert. + */ +predicate isUnhashable(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) { + exists(Class c | + isUnhashableUserClass(c) and + origin = classInstanceTracker(c) and + origin.flowsTo(node) and + clsName = c.getName() ) + or + clsName = getUnhashableBuiltinName() and + origin = API::builtin(clsName).getAnInstance().asSource() and + origin.flowsTo(node) +} + +predicate explicitly_hashed(DataFlow::Node node) { + node = API::builtin("hash").getACall().getArg(0) } -predicate unhashable_subscript(ControlFlowNode f, ClassValue c, ControlFlowNode origin) { - is_unhashable(f, c, origin) and - exists(SubscriptNode sub | sub.getIndex() = f | - exists(Value custom_getitem | - sub.getObject().(ControlFlowNodeWithPointsTo).pointsTo(custom_getitem) and - not has_custom_getitem(custom_getitem) - ) +/** + * Holds if the subscript object in `sub[...]` is known to use hashing for indexing, + * i.e. it does not have a custom `__getitem__` that could accept unhashable indices. + */ +predicate subscriptUsesHashing(Subscript sub) { + DataFlow::exprNode(sub.getObject()) = + API::builtin("dict").getAnInstance().getAValueReachableFromSource() + or + exists(Class cls | + classInstanceTracker(cls) + .(DataFlow::LocalSourceNode) + .flowsTo(DataFlow::exprNode(sub.getObject())) and + not DuckTyping::hasMethod(cls, "__getitem__") ) } -predicate is_unhashable(ControlFlowNodeWithPointsTo f, ClassValue cls, ControlFlowNode origin) { - exists(Value v | f.pointsTo(v, origin) and v.getClass() = cls | - not cls.hasAttribute("__hash__") and not cls.failedInference(_) and cls.isNewStyle() - or - cls.lookup("__hash__") = Value::named("None") +predicate unhashable_subscript(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) { + exists(Subscript sub | + node = DataFlow::exprNode(sub.getIndex()) and + subscriptUsesHashing(sub) + | + isUnhashable(origin, node, clsName) ) } /** - * Holds if `f` is inside a `try` that catches `TypeError`. For example: - * - * try: - * ... f ... - * except TypeError: - * ... - * - * This predicate is used to eliminate false positive results. If `hash` - * is called on an unhashable object then a `TypeError` will be thrown. - * But this is not a bug if the code catches the `TypeError` and handles - * it. + * Holds if `e` is inside a `try` that catches `TypeError`. */ -predicate typeerror_is_caught(ControlFlowNode f) { +predicate typeerror_is_caught(Expr e) { exists(Try try | - try.getBody().contains(f.getNode()) and - try.getAHandler().getType().(ExprWithPointsTo).pointsTo(ClassValue::typeError()) + try.getBody().contains(e) and + try.getAHandler().getType() = API::builtin("TypeError").getAValueReachableFromSource().asExpr() ) } -from ControlFlowNode f, ClassValue c, ControlFlowNode origin +from DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName where - not typeerror_is_caught(f) and + not typeerror_is_caught(node.asExpr()) and ( - explicitly_hashed(f) and is_unhashable(f, c, origin) + explicitly_hashed(node) and isUnhashable(origin, node, clsName) or - unhashable_subscript(f, c, origin) + unhashable_subscript(origin, node, clsName) ) -select f.getNode(), "This $@ of $@ is unhashable.", origin, "instance", c, c.getQualifiedName() +select node, "This $@ of $@ is unhashable.", origin, "instance", origin, clsName diff --git a/python/ql/src/Expressions/UseofApply.ql b/python/ql/src/Expressions/UseofApply.ql index 2012f2d93618..f1068eca837c 100644 --- a/python/ql/src/Expressions/UseofApply.ql +++ b/python/ql/src/Expressions/UseofApply.ql @@ -10,9 +10,10 @@ */ import python -private import LegacyPointsTo -private import semmle.python.types.Builtins +private import semmle.python.ApiGraphs -from CallNode call, ControlFlowNodeWithPointsTo func -where major_version() = 2 and call.getFunction() = func and func.pointsTo(Value::named("apply")) +from CallNode call +where + major_version() = 2 and + call = API::builtin("apply").getACall().asCfgNode() select call, "Call to the obsolete builtin function 'apply'." diff --git a/python/ql/src/Functions/OverlyComplexDelMethod.ql b/python/ql/src/Functions/OverlyComplexDelMethod.ql index 12776db1b60f..6b08a40fa290 100644 --- a/python/ql/src/Functions/OverlyComplexDelMethod.ql +++ b/python/ql/src/Functions/OverlyComplexDelMethod.ql @@ -18,6 +18,6 @@ from FunctionValue method where exists(ClassValue c | c.declaredAttribute("__del__") = method and - method.getScope().(FunctionMetrics).getCyclomaticComplexity() > 3 + method.getScope().(FunctionMetricsWithPointsTo).getCyclomaticComplexity() > 3 ) select method, "Overly complex '__del__' method." diff --git a/python/ql/src/Imports/DeprecatedModule.ql b/python/ql/src/Imports/DeprecatedModule.ql index 3d529943fb8f..62d772313343 100644 --- a/python/ql/src/Imports/DeprecatedModule.ql +++ b/python/ql/src/Imports/DeprecatedModule.ql @@ -11,7 +11,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs /** * Holds if the module `name` was deprecated in Python version `major`.`minor`, @@ -80,7 +80,7 @@ where name = imp.getName() and deprecated_module(name, instead, _, _) and not exists(Try try, ExceptStmt except | except = try.getAHandler() | - except.getType().(ExprWithPointsTo).pointsTo(ClassValue::importError()) and + except.getType() = API::builtin("ImportError").getAValueReachableFromSource().asExpr() and except.containsInScope(imp) ) select imp, deprecation_message(name) + replacement_message(name) diff --git a/python/ql/src/Metrics/CLinesOfCode.ql b/python/ql/src/Metrics/CLinesOfCode.ql index 66a107a521ff..274d45e84155 100644 --- a/python/ql/src/Metrics/CLinesOfCode.ql +++ b/python/ql/src/Metrics/CLinesOfCode.ql @@ -10,7 +10,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics f select f, f.getNumberOfLinesOfCode() as n order by n desc diff --git a/python/ql/src/Metrics/ClassAfferentCoupling.ql b/python/ql/src/Metrics/ClassAfferentCoupling.ql index 3faf714d09c9..69983f564ec6 100644 --- a/python/ql/src/Metrics/ClassAfferentCoupling.ql +++ b/python/ql/src/Metrics/ClassAfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ClassMetrics cls +from ClassMetricsWithPointsTo cls select cls, cls.getAfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/ClassEfferentCoupling.ql b/python/ql/src/Metrics/ClassEfferentCoupling.ql index b4c5a29696bd..08016a838891 100644 --- a/python/ql/src/Metrics/ClassEfferentCoupling.ql +++ b/python/ql/src/Metrics/ClassEfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ClassMetrics cls +from ClassMetricsWithPointsTo cls select cls, cls.getEfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/CommentRatio.ql b/python/ql/src/Metrics/CommentRatio.ql index 38394c1bf4fd..5886c9d09e73 100644 --- a/python/ql/src/Metrics/CommentRatio.ql +++ b/python/ql/src/Metrics/CommentRatio.ql @@ -12,7 +12,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics mm where mm.getNumberOfLines() > 0 diff --git a/python/ql/src/Metrics/CyclomaticComplexity.ql b/python/ql/src/Metrics/CyclomaticComplexity.ql index 3d9ca10dd99b..1d9874ac12c6 100644 --- a/python/ql/src/Metrics/CyclomaticComplexity.ql +++ b/python/ql/src/Metrics/CyclomaticComplexity.ql @@ -15,6 +15,6 @@ import python private import LegacyPointsTo -from FunctionMetrics func, int complexity +from FunctionMetricsWithPointsTo func, int complexity where complexity = func.getCyclomaticComplexity() select func, complexity order by complexity desc diff --git a/python/ql/src/Metrics/DocStringRatio.ql b/python/ql/src/Metrics/DocStringRatio.ql index 824ff5a35093..3c763c7742b1 100644 --- a/python/ql/src/Metrics/DocStringRatio.ql +++ b/python/ql/src/Metrics/DocStringRatio.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics mm where mm.getNumberOfLines() > 0 diff --git a/python/ql/src/Metrics/FLines.ql b/python/ql/src/Metrics/FLines.ql index dc5418711c9c..01ea5a04f77f 100644 --- a/python/ql/src/Metrics/FLines.ql +++ b/python/ql/src/Metrics/FLines.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics m, int n where n = m.getNumberOfLines() diff --git a/python/ql/src/Metrics/FLinesOfCode.ql b/python/ql/src/Metrics/FLinesOfCode.ql index 8159eb86c572..72131d3b74f8 100644 --- a/python/ql/src/Metrics/FLinesOfCode.ql +++ b/python/ql/src/Metrics/FLinesOfCode.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics m, int n where n = m.getNumberOfLinesOfCode() diff --git a/python/ql/src/Metrics/FLinesOfComments.ql b/python/ql/src/Metrics/FLinesOfComments.ql index ac4b295003a1..33523054f0ca 100644 --- a/python/ql/src/Metrics/FLinesOfComments.ql +++ b/python/ql/src/Metrics/FLinesOfComments.ql @@ -10,7 +10,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics m, int n where n = m.getNumberOfLinesOfComments() + m.getNumberOfLinesOfDocStrings() diff --git a/python/ql/src/Metrics/FunctionNumberOfCalls.ql b/python/ql/src/Metrics/FunctionNumberOfCalls.ql index 60171c790bfb..fb4dfe5a9d2e 100644 --- a/python/ql/src/Metrics/FunctionNumberOfCalls.ql +++ b/python/ql/src/Metrics/FunctionNumberOfCalls.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics func select func, func.getNumberOfCalls() as n order by n desc diff --git a/python/ql/src/Metrics/FunctionStatementNestingDepth.ql b/python/ql/src/Metrics/FunctionStatementNestingDepth.ql index 94f7e355750e..ab40cc6068dd 100644 --- a/python/ql/src/Metrics/FunctionStatementNestingDepth.ql +++ b/python/ql/src/Metrics/FunctionStatementNestingDepth.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics func select func, func.getStatementNestingDepth() as n order by n desc diff --git a/python/ql/src/Metrics/History/HChurn.ql b/python/ql/src/Metrics/History/HChurn.ql index dba288436709..f9d90d5eb197 100644 --- a/python/ql/src/Metrics/History/HChurn.ql +++ b/python/ql/src/Metrics/History/HChurn.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m, int n where diff --git a/python/ql/src/Metrics/History/HLinesAdded.ql b/python/ql/src/Metrics/History/HLinesAdded.ql index 51a0af62460b..8d13986988d9 100644 --- a/python/ql/src/Metrics/History/HLinesAdded.ql +++ b/python/ql/src/Metrics/History/HLinesAdded.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m, int n where diff --git a/python/ql/src/Metrics/History/HLinesDeleted.ql b/python/ql/src/Metrics/History/HLinesDeleted.ql index 3016c9f128b2..a1e3ac0275cb 100644 --- a/python/ql/src/Metrics/History/HLinesDeleted.ql +++ b/python/ql/src/Metrics/History/HLinesDeleted.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m, int n where diff --git a/python/ql/src/Metrics/History/HNumberOfAuthors.ql b/python/ql/src/Metrics/History/HNumberOfAuthors.ql index ff00773d98ce..1bc1dbb70b00 100644 --- a/python/ql/src/Metrics/History/HNumberOfAuthors.ql +++ b/python/ql/src/Metrics/History/HNumberOfAuthors.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m where exists(m.getNumberOfLinesOfCode()) diff --git a/python/ql/src/Metrics/History/HNumberOfCoCommits.ql b/python/ql/src/Metrics/History/HNumberOfCoCommits.ql index 4192ecf57acd..e998aaa66d4a 100644 --- a/python/ql/src/Metrics/History/HNumberOfCoCommits.ql +++ b/python/ql/src/Metrics/History/HNumberOfCoCommits.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo int committedFiles(Commit commit) { result = count(commit.getAnAffectedFile()) } diff --git a/python/ql/src/Metrics/History/HNumberOfReCommits.ql b/python/ql/src/Metrics/History/HNumberOfReCommits.ql index f12c51840dc9..14330692ff93 100644 --- a/python/ql/src/Metrics/History/HNumberOfReCommits.ql +++ b/python/ql/src/Metrics/History/HNumberOfReCommits.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo predicate inRange(Commit first, Commit second) { first.getAnAffectedFile() = second.getAnAffectedFile() and diff --git a/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql b/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql index 9b787f529576..6469b393dcea 100644 --- a/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql +++ b/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m where exists(m.getNumberOfLinesOfCode()) diff --git a/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql b/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql index 501094907af0..9ec94df7e763 100644 --- a/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql +++ b/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m where diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql b/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql index f06b050827cd..c0ef582c32b6 100644 --- a/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql +++ b/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from ClassMetrics cls select cls, cls.getLackOfCohesionCK() as n order by n desc diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql b/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql index 1456b76b286f..5cc77ecfb4f7 100644 --- a/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql +++ b/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from ClassMetrics cls select cls, cls.getLackOfCohesionHM() as n order by n desc diff --git a/python/ql/src/Metrics/ModuleAfferentCoupling.ql b/python/ql/src/Metrics/ModuleAfferentCoupling.ql index d61cc61e4daf..7203d12ae843 100644 --- a/python/ql/src/Metrics/ModuleAfferentCoupling.ql +++ b/python/ql/src/Metrics/ModuleAfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ModuleMetrics m +from ModuleMetricsWithPointsTo m select m, m.getAfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/ModuleEfferentCoupling.ql b/python/ql/src/Metrics/ModuleEfferentCoupling.ql index bfabf4b60128..a392a44e7519 100644 --- a/python/ql/src/Metrics/ModuleEfferentCoupling.ql +++ b/python/ql/src/Metrics/ModuleEfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ModuleMetrics m +from ModuleMetricsWithPointsTo m select m, m.getEfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql index e5164499331c..00a4c1bf0db0 100644 --- a/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql +++ b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics func select func, func.getNumberOfParametersWithoutDefault() as n order by n desc diff --git a/python/ql/src/Statements/ModificationOfLocals.ql b/python/ql/src/Statements/ModificationOfLocals.ql index e4791a410f7a..82529cbd6d0b 100644 --- a/python/ql/src/Statements/ModificationOfLocals.ql +++ b/python/ql/src/Statements/ModificationOfLocals.ql @@ -12,10 +12,10 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs -predicate originIsLocals(ControlFlowNodeWithPointsTo n) { - n.pointsTo(_, _, Value::named("locals").getACall()) +predicate originIsLocals(ControlFlowNode n) { + API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n } predicate modification_of_locals(ControlFlowNode f) { @@ -37,5 +37,5 @@ where // in module level scope `locals() == globals()` // see https://docs.python.org/3/library/functions.html#locals // FP report in https://github.com/github/codeql/issues/6674 - not a.getScope() instanceof ModuleScope + not a.getScope() instanceof Module select a, "Modification of the locals() dictionary will have no effect on the local variables." diff --git a/python/ql/src/Statements/NonIteratorInForLoop.ql b/python/ql/src/Statements/NonIteratorInForLoop.ql index f8e6e51b55ff..44dfa967bff9 100644 --- a/python/ql/src/Statements/NonIteratorInForLoop.ql +++ b/python/ql/src/Statements/NonIteratorInForLoop.ql @@ -12,16 +12,15 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from For loop, ControlFlowNodeWithPointsTo iter, Value v, ClassValue t, ControlFlowNode origin +from For loop, Expr iter, Class cls where - loop.getIter().getAFlowNode() = iter and - iter.pointsTo(_, v, origin) and - v.getClass() = t and - not t.isIterable() and - not t.failedInference(_) and - not v = Value::named("None") and - not t.isDescriptorType() -select loop, "This for-loop may attempt to iterate over a $@ of class $@.", origin, - "non-iterable instance", t, t.getName() + iter = loop.getIter() and + classInstanceTracker(cls).asExpr() = iter and + not DuckTyping::isIterable(cls) and + not DuckTyping::isDescriptor(cls) and + not (loop.isAsync() and DuckTyping::hasMethod(cls, "__aiter__")) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) +select loop, "This for-loop may attempt to iterate over a $@ of class $@.", iter, + "non-iterable instance", cls, cls.getName() diff --git a/python/ql/src/Statements/ShouldUseWithStatement.ql b/python/ql/src/Statements/ShouldUseWithStatement.ql index eb5cf9237d57..20bf053f6daa 100644 --- a/python/ql/src/Statements/ShouldUseWithStatement.ql +++ b/python/ql/src/Statements/ShouldUseWithStatement.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") } @@ -23,18 +23,12 @@ predicate only_stmt_in_finally(Try t, Call c) { ) } -predicate points_to_context_manager(ControlFlowNodeWithPointsTo f, ClassValue cls) { - forex(Value v | f.pointsTo(v) | v.getClass() = cls) and - cls.isContextManager() -} - -from Call close, Try t, ClassValue cls +from Call close, Try t, Class cls where only_stmt_in_finally(t, close) and calls_close(close) and - exists(ControlFlowNode f | f = close.getFunc().getAFlowNode().(AttrNode).getObject() | - points_to_context_manager(f, cls) - ) + classInstanceTracker(cls).asExpr() = close.getFunc().(Attribute).getObject() and + DuckTyping::isContextManager(cls) select close, "Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.", cls, cls.getName() diff --git a/python/ql/src/Statements/SideEffectInAssert.ql b/python/ql/src/Statements/SideEffectInAssert.ql index 902b6c4c9c16..7ac96030c04e 100644 --- a/python/ql/src/Statements/SideEffectInAssert.ql +++ b/python/ql/src/Statements/SideEffectInAssert.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate func_with_side_effects(Expr e) { exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() | @@ -24,11 +24,11 @@ predicate func_with_side_effects(Expr e) { } predicate call_with_side_effect(Call e) { - e.getAFlowNode() = Value::named("subprocess.call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_output").getACall() + e.getAFlowNode() = + API::moduleImport("subprocess") + .getMember(["call", "check_call", "check_output"]) + .getACall() + .asCfgNode() } predicate probable_side_effect(Expr e) { diff --git a/python/ql/src/Statements/TopLevelPrint.ql b/python/ql/src/Statements/TopLevelPrint.ql index 12095f7a484d..1896d41908e3 100644 --- a/python/ql/src/Statements/TopLevelPrint.ql +++ b/python/ql/src/Statements/TopLevelPrint.ql @@ -12,7 +12,6 @@ */ import python -private import LegacyPointsTo predicate main_eq_name(If i) { exists(Name n, StringLiteral m, Compare c | @@ -32,10 +31,19 @@ predicate is_print_stmt(Stmt s) { ) } +/** + * Holds if module `m` is likely used as a module (imported by another module), + * as opposed to being exclusively used as a script. + */ +predicate is_used_as_module(Module m) { + m.isPackageInit() + or + exists(ImportingStmt i | i.getAnImportedModuleName() = m.getName()) +} + from Stmt p where is_print_stmt(p) and - // TODO: Need to discuss how we would like to handle ModuleObject.getKind in the glorious future - exists(ModuleValue m | m.getScope() = p.getScope() and m.isUsedAsModule()) and + is_used_as_module(p.getScope()) and not exists(If i | main_eq_name(i) and i.getASubStatement().getASubStatement*() = p) select p, "Print statement may execute during import." diff --git a/python/ql/src/Statements/UnnecessaryDelete.ql b/python/ql/src/Statements/UnnecessaryDelete.ql index c7b80ecc66ac..da239f814054 100644 --- a/python/ql/src/Statements/UnnecessaryDelete.ql +++ b/python/ql/src/Statements/UnnecessaryDelete.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate isInsideLoop(AstNode node) { node.getParentNode() instanceof While @@ -33,9 +33,9 @@ where not isInsideLoop(del) and // False positive: calling `sys.exc_info` within a function results in a // reference cycle, and an explicit call to `del` helps break this cycle. - not exists(FunctionValue ex | - ex = Value::named("sys.exc_info") and - ex.getACall().getScope() = f + not exists(API::CallNode call | + call = API::moduleImport("sys").getMember("exc_info").getACall() and + call.getScope() = f ) select del, "Unnecessary deletion of local variable $@ in function $@.", e, e.toString(), f, f.getName() diff --git a/python/ql/src/Statements/UnreachableCode.ql b/python/ql/src/Statements/UnreachableCode.ql index 55582ed2f061..200e073cff6b 100644 --- a/python/ql/src/Statements/UnreachableCode.ql +++ b/python/ql/src/Statements/UnreachableCode.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate typing_import(ImportingStmt is) { exists(Module m | @@ -34,11 +34,7 @@ predicate unique_yield(Stmt s) { /** Holds if `contextlib.suppress` may be used in the same scope as `s` */ predicate suppression_in_scope(Stmt s) { exists(With w | - w.getContextExpr() - .(Call) - .getFunc() - .(ExprWithPointsTo) - .pointsTo(Value::named("contextlib.suppress")) and + w.getContextExpr() = API::moduleImport("contextlib").getMember("suppress").getACall().asExpr() and w.getScope() = s.getScope() ) } diff --git a/python/ql/src/Statements/UnusedExceptionObject.ql b/python/ql/src/Statements/UnusedExceptionObject.ql index 9a6a3650b7e6..890cdc963aca 100644 --- a/python/ql/src/Statements/UnusedExceptionObject.ql +++ b/python/ql/src/Statements/UnusedExceptionObject.ql @@ -12,11 +12,49 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.dataflow.new.internal.Builtins +private import semmle.python.ApiGraphs -from Call call, ClassValue ex +/** + * Holds if `cls` is a user-defined exception class, i.e. it transitively + * extends one of the builtin exception base classes. + */ +predicate isUserDefinedExceptionClass(Class cls) { + cls.getABase() = + API::builtin(["BaseException", "Exception"]).getAValueReachableFromSource().asExpr() + or + isUserDefinedExceptionClass(getADirectSuperclass(cls)) +} + +/** + * Gets the name of a builtin exception class. + */ +string getBuiltinExceptionName() { + result = Builtins::getBuiltinName() and + ( + result.matches("%Error") or + result.matches("%Exception") or + result.matches("%Warning") or + result = + ["GeneratorExit", "KeyboardInterrupt", "StopIteration", "StopAsyncIteration", "SystemExit"] + ) +} + +/** + * Holds if `call` is an instantiation of an exception class. + */ +predicate isExceptionInstantiation(Call call) { + exists(Class cls | + classTracker(cls).asExpr() = call.getFunc() and + isUserDefinedExceptionClass(cls) + ) + or + call.getFunc() = API::builtin(getBuiltinExceptionName()).getAValueReachableFromSource().asExpr() +} + +from Call call where - call.getFunc().(ExprWithPointsTo).pointsTo(ex) and - ex.getASuperType() = ClassValue::exception() and + isExceptionInstantiation(call) and exists(ExprStmt s | s.getValue() = call) select call, "Instantiating an exception, but not raising it, has no effect." diff --git a/python/ql/src/Statements/UseOfExit.ql b/python/ql/src/Statements/UseOfExit.ql index 437ff93b5371..2310a839f67b 100644 --- a/python/ql/src/Statements/UseOfExit.ql +++ b/python/ql/src/Statements/UseOfExit.ql @@ -12,10 +12,12 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs from CallNode call, string name -where call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::siteQuitter(name)) +where + name = ["exit", "quit"] and + call = API::builtin(name).getACall().asCfgNode() select call, "The '" + name + "' site.Quitter object may not exist if the 'site' module is not loaded or is modified." diff --git a/python/ql/src/Summary/LinesOfCode.ql b/python/ql/src/Summary/LinesOfCode.ql index 04cb33a34515..24f1f38fbd70 100644 --- a/python/ql/src/Summary/LinesOfCode.ql +++ b/python/ql/src/Summary/LinesOfCode.ql @@ -10,6 +10,5 @@ */ import python -private import LegacyPointsTo select sum(ModuleMetrics m | | m.getNumberOfLinesOfCode()) diff --git a/python/ql/src/Summary/LinesOfUserCode.ql b/python/ql/src/Summary/LinesOfUserCode.ql index f920ebb51f41..331350dae4e8 100644 --- a/python/ql/src/Summary/LinesOfUserCode.ql +++ b/python/ql/src/Summary/LinesOfUserCode.ql @@ -14,7 +14,6 @@ import python import semmle.python.filters.GeneratedCode -private import LegacyPointsTo select sum(ModuleMetrics m | exists(m.getFile().getRelativePath()) and diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql index 87900c48fc58..18c832406675 100644 --- a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql @@ -12,7 +12,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs import Definition predicate is_increment(Stmt s) { @@ -41,23 +41,16 @@ predicate one_item_only(For f) { ) } -predicate points_to_call_to_range(ControlFlowNode f) { - /* (x)range is a function in Py2 and a class in Py3, so we must treat it as a plain object */ - exists(Value range | - range = Value::named("range") or - range = Value::named("xrange") - | - f = range.getACall() - ) +/** Holds if `node` is a call to `range`, `xrange`, or `list(range(...))`. */ +predicate call_to_range(DataFlow::Node node) { + node = API::builtin(["range", "xrange"]).getACall() or - /* In case points-to fails due to 'from six.moves import range' or similar. */ - exists(string range | f.getNode().(Call).getFunc().(Name).getId() = range | - range = "range" or range = "xrange" - ) + /* Handle 'from six.moves import range' or similar. */ + node = API::moduleImport("six").getMember("moves").getMember(["range", "xrange"]).getACall() or /* Handle list(range(...)) and list(list(range(...))) */ - f.(CallNode).(ControlFlowNodeWithPointsTo).pointsTo().getClass() = ClassValue::list() and - points_to_call_to_range(f.(CallNode).getArg(0)) + node = API::builtin("list").getACall() and + call_to_range(node.(DataFlow::CallCfgNode).getArg(0)) } /** Whether n is a use of a variable that is a not effectively a constant. */ @@ -102,8 +95,8 @@ from For f, Variable v, string msg where f.getTarget() = v.getAnAccess() and not f.getAStmt().contains(v.getAnAccess()) and - not points_to_call_to_range(f.getIter().getAFlowNode()) and - not points_to_call_to_range(get_comp_iterable(f)) and + not call_to_range(DataFlow::exprNode(f.getIter())) and + not call_to_range(DataFlow::exprNode(get_comp_iterable(f).getNode())) and not name_acceptable_for_unused_variable(v) and not f.getScope().getName() = "genexpr" and not empty_loop(f) and diff --git a/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected b/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected index ebec9bb61e0b..93d12fc0a454 100644 --- a/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected +++ b/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected @@ -1 +1 @@ -| inconsistent_mro.py:9:1:9:14 | class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | class X | X | inconsistent_mro.py:6:1:6:11 | class Y | Y | +| inconsistent_mro.py:9:1:9:14 | Class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | Class X | X | inconsistent_mro.py:6:1:6:11 | Class Y | Y | diff --git a/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected b/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected index 9fb3e582cb7b..1e5be51fbaff 100644 --- a/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected +++ b/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected @@ -1 +1 @@ -| property_old_style.py:8:6:8:13 | Property piosc | Property piosc will not work properly, as class OldStyle is an old-style class. | +| property_old_style.py:9:5:9:20 | Function piosc | Property piosc will not work properly, as class OldStyle is an old-style class. | diff --git a/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected b/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected index ccad85bd3846..14d30913b93a 100644 --- a/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected +++ b/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected @@ -1 +1 @@ -| newstyle_test.py:4:1:4:16 | class OldStyle1 | Using '__slots__' in an old style class just creates a class attribute called '__slots__'. | +| newstyle_test.py:4:1:4:16 | Class OldStyle1 | Using '__slots__' in an old style class just creates a class attribute called '__slots__'. | diff --git a/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected b/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected index ca0a64f70e1b..93d12fc0a454 100644 --- a/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected +++ b/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected @@ -1,2 +1 @@ -| inconsistent_mro.py:9:1:9:14 | class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | class X | X | inconsistent_mro.py:6:1:6:11 | class Y | Y | -| inconsistent_mro.py:16:1:16:19 | class N | Construction of class N can fail due to invalid method resolution order(MRO) for bases $@ and $@. | file://:Compiled Code:0:0:0:0 | builtin-class object | object | inconsistent_mro.py:12:1:12:8 | class O | O | +| inconsistent_mro.py:9:1:9:14 | Class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | Class X | X | inconsistent_mro.py:6:1:6:11 | Class Y | Y | diff --git a/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected b/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected index c59db3b2b657..396382d0b9f9 100644 --- a/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected +++ b/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected @@ -1,2 +1 @@ -| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | ControlFlowNode for MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | class MissingAiter | MissingAiter | -| statements_test.py:34:5:34:19 | For | This for-loop may attempt to iterate over a $@ of class $@. | statements_test.py:34:18:34:18 | ControlFlowNode for IntegerLiteral | non-iterable instance | file://:0:0:0:0 | builtin-class int | int | +| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | Class MissingAiter | MissingAiter | diff --git a/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql b/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql index a36375b7f3d7..5a35a35922c9 100644 --- a/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql +++ b/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql @@ -1,5 +1,5 @@ import python private import LegacyPointsTo -from FunctionMetrics func +from FunctionMetricsWithPointsTo func select func.toString(), func.getCyclomaticComplexity() diff --git a/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected b/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected index 47c773804ae8..3cd6d92ff641 100644 --- a/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected +++ b/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected @@ -1,2 +1,2 @@ -| should_be_context_manager.py:3:1:3:22 | class MegaDel | Class MegaDel implements __del__ (presumably to release some resource). Consider making it a context manager. | -| should_be_context_manager.py:16:1:16:22 | class MiniDel | Class MiniDel implements __del__ (presumably to release some resource). Consider making it a context manager. | +| should_be_context_manager.py:3:1:3:22 | Class MegaDel | Class MegaDel implements __del__ (presumably to release some resource). Consider making it a context manager. | +| should_be_context_manager.py:16:1:16:22 | Class MiniDel | Class MiniDel implements __del__ (presumably to release some resource). Consider making it a context manager. | diff --git a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected index cf6d78d0d36b..132852c73f1c 100644 --- a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected +++ b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected @@ -1,2 +1,2 @@ -| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | -| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | +| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | +| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | diff --git a/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected b/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected index 9589547056f7..536a7281d2d6 100644 --- a/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected +++ b/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected @@ -1 +1 @@ -| expressions_test.py:42:20:42:25 | unhash | This $@ of $@ is unhashable. | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | instance | file://:0:0:0:0 | builtin-class list | list | +| expressions_test.py:42:20:42:25 | ControlFlowNode for unhash | This $@ of $@ is unhashable. | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | instance | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | list | diff --git a/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected b/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected index 4c79685061f5..d11b87191ba6 100644 --- a/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected +++ b/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected @@ -1 +1 @@ -| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | ControlFlowNode for NonIterator() | non-iterable instance | test.py:45:1:45:26 | class NonIterator | NonIterator | +| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | NonIterator() | non-iterable instance | test.py:45:1:45:26 | Class NonIterator | NonIterator | diff --git a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected index d062717bbf25..50ff6cc1f914 100644 --- a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected +++ b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected @@ -1 +1 @@ -| test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | class CM | CM | +| test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | Class CM | CM |