From 20fea3955ebac8b8e2de3f5613322e916f813811 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 18 Feb 2026 16:40:28 +0000 Subject: [PATCH 01/19] Python: Remove points-to from `Metrics.qll` Moves the classes/predicates that _actually_ depend on points-to to the `LegacyPointsTo` module, leaving behind a module that contains all of the metrics-related stuff (line counts, nesting depth, etc.) that don't need points-to to be evaluated. Consequently, `Metrics` is now no longer a private import in `python.qll`. --- python/ql/lib/LegacyPointsTo.qll | 176 ++++++++++++++++++++++++ python/ql/lib/python.qll | 2 +- python/ql/lib/semmle/python/Metrics.qll | 154 +-------------------- 3 files changed, 178 insertions(+), 154 deletions(-) diff --git a/python/ql/lib/LegacyPointsTo.qll b/python/ql/lib/LegacyPointsTo.qll index 45f4a80e8d1c..ffea2d93b66c 100644 --- a/python/ql/lib/LegacyPointsTo.qll +++ b/python/ql/lib/LegacyPointsTo.qll @@ -430,3 +430,179 @@ private predicate exits_early(BasicBlock b) { f.getACall().getBasicBlock() = b ) } + +/** The metrics for a function that require points-to analysis */ +class FunctionMetricsWithPointsTo extends FunctionMetrics { + /** + * Gets the cyclomatic complexity of the function: + * The number of linearly independent paths through the source code. + * Computed as E - N + 2P, + * where + * E = the number of edges of the graph. + * N = the number of nodes of the graph. + * P = the number of connected components, which for a single function is 1. + */ + int getCyclomaticComplexity() { + exists(int e, int n | + n = count(BasicBlockWithPointsTo b | b = this.getABasicBlock() and b.likelyReachable()) and + e = + count(BasicBlockWithPointsTo b1, BasicBlockWithPointsTo b2 | + b1 = this.getABasicBlock() and + b1.likelyReachable() and + b2 = this.getABasicBlock() and + b2.likelyReachable() and + b2 = b1.getASuccessor() and + not b1.unlikelySuccessor(b2) + ) + | + result = e - n + 2 + ) + } + + private BasicBlock getABasicBlock() { + result = this.getEntryNode().getBasicBlock() + or + exists(BasicBlock mid | mid = this.getABasicBlock() and result = mid.getASuccessor()) + } + + /** + * Dependency of Callables + * One callable "this" depends on another callable "result" + * if "this" makes some call to a method that may end up being "result". + */ + FunctionMetricsWithPointsTo getADependency() { + result != this and + not non_coupling_method(result) and + exists(Call call | call.getScope() = this | + exists(FunctionObject callee | callee.getFunction() = result | + call.getAFlowNode().getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee) + ) + or + exists(Attribute a | call.getFunc() = a | + unique_root_method(result, a.getName()) + or + exists(Name n | a.getObject() = n and n.getId() = "self" | + result.getScope() = this.getScope() and + result.getName() = a.getName() + ) + ) + ) + } + + /** + * Afferent Coupling + * the number of callables that depend on this method. + * This is sometimes called the "fan-in" of a method. + */ + int getAfferentCoupling() { + result = count(FunctionMetricsWithPointsTo m | m.getADependency() = this) + } + + /** + * Efferent Coupling + * the number of methods that this method depends on + * This is sometimes called the "fan-out" of a method. + */ + int getEfferentCoupling() { + result = count(FunctionMetricsWithPointsTo m | this.getADependency() = m) + } + + override string getAQlClass() { result = "FunctionMetrics" } +} + +/** The metrics for a class that require points-to analysis */ +class ClassMetricsWithPointsTo extends ClassMetrics { + private predicate dependsOn(Class other) { + other != this and + ( + exists(FunctionMetricsWithPointsTo f1, FunctionMetricsWithPointsTo f2 | + f1.getADependency() = f2 + | + f1.getScope() = this and f2.getScope() = other + ) + or + exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | + c.getFunc().(ExprWithPointsTo).refersTo(cls) and + cls.getPyClass() = other + ) + ) + } + + /** + * Gets the afferent coupling of a class -- the number of classes that + * directly depend on it. + */ + int getAfferentCoupling() { result = count(ClassMetricsWithPointsTo t | t.dependsOn(this)) } + + /** + * Gets the efferent coupling of a class -- the number of classes that + * it directly depends on. + */ + int getEfferentCoupling() { result = count(ClassMetricsWithPointsTo t | this.dependsOn(t)) } + + /** Gets the depth of inheritance of the class. */ + int getInheritanceDepth() { + exists(ClassObject cls | cls.getPyClass() = this | result = max(classInheritanceDepth(cls))) + } + + override string getAQlClass() { result = "ClassMetrics" } +} + +private int classInheritanceDepth(ClassObject cls) { + /* Prevent run-away recursion in case of circular inheritance */ + not cls.getASuperType() = cls and + ( + exists(ClassObject sup | cls.getABaseType() = sup | result = classInheritanceDepth(sup) + 1) + or + not exists(cls.getABaseType()) and + ( + major_version() = 2 and result = 0 + or + major_version() > 2 and result = 1 + ) + ) +} + +/** The metrics for a module that require points-to analysis */ +class ModuleMetricsWithPointsTo extends ModuleMetrics { + /** + * Gets the afferent coupling of a module -- the number of modules that + * directly depend on it. + */ + int getAfferentCoupling() { result = count(ModuleMetricsWithPointsTo t | t.dependsOn(this)) } + + /** + * Gets the efferent coupling of a module -- the number of modules that + * it directly depends on. + */ + int getEfferentCoupling() { result = count(ModuleMetricsWithPointsTo t | this.dependsOn(t)) } + + private predicate dependsOn(Module other) { + other != this and + ( + exists(FunctionMetricsWithPointsTo f1, FunctionMetricsWithPointsTo f2 | + f1.getADependency() = f2 + | + f1.getEnclosingModule() = this and f2.getEnclosingModule() = other + ) + or + exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | + c.getFunc().(ExprWithPointsTo).refersTo(cls) and + cls.getPyClass().getEnclosingModule() = other + ) + ) + } + + override string getAQlClass() { result = "ModuleMetrics" } +} + +/** Helpers for coupling */ +predicate unique_root_method(Function func, string name) { + name = func.getName() and + not exists(FunctionObject f, FunctionObject other | + f.getFunction() = func and + other.getName() = name + | + not other.overrides(f) + ) +} diff --git a/python/ql/lib/python.qll b/python/ql/lib/python.qll index 54306408a333..d127e297dbb4 100644 --- a/python/ql/lib/python.qll +++ b/python/ql/lib/python.qll @@ -14,7 +14,7 @@ import semmle.python.Patterns import semmle.python.Keywords import semmle.python.Comprehensions import semmle.python.Flow -private import semmle.python.Metrics +import semmle.python.Metrics import semmle.python.Constants import semmle.python.Scope import semmle.python.Comment diff --git a/python/ql/lib/semmle/python/Metrics.qll b/python/ql/lib/semmle/python/Metrics.qll index 4959a06317ff..26560bad25c9 100644 --- a/python/ql/lib/semmle/python/Metrics.qll +++ b/python/ql/lib/semmle/python/Metrics.qll @@ -1,5 +1,5 @@ import python -private import LegacyPointsTo +private import semmle.python.SelfAttribute /** The metrics for a function */ class FunctionMetrics extends Function { @@ -18,76 +18,6 @@ class FunctionMetrics extends Function { /** Gets the number of lines of docstring in the function */ int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) } - /** - * Gets the cyclomatic complexity of the function: - * The number of linearly independent paths through the source code. - * Computed as E - N + 2P, - * where - * E = the number of edges of the graph. - * N = the number of nodes of the graph. - * P = the number of connected components, which for a single function is 1. - */ - int getCyclomaticComplexity() { - exists(int e, int n | - n = count(BasicBlockWithPointsTo b | b = this.getABasicBlock() and b.likelyReachable()) and - e = - count(BasicBlockWithPointsTo b1, BasicBlockWithPointsTo b2 | - b1 = this.getABasicBlock() and - b1.likelyReachable() and - b2 = this.getABasicBlock() and - b2.likelyReachable() and - b2 = b1.getASuccessor() and - not b1.unlikelySuccessor(b2) - ) - | - result = e - n + 2 - ) - } - - private BasicBlock getABasicBlock() { - result = this.getEntryNode().getBasicBlock() - or - exists(BasicBlock mid | mid = this.getABasicBlock() and result = mid.getASuccessor()) - } - - /** - * Dependency of Callables - * One callable "this" depends on another callable "result" - * if "this" makes some call to a method that may end up being "result". - */ - FunctionMetrics getADependency() { - result != this and - not non_coupling_method(result) and - exists(Call call | call.getScope() = this | - exists(FunctionObject callee | callee.getFunction() = result | - call.getAFlowNode().getFunction().(ControlFlowNodeWithPointsTo).refersTo(callee) - ) - or - exists(Attribute a | call.getFunc() = a | - unique_root_method(result, a.getName()) - or - exists(Name n | a.getObject() = n and n.getId() = "self" | - result.getScope() = this.getScope() and - result.getName() = a.getName() - ) - ) - ) - } - - /** - * Afferent Coupling - * the number of callables that depend on this method. - * This is sometimes called the "fan-in" of a method. - */ - int getAfferentCoupling() { result = count(FunctionMetrics m | m.getADependency() = this) } - - /** - * Efferent Coupling - * the number of methods that this method depends on - * This is sometimes called the "fan-out" of a method. - */ - int getEfferentCoupling() { result = count(FunctionMetrics m | this.getADependency() = m) } - int getNumberOfParametersWithoutDefault() { result = this.getPositionalParameterCount() - @@ -116,36 +46,6 @@ class ClassMetrics extends Class { /** Gets the number of lines of docstrings in the class */ int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) } - private predicate dependsOn(Class other) { - other != this and - ( - exists(FunctionMetrics f1, FunctionMetrics f2 | f1.getADependency() = f2 | - f1.getScope() = this and f2.getScope() = other - ) - or - exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | - c.getFunc().(ExprWithPointsTo).refersTo(cls) and - cls.getPyClass() = other - ) - ) - } - - /** - * Gets the afferent coupling of a class -- the number of classes that - * directly depend on it. - */ - int getAfferentCoupling() { result = count(ClassMetrics t | t.dependsOn(this)) } - - /** - * Gets the efferent coupling of a class -- the number of classes that - * it directly depends on. - */ - int getEfferentCoupling() { result = count(ClassMetrics t | this.dependsOn(t)) } - - int getInheritanceDepth() { - exists(ClassObject cls | cls.getPyClass() = this | result = max(classInheritanceDepth(cls))) - } - /* -------- CHIDAMBER AND KEMERER LACK OF COHESION IN METHODS ------------ */ /* * The aim of this metric is to try and determine whether a class @@ -245,21 +145,6 @@ class ClassMetrics extends Class { int getLackOfCohesionHM() { result = count(int line | this.unionSubgraph(_, line)) } } -private int classInheritanceDepth(ClassObject cls) { - /* Prevent run-away recursion in case of circular inheritance */ - not cls.getASuperType() = cls and - ( - exists(ClassObject sup | cls.getABaseType() = sup | result = classInheritanceDepth(sup) + 1) - or - not exists(cls.getABaseType()) and - ( - major_version() = 2 and result = 0 - or - major_version() > 2 and result = 1 - ) - ) -} - class ModuleMetrics extends Module { /** Gets the total number of lines (including blank lines) in the module */ int getNumberOfLines() { py_alllines(this, result) } @@ -272,43 +157,6 @@ class ModuleMetrics extends Module { /** Gets the number of lines of docstrings in the module */ int getNumberOfLinesOfDocStrings() { py_docstringlines(this, result) } - - /** - * Gets the afferent coupling of a class -- the number of classes that - * directly depend on it. - */ - int getAfferentCoupling() { result = count(ModuleMetrics t | t.dependsOn(this)) } - - /** - * Gets the efferent coupling of a class -- the number of classes that - * it directly depends on. - */ - int getEfferentCoupling() { result = count(ModuleMetrics t | this.dependsOn(t)) } - - private predicate dependsOn(Module other) { - other != this and - ( - exists(FunctionMetrics f1, FunctionMetrics f2 | f1.getADependency() = f2 | - f1.getEnclosingModule() = this and f2.getEnclosingModule() = other - ) - or - exists(Function f, Call c, ClassObject cls | c.getScope() = f and f.getScope() = this | - c.getFunc().(ExprWithPointsTo).refersTo(cls) and - cls.getPyClass().getEnclosingModule() = other - ) - ) - } -} - -/** Helpers for coupling */ -predicate unique_root_method(Function func, string name) { - name = func.getName() and - not exists(FunctionObject f, FunctionObject other | - f.getFunction() = func and - other.getName() = name - | - not other.overrides(f) - ) } predicate non_coupling_method(Function f) { From e8de8433f4a8493ffb50292f2d7f4ffd0cb7e258 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 18 Feb 2026 16:41:57 +0000 Subject: [PATCH 02/19] Python: Update all metrics-dependant queries The ones that no longer require points-to no longer import `LegacyPointsTo`. The ones that do use the specific `...MetricsWithPointsTo` classes that are applicable. --- python/ql/src/Functions/OverlyComplexDelMethod.ql | 2 +- python/ql/src/Metrics/CLinesOfCode.ql | 1 - python/ql/src/Metrics/ClassAfferentCoupling.ql | 2 +- python/ql/src/Metrics/ClassEfferentCoupling.ql | 2 +- python/ql/src/Metrics/CommentRatio.ql | 1 - python/ql/src/Metrics/CyclomaticComplexity.ql | 2 +- python/ql/src/Metrics/DocStringRatio.ql | 1 - python/ql/src/Metrics/FLines.ql | 1 - python/ql/src/Metrics/FLinesOfCode.ql | 1 - python/ql/src/Metrics/FLinesOfComments.ql | 1 - python/ql/src/Metrics/FunctionNumberOfCalls.ql | 1 - python/ql/src/Metrics/FunctionStatementNestingDepth.ql | 1 - python/ql/src/Metrics/History/HChurn.ql | 1 - python/ql/src/Metrics/History/HLinesAdded.ql | 1 - python/ql/src/Metrics/History/HLinesDeleted.ql | 1 - python/ql/src/Metrics/History/HNumberOfAuthors.ql | 1 - python/ql/src/Metrics/History/HNumberOfCoCommits.ql | 1 - python/ql/src/Metrics/History/HNumberOfReCommits.ql | 1 - python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql | 1 - python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql | 1 - python/ql/src/Metrics/LackofCohesionInMethodsCK.ql | 1 - python/ql/src/Metrics/LackofCohesionInMethodsHM.ql | 1 - python/ql/src/Metrics/ModuleAfferentCoupling.ql | 2 +- python/ql/src/Metrics/ModuleEfferentCoupling.ql | 2 +- python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql | 1 - python/ql/src/Summary/LinesOfCode.ql | 1 - python/ql/src/Summary/LinesOfUserCode.ql | 1 - python/ql/test/library-tests/ControlFlow/general/Cyclo.ql | 2 +- 28 files changed, 7 insertions(+), 28 deletions(-) diff --git a/python/ql/src/Functions/OverlyComplexDelMethod.ql b/python/ql/src/Functions/OverlyComplexDelMethod.ql index 12776db1b60f..6b08a40fa290 100644 --- a/python/ql/src/Functions/OverlyComplexDelMethod.ql +++ b/python/ql/src/Functions/OverlyComplexDelMethod.ql @@ -18,6 +18,6 @@ from FunctionValue method where exists(ClassValue c | c.declaredAttribute("__del__") = method and - method.getScope().(FunctionMetrics).getCyclomaticComplexity() > 3 + method.getScope().(FunctionMetricsWithPointsTo).getCyclomaticComplexity() > 3 ) select method, "Overly complex '__del__' method." diff --git a/python/ql/src/Metrics/CLinesOfCode.ql b/python/ql/src/Metrics/CLinesOfCode.ql index 66a107a521ff..274d45e84155 100644 --- a/python/ql/src/Metrics/CLinesOfCode.ql +++ b/python/ql/src/Metrics/CLinesOfCode.ql @@ -10,7 +10,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics f select f, f.getNumberOfLinesOfCode() as n order by n desc diff --git a/python/ql/src/Metrics/ClassAfferentCoupling.ql b/python/ql/src/Metrics/ClassAfferentCoupling.ql index 3faf714d09c9..69983f564ec6 100644 --- a/python/ql/src/Metrics/ClassAfferentCoupling.ql +++ b/python/ql/src/Metrics/ClassAfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ClassMetrics cls +from ClassMetricsWithPointsTo cls select cls, cls.getAfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/ClassEfferentCoupling.ql b/python/ql/src/Metrics/ClassEfferentCoupling.ql index b4c5a29696bd..08016a838891 100644 --- a/python/ql/src/Metrics/ClassEfferentCoupling.ql +++ b/python/ql/src/Metrics/ClassEfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ClassMetrics cls +from ClassMetricsWithPointsTo cls select cls, cls.getEfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/CommentRatio.ql b/python/ql/src/Metrics/CommentRatio.ql index 38394c1bf4fd..5886c9d09e73 100644 --- a/python/ql/src/Metrics/CommentRatio.ql +++ b/python/ql/src/Metrics/CommentRatio.ql @@ -12,7 +12,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics mm where mm.getNumberOfLines() > 0 diff --git a/python/ql/src/Metrics/CyclomaticComplexity.ql b/python/ql/src/Metrics/CyclomaticComplexity.ql index 3d9ca10dd99b..1d9874ac12c6 100644 --- a/python/ql/src/Metrics/CyclomaticComplexity.ql +++ b/python/ql/src/Metrics/CyclomaticComplexity.ql @@ -15,6 +15,6 @@ import python private import LegacyPointsTo -from FunctionMetrics func, int complexity +from FunctionMetricsWithPointsTo func, int complexity where complexity = func.getCyclomaticComplexity() select func, complexity order by complexity desc diff --git a/python/ql/src/Metrics/DocStringRatio.ql b/python/ql/src/Metrics/DocStringRatio.ql index 824ff5a35093..3c763c7742b1 100644 --- a/python/ql/src/Metrics/DocStringRatio.ql +++ b/python/ql/src/Metrics/DocStringRatio.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics mm where mm.getNumberOfLines() > 0 diff --git a/python/ql/src/Metrics/FLines.ql b/python/ql/src/Metrics/FLines.ql index dc5418711c9c..01ea5a04f77f 100644 --- a/python/ql/src/Metrics/FLines.ql +++ b/python/ql/src/Metrics/FLines.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics m, int n where n = m.getNumberOfLines() diff --git a/python/ql/src/Metrics/FLinesOfCode.ql b/python/ql/src/Metrics/FLinesOfCode.ql index 8159eb86c572..72131d3b74f8 100644 --- a/python/ql/src/Metrics/FLinesOfCode.ql +++ b/python/ql/src/Metrics/FLinesOfCode.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics m, int n where n = m.getNumberOfLinesOfCode() diff --git a/python/ql/src/Metrics/FLinesOfComments.ql b/python/ql/src/Metrics/FLinesOfComments.ql index ac4b295003a1..33523054f0ca 100644 --- a/python/ql/src/Metrics/FLinesOfComments.ql +++ b/python/ql/src/Metrics/FLinesOfComments.ql @@ -10,7 +10,6 @@ */ import python -private import LegacyPointsTo from ModuleMetrics m, int n where n = m.getNumberOfLinesOfComments() + m.getNumberOfLinesOfDocStrings() diff --git a/python/ql/src/Metrics/FunctionNumberOfCalls.ql b/python/ql/src/Metrics/FunctionNumberOfCalls.ql index 60171c790bfb..fb4dfe5a9d2e 100644 --- a/python/ql/src/Metrics/FunctionNumberOfCalls.ql +++ b/python/ql/src/Metrics/FunctionNumberOfCalls.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics func select func, func.getNumberOfCalls() as n order by n desc diff --git a/python/ql/src/Metrics/FunctionStatementNestingDepth.ql b/python/ql/src/Metrics/FunctionStatementNestingDepth.ql index 94f7e355750e..ab40cc6068dd 100644 --- a/python/ql/src/Metrics/FunctionStatementNestingDepth.ql +++ b/python/ql/src/Metrics/FunctionStatementNestingDepth.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics func select func, func.getStatementNestingDepth() as n order by n desc diff --git a/python/ql/src/Metrics/History/HChurn.ql b/python/ql/src/Metrics/History/HChurn.ql index dba288436709..f9d90d5eb197 100644 --- a/python/ql/src/Metrics/History/HChurn.ql +++ b/python/ql/src/Metrics/History/HChurn.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m, int n where diff --git a/python/ql/src/Metrics/History/HLinesAdded.ql b/python/ql/src/Metrics/History/HLinesAdded.ql index 51a0af62460b..8d13986988d9 100644 --- a/python/ql/src/Metrics/History/HLinesAdded.ql +++ b/python/ql/src/Metrics/History/HLinesAdded.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m, int n where diff --git a/python/ql/src/Metrics/History/HLinesDeleted.ql b/python/ql/src/Metrics/History/HLinesDeleted.ql index 3016c9f128b2..a1e3ac0275cb 100644 --- a/python/ql/src/Metrics/History/HLinesDeleted.ql +++ b/python/ql/src/Metrics/History/HLinesDeleted.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m, int n where diff --git a/python/ql/src/Metrics/History/HNumberOfAuthors.ql b/python/ql/src/Metrics/History/HNumberOfAuthors.ql index ff00773d98ce..1bc1dbb70b00 100644 --- a/python/ql/src/Metrics/History/HNumberOfAuthors.ql +++ b/python/ql/src/Metrics/History/HNumberOfAuthors.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m where exists(m.getNumberOfLinesOfCode()) diff --git a/python/ql/src/Metrics/History/HNumberOfCoCommits.ql b/python/ql/src/Metrics/History/HNumberOfCoCommits.ql index 4192ecf57acd..e998aaa66d4a 100644 --- a/python/ql/src/Metrics/History/HNumberOfCoCommits.ql +++ b/python/ql/src/Metrics/History/HNumberOfCoCommits.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo int committedFiles(Commit commit) { result = count(commit.getAnAffectedFile()) } diff --git a/python/ql/src/Metrics/History/HNumberOfReCommits.ql b/python/ql/src/Metrics/History/HNumberOfReCommits.ql index f12c51840dc9..14330692ff93 100644 --- a/python/ql/src/Metrics/History/HNumberOfReCommits.ql +++ b/python/ql/src/Metrics/History/HNumberOfReCommits.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo predicate inRange(Commit first, Commit second) { first.getAnAffectedFile() = second.getAnAffectedFile() and diff --git a/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql b/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql index 9b787f529576..6469b393dcea 100644 --- a/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql +++ b/python/ql/src/Metrics/History/HNumberOfRecentAuthors.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m where exists(m.getNumberOfLinesOfCode()) diff --git a/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql b/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql index 501094907af0..9ec94df7e763 100644 --- a/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql +++ b/python/ql/src/Metrics/History/HNumberOfRecentChangedFiles.ql @@ -10,7 +10,6 @@ import python import external.VCS -private import LegacyPointsTo from ModuleMetrics m where diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql b/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql index f06b050827cd..c0ef582c32b6 100644 --- a/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql +++ b/python/ql/src/Metrics/LackofCohesionInMethodsCK.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from ClassMetrics cls select cls, cls.getLackOfCohesionCK() as n order by n desc diff --git a/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql b/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql index 1456b76b286f..5cc77ecfb4f7 100644 --- a/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql +++ b/python/ql/src/Metrics/LackofCohesionInMethodsHM.ql @@ -9,7 +9,6 @@ */ import python -private import LegacyPointsTo from ClassMetrics cls select cls, cls.getLackOfCohesionHM() as n order by n desc diff --git a/python/ql/src/Metrics/ModuleAfferentCoupling.ql b/python/ql/src/Metrics/ModuleAfferentCoupling.ql index d61cc61e4daf..7203d12ae843 100644 --- a/python/ql/src/Metrics/ModuleAfferentCoupling.ql +++ b/python/ql/src/Metrics/ModuleAfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ModuleMetrics m +from ModuleMetricsWithPointsTo m select m, m.getAfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/ModuleEfferentCoupling.ql b/python/ql/src/Metrics/ModuleEfferentCoupling.ql index bfabf4b60128..a392a44e7519 100644 --- a/python/ql/src/Metrics/ModuleEfferentCoupling.ql +++ b/python/ql/src/Metrics/ModuleEfferentCoupling.ql @@ -13,5 +13,5 @@ import python private import LegacyPointsTo -from ModuleMetrics m +from ModuleMetricsWithPointsTo m select m, m.getEfferentCoupling() as n order by n desc diff --git a/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql index e5164499331c..00a4c1bf0db0 100644 --- a/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql +++ b/python/ql/src/Metrics/NumberOfParametersWithoutDefault.ql @@ -11,7 +11,6 @@ */ import python -private import LegacyPointsTo from FunctionMetrics func select func, func.getNumberOfParametersWithoutDefault() as n order by n desc diff --git a/python/ql/src/Summary/LinesOfCode.ql b/python/ql/src/Summary/LinesOfCode.ql index 04cb33a34515..24f1f38fbd70 100644 --- a/python/ql/src/Summary/LinesOfCode.ql +++ b/python/ql/src/Summary/LinesOfCode.ql @@ -10,6 +10,5 @@ */ import python -private import LegacyPointsTo select sum(ModuleMetrics m | | m.getNumberOfLinesOfCode()) diff --git a/python/ql/src/Summary/LinesOfUserCode.ql b/python/ql/src/Summary/LinesOfUserCode.ql index f920ebb51f41..331350dae4e8 100644 --- a/python/ql/src/Summary/LinesOfUserCode.ql +++ b/python/ql/src/Summary/LinesOfUserCode.ql @@ -14,7 +14,6 @@ import python import semmle.python.filters.GeneratedCode -private import LegacyPointsTo select sum(ModuleMetrics m | exists(m.getFile().getRelativePath()) and diff --git a/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql b/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql index a36375b7f3d7..5a35a35922c9 100644 --- a/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql +++ b/python/ql/test/library-tests/ControlFlow/general/Cyclo.ql @@ -1,5 +1,5 @@ import python private import LegacyPointsTo -from FunctionMetrics func +from FunctionMetricsWithPointsTo func select func.toString(), func.getCyclomaticComplexity() From 07099f17d670859846e59b9ecb83d948fe6f5754 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 18 Feb 2026 16:51:03 +0000 Subject: [PATCH 03/19] Python: Add change note --- .../change-notes/2026-02-18-remove-points-to-from-metrics.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 python/ql/lib/change-notes/2026-02-18-remove-points-to-from-metrics.md diff --git a/python/ql/lib/change-notes/2026-02-18-remove-points-to-from-metrics.md b/python/ql/lib/change-notes/2026-02-18-remove-points-to-from-metrics.md new file mode 100644 index 000000000000..ed64206bd3b3 --- /dev/null +++ b/python/ql/lib/change-notes/2026-02-18-remove-points-to-from-metrics.md @@ -0,0 +1,5 @@ +--- +category: minorAnalysis +--- + +- The `Metrics` library no longer contains code that depends on the points-to analysis. The removed functionality has instead been moved to the `LegacyPointsTo` module, to classes like `ModuleMetricsWithPointsTo` etc. If you depend on any of these classes, you must now remember to import `LegacyPointsTo`, and use the appropriate types in order to use the points-to-based functionality. From 8a67917ba56c69b2be93f080c91a92de15650204 Mon Sep 17 00:00:00 2001 From: Taus Date: Wed, 18 Feb 2026 17:12:32 +0000 Subject: [PATCH 04/19] Python: Use API graphs instead of points-to for simple built-ins Removes the use of points-to for accessing various built-ins from three of the queries. In order for this to work I had to extend the lists of known built-ins slightly. --- .../python/dataflow/new/internal/Builtins.qll | 8 +++--- python/ql/src/Expressions/UseofApply.ql | 9 ++++--- python/ql/src/Imports/DeprecatedModule.ql | 4 +-- .../ql/src/Statements/ModificationOfLocals.ql | 8 +++--- .../ql/src/Statements/SideEffectInAssert.ql | 12 ++++----- python/ql/src/Statements/UnnecessaryDelete.ql | 8 +++--- python/ql/src/Statements/UnreachableCode.ql | 8 ++---- python/ql/src/Statements/UseOfExit.ql | 6 +++-- .../SuspiciousUnusedLoopIterationVariable.ql | 27 +++++++------------ 9 files changed, 42 insertions(+), 48 deletions(-) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll index 9ed9e7d7a2b0..1620d1b9b26b 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/Builtins.qll @@ -30,7 +30,9 @@ module Builtins { "UnicodeDecodeError", "UnicodeEncodeError", "UnicodeError", "UnicodeTranslateError", "UnicodeWarning", "UserWarning", "ValueError", "Warning", "ZeroDivisionError", // Added for compatibility - "exec" + "exec", + // Added by the `site` module (available by default unless `-S` is used) + "copyright", "credits", "exit", "quit" ] or // Built-in constants shared between Python 2 and 3 @@ -49,8 +51,8 @@ module Builtins { or // Python 2 only result in [ - "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", "unichr", - "unicode", "xrange" + "apply", "basestring", "cmp", "execfile", "file", "long", "raw_input", "reduce", "reload", + "unichr", "unicode", "xrange" ] } diff --git a/python/ql/src/Expressions/UseofApply.ql b/python/ql/src/Expressions/UseofApply.ql index 2012f2d93618..f1068eca837c 100644 --- a/python/ql/src/Expressions/UseofApply.ql +++ b/python/ql/src/Expressions/UseofApply.ql @@ -10,9 +10,10 @@ */ import python -private import LegacyPointsTo -private import semmle.python.types.Builtins +private import semmle.python.ApiGraphs -from CallNode call, ControlFlowNodeWithPointsTo func -where major_version() = 2 and call.getFunction() = func and func.pointsTo(Value::named("apply")) +from CallNode call +where + major_version() = 2 and + call = API::builtin("apply").getACall().asCfgNode() select call, "Call to the obsolete builtin function 'apply'." diff --git a/python/ql/src/Imports/DeprecatedModule.ql b/python/ql/src/Imports/DeprecatedModule.ql index 3d529943fb8f..62d772313343 100644 --- a/python/ql/src/Imports/DeprecatedModule.ql +++ b/python/ql/src/Imports/DeprecatedModule.ql @@ -11,7 +11,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs /** * Holds if the module `name` was deprecated in Python version `major`.`minor`, @@ -80,7 +80,7 @@ where name = imp.getName() and deprecated_module(name, instead, _, _) and not exists(Try try, ExceptStmt except | except = try.getAHandler() | - except.getType().(ExprWithPointsTo).pointsTo(ClassValue::importError()) and + except.getType() = API::builtin("ImportError").getAValueReachableFromSource().asExpr() and except.containsInScope(imp) ) select imp, deprecation_message(name) + replacement_message(name) diff --git a/python/ql/src/Statements/ModificationOfLocals.ql b/python/ql/src/Statements/ModificationOfLocals.ql index e4791a410f7a..82529cbd6d0b 100644 --- a/python/ql/src/Statements/ModificationOfLocals.ql +++ b/python/ql/src/Statements/ModificationOfLocals.ql @@ -12,10 +12,10 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs -predicate originIsLocals(ControlFlowNodeWithPointsTo n) { - n.pointsTo(_, _, Value::named("locals").getACall()) +predicate originIsLocals(ControlFlowNode n) { + API::builtin("locals").getReturn().getAValueReachableFromSource().asCfgNode() = n } predicate modification_of_locals(ControlFlowNode f) { @@ -37,5 +37,5 @@ where // in module level scope `locals() == globals()` // see https://docs.python.org/3/library/functions.html#locals // FP report in https://github.com/github/codeql/issues/6674 - not a.getScope() instanceof ModuleScope + not a.getScope() instanceof Module select a, "Modification of the locals() dictionary will have no effect on the local variables." diff --git a/python/ql/src/Statements/SideEffectInAssert.ql b/python/ql/src/Statements/SideEffectInAssert.ql index 902b6c4c9c16..7ac96030c04e 100644 --- a/python/ql/src/Statements/SideEffectInAssert.ql +++ b/python/ql/src/Statements/SideEffectInAssert.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate func_with_side_effects(Expr e) { exists(string name | name = e.(Attribute).getName() or name = e.(Name).getId() | @@ -24,11 +24,11 @@ predicate func_with_side_effects(Expr e) { } predicate call_with_side_effect(Call e) { - e.getAFlowNode() = Value::named("subprocess.call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_call").getACall() - or - e.getAFlowNode() = Value::named("subprocess.check_output").getACall() + e.getAFlowNode() = + API::moduleImport("subprocess") + .getMember(["call", "check_call", "check_output"]) + .getACall() + .asCfgNode() } predicate probable_side_effect(Expr e) { diff --git a/python/ql/src/Statements/UnnecessaryDelete.ql b/python/ql/src/Statements/UnnecessaryDelete.ql index c7b80ecc66ac..da239f814054 100644 --- a/python/ql/src/Statements/UnnecessaryDelete.ql +++ b/python/ql/src/Statements/UnnecessaryDelete.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate isInsideLoop(AstNode node) { node.getParentNode() instanceof While @@ -33,9 +33,9 @@ where not isInsideLoop(del) and // False positive: calling `sys.exc_info` within a function results in a // reference cycle, and an explicit call to `del` helps break this cycle. - not exists(FunctionValue ex | - ex = Value::named("sys.exc_info") and - ex.getACall().getScope() = f + not exists(API::CallNode call | + call = API::moduleImport("sys").getMember("exc_info").getACall() and + call.getScope() = f ) select del, "Unnecessary deletion of local variable $@ in function $@.", e, e.toString(), f, f.getName() diff --git a/python/ql/src/Statements/UnreachableCode.ql b/python/ql/src/Statements/UnreachableCode.ql index 55582ed2f061..200e073cff6b 100644 --- a/python/ql/src/Statements/UnreachableCode.ql +++ b/python/ql/src/Statements/UnreachableCode.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs predicate typing_import(ImportingStmt is) { exists(Module m | @@ -34,11 +34,7 @@ predicate unique_yield(Stmt s) { /** Holds if `contextlib.suppress` may be used in the same scope as `s` */ predicate suppression_in_scope(Stmt s) { exists(With w | - w.getContextExpr() - .(Call) - .getFunc() - .(ExprWithPointsTo) - .pointsTo(Value::named("contextlib.suppress")) and + w.getContextExpr() = API::moduleImport("contextlib").getMember("suppress").getACall().asExpr() and w.getScope() = s.getScope() ) } diff --git a/python/ql/src/Statements/UseOfExit.ql b/python/ql/src/Statements/UseOfExit.ql index 437ff93b5371..2310a839f67b 100644 --- a/python/ql/src/Statements/UseOfExit.ql +++ b/python/ql/src/Statements/UseOfExit.ql @@ -12,10 +12,12 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs from CallNode call, string name -where call.getFunction().(ControlFlowNodeWithPointsTo).pointsTo(Value::siteQuitter(name)) +where + name = ["exit", "quit"] and + call = API::builtin(name).getACall().asCfgNode() select call, "The '" + name + "' site.Quitter object may not exist if the 'site' module is not loaded or is modified." diff --git a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql index 87900c48fc58..18c832406675 100644 --- a/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql +++ b/python/ql/src/Variables/SuspiciousUnusedLoopIterationVariable.ql @@ -12,7 +12,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.ApiGraphs import Definition predicate is_increment(Stmt s) { @@ -41,23 +41,16 @@ predicate one_item_only(For f) { ) } -predicate points_to_call_to_range(ControlFlowNode f) { - /* (x)range is a function in Py2 and a class in Py3, so we must treat it as a plain object */ - exists(Value range | - range = Value::named("range") or - range = Value::named("xrange") - | - f = range.getACall() - ) +/** Holds if `node` is a call to `range`, `xrange`, or `list(range(...))`. */ +predicate call_to_range(DataFlow::Node node) { + node = API::builtin(["range", "xrange"]).getACall() or - /* In case points-to fails due to 'from six.moves import range' or similar. */ - exists(string range | f.getNode().(Call).getFunc().(Name).getId() = range | - range = "range" or range = "xrange" - ) + /* Handle 'from six.moves import range' or similar. */ + node = API::moduleImport("six").getMember("moves").getMember(["range", "xrange"]).getACall() or /* Handle list(range(...)) and list(list(range(...))) */ - f.(CallNode).(ControlFlowNodeWithPointsTo).pointsTo().getClass() = ClassValue::list() and - points_to_call_to_range(f.(CallNode).getArg(0)) + node = API::builtin("list").getACall() and + call_to_range(node.(DataFlow::CallCfgNode).getArg(0)) } /** Whether n is a use of a variable that is a not effectively a constant. */ @@ -102,8 +95,8 @@ from For f, Variable v, string msg where f.getTarget() = v.getAnAccess() and not f.getAStmt().contains(v.getAnAccess()) and - not points_to_call_to_range(f.getIter().getAFlowNode()) and - not points_to_call_to_range(get_comp_iterable(f)) and + not call_to_range(DataFlow::exprNode(f.getIter())) and + not call_to_range(DataFlow::exprNode(get_comp_iterable(f).getNode())) and not name_acceptable_for_unused_variable(v) and not f.getScope().getName() = "genexpr" and not empty_loop(f) and From 017423778c1f04ac5d391e123d17c4ba85ee38db Mon Sep 17 00:00:00 2001 From: Taus Date: Thu, 19 Feb 2026 15:06:47 +0000 Subject: [PATCH 05/19] Python: Port `py/print-during-import` Uses a (perhaps) slightly coarser approximation of what modules are imported, but it's probably fine. --- python/ql/src/Statements/TopLevelPrint.ql | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/python/ql/src/Statements/TopLevelPrint.ql b/python/ql/src/Statements/TopLevelPrint.ql index 12095f7a484d..1896d41908e3 100644 --- a/python/ql/src/Statements/TopLevelPrint.ql +++ b/python/ql/src/Statements/TopLevelPrint.ql @@ -12,7 +12,6 @@ */ import python -private import LegacyPointsTo predicate main_eq_name(If i) { exists(Name n, StringLiteral m, Compare c | @@ -32,10 +31,19 @@ predicate is_print_stmt(Stmt s) { ) } +/** + * Holds if module `m` is likely used as a module (imported by another module), + * as opposed to being exclusively used as a script. + */ +predicate is_used_as_module(Module m) { + m.isPackageInit() + or + exists(ImportingStmt i | i.getAnImportedModuleName() = m.getName()) +} + from Stmt p where is_print_stmt(p) and - // TODO: Need to discuss how we would like to handle ModuleObject.getKind in the glorious future - exists(ModuleValue m | m.getScope() = p.getScope() and m.isUsedAsModule()) and + is_used_as_module(p.getScope()) and not exists(If i | main_eq_name(i) and i.getASubStatement().getASubStatement*() = p) select p, "Print statement may execute during import." From 40a8e22c2467a13dd427e9c8e399510d25d18636 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 13:37:25 +0000 Subject: [PATCH 06/19] Python: Introduce `DuckTyping` module This module (which for convenience currently resides inside `DataFlowDispatch`, but this may change later) contains convenience predicates for bridging the gap between the data-flow layer and the old points-to analysis. --- .../new/internal/DataFlowDispatch.qll | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index b04b83be83ec..7572ad05793a 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1921,3 +1921,95 @@ private module OutNodes { * `kind`. */ OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { call = result.getCall(kind) } + +/** + * Provides predicates for approximating type properties of user-defined classes + * based on their structure (method declarations, base classes). + * + * This module should _not_ be used in the call graph computation itself, as parts of it may depend + * on layers that themselves build upon the call graph (e.g. API graphs). + */ +module DuckTyping { + private import semmle.python.ApiGraphs + + /** + * Holds if `cls` or any of its resolved superclasses declares a method with the given `name`. + */ + predicate hasMethod(Class cls, string name) { + cls.getAMethod().getName() = name + or + hasMethod(getADirectSuperclass(cls), name) + } + + /** + * Holds if `cls` has a base class that cannot be resolved to a user-defined class + * and is not just `object`, meaning it may inherit methods from an unknown class. + */ + predicate hasUnresolvedBase(Class cls) { + exists(Expr base | base = cls.getABase() | + not base = classTracker(_).asExpr() and + not base = API::builtin("object").getAValueReachableFromSource().asExpr() + ) + } + + /** + * Holds if `cls` supports the container protocol, i.e. it declares + * `__contains__`, `__iter__`, or `__getitem__`. + */ + predicate isContainer(Class cls) { + hasMethod(cls, "__contains__") or + hasMethod(cls, "__iter__") or + hasMethod(cls, "__getitem__") + } + + /** + * Holds if `cls` supports the iterable protocol, i.e. it declares + * `__iter__` or `__getitem__`. + */ + predicate isIterable(Class cls) { + hasMethod(cls, "__iter__") or + hasMethod(cls, "__getitem__") + } + + /** + * Holds if `cls` supports the iterator protocol, i.e. it declares + * both `__iter__` and `__next__`. + */ + predicate isIterator(Class cls) { + hasMethod(cls, "__iter__") and + hasMethod(cls, "__next__") + } + + /** + * Holds if `cls` supports the context manager protocol, i.e. it declares + * both `__enter__` and `__exit__`. + */ + predicate isContextManager(Class cls) { + hasMethod(cls, "__enter__") and + hasMethod(cls, "__exit__") + } + + /** + * Holds if `cls` supports the descriptor protocol, i.e. it declares + * `__get__`, `__set__`, or `__delete__`. + */ + predicate isDescriptor(Class cls) { + hasMethod(cls, "__get__") or + hasMethod(cls, "__set__") or + hasMethod(cls, "__delete__") + } + + /** + * Holds if `cls` is callable, i.e. it declares `__call__`. + */ + predicate isCallable(Class cls) { hasMethod(cls, "__call__") } + + /** + * Holds if `cls` supports the mapping protocol, i.e. it declares + * `__getitem__` and `__keys__`, or `__getitem__` and `__iter__`. + */ + predicate isMapping(Class cls) { + hasMethod(cls, "__getitem__") and + (hasMethod(cls, "keys") or hasMethod(cls, "__iter__")) + } +} From 3b686c037a45a3618b3dede5838dbffddc7a23dc Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 14:52:14 +0000 Subject: [PATCH 07/19] Python: Port ContainsNonContainer.ql Uses the new `DuckTyping` module to handle recognising whether a class is a container or not. Only trivial test changes (one version uses "class", the other "Class"). Note that the ported query has no understanding of built-in classes. At some point we'll likely want to replace `hasUnresolvedBase` (which will hold for any class that extends a built-in) with something that's aware of the built-in classes. --- .../src/Expressions/ContainsNonContainer.ql | 24 ++++++++----------- .../general/ContainsNonContainer.expected | 4 ++-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/python/ql/src/Expressions/ContainsNonContainer.ql b/python/ql/src/Expressions/ContainsNonContainer.ql index de8c38795675..a1b989020586 100644 --- a/python/ql/src/Expressions/ContainsNonContainer.ql +++ b/python/ql/src/Expressions/ContainsNonContainer.ql @@ -12,25 +12,21 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch -predicate rhs_in_expr(ControlFlowNode rhs, Compare cmp) { - exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs.getNode() | +predicate rhs_in_expr(Expr rhs, Compare cmp) { + exists(Cmpop op, int i | cmp.getOp(i) = op and cmp.getComparator(i) = rhs | op instanceof In or op instanceof NotIn ) } -from - ControlFlowNodeWithPointsTo non_seq, Compare cmp, Value v, ClassValue cls, ControlFlowNode origin +from Compare cmp, DataFlow::LocalSourceNode origin, DataFlow::Node rhs, Class cls where - rhs_in_expr(non_seq, cmp) and - non_seq.pointsTo(_, v, origin) and - v.getClass() = cls and - not Types::failedInference(cls, _) and - not cls.hasAttribute("__contains__") and - not cls.hasAttribute("__iter__") and - not cls.hasAttribute("__getitem__") and - not cls = ClassValue::nonetype() and - not cls = Value::named("types.MappingProxyType") + origin = classInstanceTracker(cls) and + origin.flowsTo(rhs) and + not DuckTyping::isContainer(cls) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) and + rhs_in_expr(rhs.asExpr(), cmp) select cmp, "This test may raise an Exception as the $@ may be of non-container class $@.", origin, "target", cls, cls.getName() diff --git a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected index cf6d78d0d36b..132852c73f1c 100644 --- a/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected +++ b/python/ql/test/query-tests/Expressions/general/ContainsNonContainer.expected @@ -1,2 +1,2 @@ -| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | -| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | class XIter | XIter | +| expressions_test.py:89:8:89:15 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | +| expressions_test.py:91:8:91:19 | Compare | This test may raise an Exception as the $@ may be of non-container class $@. | expressions_test.py:88:11:88:17 | ControlFlowNode for XIter() | target | expressions_test.py:77:1:77:20 | Class XIter | XIter | From 9717e3fa710c87aaedfcd8cdefac64578a570ca7 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 14:53:39 +0000 Subject: [PATCH 08/19] Python: Port NonIteratorInForLoop.ql Same comment as for the preceding commit. We lose one test result due to the fact that we don't know what to do about `for ... in 1` (because `1` is an instance of a built-in). I'm going to defer addressing this until we get some modelling of built-in types. --- .../ql/src/Statements/NonIteratorInForLoop.ql | 21 +++++++++---------- .../iter/NonIteratorInForLoop.expected | 3 +-- .../general/NonIteratorInForLoop.expected | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/python/ql/src/Statements/NonIteratorInForLoop.ql b/python/ql/src/Statements/NonIteratorInForLoop.ql index f8e6e51b55ff..44dfa967bff9 100644 --- a/python/ql/src/Statements/NonIteratorInForLoop.ql +++ b/python/ql/src/Statements/NonIteratorInForLoop.ql @@ -12,16 +12,15 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from For loop, ControlFlowNodeWithPointsTo iter, Value v, ClassValue t, ControlFlowNode origin +from For loop, Expr iter, Class cls where - loop.getIter().getAFlowNode() = iter and - iter.pointsTo(_, v, origin) and - v.getClass() = t and - not t.isIterable() and - not t.failedInference(_) and - not v = Value::named("None") and - not t.isDescriptorType() -select loop, "This for-loop may attempt to iterate over a $@ of class $@.", origin, - "non-iterable instance", t, t.getName() + iter = loop.getIter() and + classInstanceTracker(cls).asExpr() = iter and + not DuckTyping::isIterable(cls) and + not DuckTyping::isDescriptor(cls) and + not (loop.isAsync() and DuckTyping::hasMethod(cls, "__aiter__")) and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) +select loop, "This for-loop may attempt to iterate over a $@ of class $@.", iter, + "non-iterable instance", cls, cls.getName() diff --git a/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected b/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected index c59db3b2b657..396382d0b9f9 100644 --- a/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected +++ b/python/ql/test/3/query-tests/Statements/iter/NonIteratorInForLoop.expected @@ -1,2 +1 @@ -| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | ControlFlowNode for MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | class MissingAiter | MissingAiter | -| statements_test.py:34:5:34:19 | For | This for-loop may attempt to iterate over a $@ of class $@. | statements_test.py:34:18:34:18 | ControlFlowNode for IntegerLiteral | non-iterable instance | file://:0:0:0:0 | builtin-class int | int | +| async_iterator.py:26:11:26:34 | For | This for-loop may attempt to iterate over a $@ of class $@. | async_iterator.py:26:20:26:33 | MissingAiter() | non-iterable instance | async_iterator.py:13:1:13:19 | Class MissingAiter | MissingAiter | diff --git a/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected b/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected index 4c79685061f5..d11b87191ba6 100644 --- a/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected +++ b/python/ql/test/query-tests/Statements/general/NonIteratorInForLoop.expected @@ -1 +1 @@ -| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | ControlFlowNode for NonIterator() | non-iterable instance | test.py:45:1:45:26 | class NonIterator | NonIterator | +| test.py:50:1:50:23 | For | This for-loop may attempt to iterate over a $@ of class $@. | test.py:50:10:50:22 | NonIterator() | non-iterable instance | test.py:45:1:45:26 | Class NonIterator | NonIterator | From 69f42c4b4bcbba927acd9ae7dcf8bef0cb306141 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 14:54:19 +0000 Subject: [PATCH 09/19] Python: Port ShouldUseWithStatement.ql Only trivial test changes. --- python/ql/src/Statements/ShouldUseWithStatement.ql | 14 ++++---------- .../general/ShouldUseWithStatement.expected | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/python/ql/src/Statements/ShouldUseWithStatement.ql b/python/ql/src/Statements/ShouldUseWithStatement.ql index eb5cf9237d57..20bf053f6daa 100644 --- a/python/ql/src/Statements/ShouldUseWithStatement.ql +++ b/python/ql/src/Statements/ShouldUseWithStatement.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate calls_close(Call c) { exists(Attribute a | c.getFunc() = a and a.getName() = "close") } @@ -23,18 +23,12 @@ predicate only_stmt_in_finally(Try t, Call c) { ) } -predicate points_to_context_manager(ControlFlowNodeWithPointsTo f, ClassValue cls) { - forex(Value v | f.pointsTo(v) | v.getClass() = cls) and - cls.isContextManager() -} - -from Call close, Try t, ClassValue cls +from Call close, Try t, Class cls where only_stmt_in_finally(t, close) and calls_close(close) and - exists(ControlFlowNode f | f = close.getFunc().getAFlowNode().(AttrNode).getObject() | - points_to_context_manager(f, cls) - ) + classInstanceTracker(cls).asExpr() = close.getFunc().(Attribute).getObject() and + DuckTyping::isContextManager(cls) select close, "Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement.", cls, cls.getName() diff --git a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected index d062717bbf25..50ff6cc1f914 100644 --- a/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected +++ b/python/ql/test/query-tests/Statements/general/ShouldUseWithStatement.expected @@ -1 +1 @@ -| test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | class CM | CM | +| test.py:168:9:168:17 | Attribute() | Instance of context-manager class $@ is closed in a finally block. Consider using 'with' statement. | test.py:151:1:151:17 | Class CM | CM | From 576b560774dd40e72e2f93b69bc95eace29b0663 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:03:15 +0000 Subject: [PATCH 10/19] Python: Port UnusedExceptionObject.ql Depending on whether other queries depend on this, we may end up moving the exception utility functions to a more central location. --- .../src/Statements/UnusedExceptionObject.ql | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/python/ql/src/Statements/UnusedExceptionObject.ql b/python/ql/src/Statements/UnusedExceptionObject.ql index 9a6a3650b7e6..890cdc963aca 100644 --- a/python/ql/src/Statements/UnusedExceptionObject.ql +++ b/python/ql/src/Statements/UnusedExceptionObject.ql @@ -12,11 +12,49 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.dataflow.new.internal.Builtins +private import semmle.python.ApiGraphs -from Call call, ClassValue ex +/** + * Holds if `cls` is a user-defined exception class, i.e. it transitively + * extends one of the builtin exception base classes. + */ +predicate isUserDefinedExceptionClass(Class cls) { + cls.getABase() = + API::builtin(["BaseException", "Exception"]).getAValueReachableFromSource().asExpr() + or + isUserDefinedExceptionClass(getADirectSuperclass(cls)) +} + +/** + * Gets the name of a builtin exception class. + */ +string getBuiltinExceptionName() { + result = Builtins::getBuiltinName() and + ( + result.matches("%Error") or + result.matches("%Exception") or + result.matches("%Warning") or + result = + ["GeneratorExit", "KeyboardInterrupt", "StopIteration", "StopAsyncIteration", "SystemExit"] + ) +} + +/** + * Holds if `call` is an instantiation of an exception class. + */ +predicate isExceptionInstantiation(Call call) { + exists(Class cls | + classTracker(cls).asExpr() = call.getFunc() and + isUserDefinedExceptionClass(cls) + ) + or + call.getFunc() = API::builtin(getBuiltinExceptionName()).getAValueReachableFromSource().asExpr() +} + +from Call call where - call.getFunc().(ExprWithPointsTo).pointsTo(ex) and - ex.getASuperType() = ClassValue::exception() and + isExceptionInstantiation(call) and exists(ExprStmt s | s.getValue() = call) select call, "Instantiating an exception, but not raising it, has no effect." From 2fce1b383ffedc1af856a07a6ca5e6a0a7472d14 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:19:18 +0000 Subject: [PATCH 11/19] Python: Add `DuckTyping::isNewStyle` Approximates the behaviour of `Types::isNewStyle` but without depending on points-to --- .../new/internal/DataFlowDispatch.qll | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 7572ad05793a..8fa78d6b4d90 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -2012,4 +2012,30 @@ module DuckTyping { hasMethod(cls, "__getitem__") and (hasMethod(cls, "keys") or hasMethod(cls, "__iter__")) } + + /** + * Holds if `cls` is a new-style class. In Python 3, all classes are new-style. + * In Python 2, a class is new-style if it (transitively) inherits from `object`, + * or has a declared `__metaclass__`, or has an unresolved base class. + */ + predicate isNewStyle(Class cls) { + major_version() = 3 + or + major_version() = 2 and + ( + cls.getABase() = API::builtin("object").getAValueReachableFromSource().asExpr() + or + isNewStyle(getADirectSuperclass(cls)) + or + hasUnresolvedBase(cls) + or + exists(cls.getMetaClass()) + or + // Module-level __metaclass__ = type makes all classes in the module new-style + exists(Assign a | + a.getScope() = cls.getEnclosingModule() and + a.getATarget().(Name).getId() = "__metaclass__" + ) + ) + } } From 48e51cd6c3f9a2e74a30dac85b940002f27267c9 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:56:14 +0000 Subject: [PATCH 12/19] Python: Add declares/getAttribute API These could arguably be moved to `Class` itself, but for now I'm choosing to limit the changes to the `DuckTyping` module (until we decide on a proper API). --- .../dataflow/new/internal/DataFlowDispatch.qll | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll index 8fa78d6b4d90..5fd59eb12184 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowDispatch.qll @@ -1999,6 +1999,23 @@ module DuckTyping { hasMethod(cls, "__delete__") } + /** + * Holds if `cls` directly assigns to an attribute named `name` in its class body. + * This covers attribute assignments like `x = value`, but not method definitions. + */ + predicate declaresAttribute(Class cls, string name) { exists(getAnAttributeValue(cls, name)) } + + /** + * Gets the value expression assigned to attribute `name` directly in the class body of `cls`. + */ + Expr getAnAttributeValue(Class cls, string name) { + exists(Assign a | + a.getScope() = cls and + a.getATarget().(Name).getId() = name and + result = a.getValue() + ) + } + /** * Holds if `cls` is callable, i.e. it declares `__call__`. */ From 890ce5617567a9a0ed6fd1924a3b87102f2091d1 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:25:57 +0000 Subject: [PATCH 13/19] Python: Port SlotsInOldStyleClass.ql Only trivial test changes. --- python/ql/src/Classes/SlotsInOldStyleClass.ql | 8 +++++--- .../Classes/new-style/SlotsInOldStyleClass.expected | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/ql/src/Classes/SlotsInOldStyleClass.ql b/python/ql/src/Classes/SlotsInOldStyleClass.ql index 2f91a88cf64e..70a7997113e6 100644 --- a/python/ql/src/Classes/SlotsInOldStyleClass.ql +++ b/python/ql/src/Classes/SlotsInOldStyleClass.ql @@ -12,9 +12,11 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from ClassObject c -where not c.isNewStyle() and c.declaresAttribute("__slots__") and not c.failedInference() +from Class c +where + not DuckTyping::isNewStyle(c) and + DuckTyping::declaresAttribute(c, "__slots__") select c, "Using '__slots__' in an old style class just creates a class attribute called '__slots__'." diff --git a/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected b/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected index ccad85bd3846..14d30913b93a 100644 --- a/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected +++ b/python/ql/test/2/query-tests/Classes/new-style/SlotsInOldStyleClass.expected @@ -1 +1 @@ -| newstyle_test.py:4:1:4:16 | class OldStyle1 | Using '__slots__' in an old style class just creates a class attribute called '__slots__'. | +| newstyle_test.py:4:1:4:16 | Class OldStyle1 | Using '__slots__' in an old style class just creates a class attribute called '__slots__'. | From 9847c83a72c25578d984a3b1fbd82d6ab2eba72b Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:26:27 +0000 Subject: [PATCH 14/19] Python: Port SuperInOldStyleClass.ql --- python/ql/src/Classes/SuperInOldStyleClass.ql | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/ql/src/Classes/SuperInOldStyleClass.ql b/python/ql/src/Classes/SuperInOldStyleClass.ql index a6a272b1b3b7..bc6541052de0 100644 --- a/python/ql/src/Classes/SuperInOldStyleClass.ql +++ b/python/ql/src/Classes/SuperInOldStyleClass.ql @@ -11,14 +11,13 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate uses_of_super_in_old_style_class(Call s) { - exists(Function f, ClassObject c | + exists(Function f, Class c | s.getScope() = f and - f.getScope() = c.getPyClass() and - not c.failedInference() and - not c.isNewStyle() and + f.getScope() = c and + not DuckTyping::isNewStyle(c) and s.getFunc().(Name).getId() = "super" ) } From e09a4ba055353b84690b35da1268b1b6ea6b68df Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:27:32 +0000 Subject: [PATCH 15/19] Python: Port PropertyInOldStyleClass.ql Only trivial test changes. --- python/ql/src/Classes/PropertyInOldStyleClass.ql | 9 ++++++--- .../Classes/new-style/PropertyInOldStyleClass.expected | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/ql/src/Classes/PropertyInOldStyleClass.ql b/python/ql/src/Classes/PropertyInOldStyleClass.ql index 2fd7b1d14cf2..80a5ef06bbee 100644 --- a/python/ql/src/Classes/PropertyInOldStyleClass.ql +++ b/python/ql/src/Classes/PropertyInOldStyleClass.ql @@ -11,10 +11,13 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from PropertyObject prop, ClassObject cls -where cls.declaredAttribute(_) = prop and not cls.failedInference() and not cls.isNewStyle() +from Function prop, Class cls +where + prop.getScope() = cls and + prop.getADecorator().(Name).getId() = "property" and + not DuckTyping::isNewStyle(cls) select prop, "Property " + prop.getName() + " will not work properly, as class " + cls.getName() + " is an old-style class." diff --git a/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected b/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected index 9fb3e582cb7b..1e5be51fbaff 100644 --- a/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected +++ b/python/ql/test/2/query-tests/Classes/new-style/PropertyInOldStyleClass.expected @@ -1 +1 @@ -| property_old_style.py:8:6:8:13 | Property piosc | Property piosc will not work properly, as class OldStyle is an old-style class. | +| property_old_style.py:9:5:9:20 | Function piosc | Property piosc will not work properly, as class OldStyle is an old-style class. | From 6826c4e6d3abe6ad6ccb6411d5dc07a5a3c92979 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 15:29:48 +0000 Subject: [PATCH 16/19] Python: Port InconsistentMRO.ql For this one we actually lose a test result. However, this is kind of to be expected since we no longer have the "precise" MRO that the points-to analysis computes. Honestly, I'm on the fence about even keeping this query at all. It seems like it might be superfluous in a world with good Python type checking. --- python/ql/src/Classes/InconsistentMRO.ql | 19 ++++++++++++------- .../inconsistent-mro/InconsistentMRO.expected | 2 +- .../inconsistent-mro/InconsistentMRO.expected | 3 +-- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/python/ql/src/Classes/InconsistentMRO.ql b/python/ql/src/Classes/InconsistentMRO.ql index aa319d56114e..73f6bf9240b8 100644 --- a/python/ql/src/Classes/InconsistentMRO.ql +++ b/python/ql/src/Classes/InconsistentMRO.ql @@ -12,19 +12,24 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -ClassObject left_base(ClassObject type, ClassObject base) { - exists(int i | i > 0 and type.getBaseType(i) = base and result = type.getBaseType(i - 1)) +/** + * Gets the `i`th base class of `cls`, if it can be resolved to a user-defined class. + */ +Class getBaseType(Class cls, int i) { cls.getBase(i) = classTracker(result).asExpr() } + +Class left_base(Class type, Class base) { + exists(int i | i > 0 and getBaseType(type, i) = base and result = getBaseType(type, i - 1)) } -predicate invalid_mro(ClassObject t, ClassObject left, ClassObject right) { - t.isNewStyle() and +predicate invalid_mro(Class t, Class left, Class right) { + DuckTyping::isNewStyle(t) and left = left_base(t, right) and - left = right.getAnImproperSuperType() + left = getADirectSuperclass*(right) } -from ClassObject t, ClassObject left, ClassObject right +from Class t, Class left, Class right where invalid_mro(t, left, right) select t, "Construction of class " + t.getName() + diff --git a/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected b/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected index ebec9bb61e0b..93d12fc0a454 100644 --- a/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected +++ b/python/ql/test/2/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected @@ -1 +1 @@ -| inconsistent_mro.py:9:1:9:14 | class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | class X | X | inconsistent_mro.py:6:1:6:11 | class Y | Y | +| inconsistent_mro.py:9:1:9:14 | Class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | Class X | X | inconsistent_mro.py:6:1:6:11 | Class Y | Y | diff --git a/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected b/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected index ca0a64f70e1b..93d12fc0a454 100644 --- a/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected +++ b/python/ql/test/3/query-tests/Classes/inconsistent-mro/InconsistentMRO.expected @@ -1,2 +1 @@ -| inconsistent_mro.py:9:1:9:14 | class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | class X | X | inconsistent_mro.py:6:1:6:11 | class Y | Y | -| inconsistent_mro.py:16:1:16:19 | class N | Construction of class N can fail due to invalid method resolution order(MRO) for bases $@ and $@. | file://:Compiled Code:0:0:0:0 | builtin-class object | object | inconsistent_mro.py:12:1:12:8 | class O | O | +| inconsistent_mro.py:9:1:9:14 | Class Z | Construction of class Z can fail due to invalid method resolution order(MRO) for bases $@ and $@. | inconsistent_mro.py:3:1:3:16 | Class X | X | inconsistent_mro.py:6:1:6:11 | Class Y | Y | From 7b34f1195253a40b3fa5c955331445636959c500 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 16:33:25 +0000 Subject: [PATCH 17/19] Python: Port HashedButNoHash.ql This one is a bit more involved. Of note is the fact that it at present only uses local flow when determining the origin of some value (whereas the points-to version used global flow). It may be desirable to rewrite this query to use global data-flow, but this should be done with some care (as using "all unhashable objects" as the set of sources is somewhat iffy with respect to performance). For that reason, I'm sticking to mostly local flow (except for well behaved things like types and built-ins). --- python/ql/src/Expressions/HashedButNoHash.ql | 115 +++++++++++------- .../general/HashedButNoHash.expected | 2 +- 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/python/ql/src/Expressions/HashedButNoHash.ql b/python/ql/src/Expressions/HashedButNoHash.ql index 705806bf3605..93edc7c9c6cc 100644 --- a/python/ql/src/Expressions/HashedButNoHash.ql +++ b/python/ql/src/Expressions/HashedButNoHash.ql @@ -12,76 +12,97 @@ */ import python -private import LegacyPointsTo +import semmle.python.dataflow.new.DataFlow +private import semmle.python.dataflow.new.internal.DataFlowDispatch +private import semmle.python.ApiGraphs -/* - * This assumes that any indexing operation where the value is not a sequence or numpy array involves hashing. - * For sequences, the index must be an int, which are hashable, so we don't need to treat them specially. - * For numpy arrays, the index may be a list, which are not hashable and needs to be treated specially. +/** + * Holds if `cls` explicitly sets `__hash__` to `None`, making instances unhashable. */ - -predicate numpy_array_type(ClassValue na) { - exists(ModuleValue np | np.getName() = "numpy" or np.getName() = "numpy.core" | - na.getASuperType() = np.attr("ndarray") - ) +predicate setsHashToNone(Class cls) { + DuckTyping::getAnAttributeValue(cls, "__hash__") instanceof None } -predicate has_custom_getitem(Value v) { - v.getClass().lookup("__getitem__") instanceof PythonFunctionValue +/** + * Holds if `cls` is a user-defined class whose instances are unhashable. + * A new-style class without `__hash__` is unhashable, as is one that explicitly + * sets `__hash__ = None`. + */ +predicate isUnhashableUserClass(Class cls) { + DuckTyping::isNewStyle(cls) and + not DuckTyping::hasMethod(cls, "__hash__") and + not DuckTyping::hasUnresolvedBase(getADirectSuperclass*(cls)) or - numpy_array_type(v.getClass()) + setsHashToNone(cls) } -predicate explicitly_hashed(ControlFlowNode f) { - exists(CallNode c, GlobalVariable hash | - c.getArg(0) = f and c.getFunction().(NameNode).uses(hash) and hash.getId() = "hash" +/** + * Gets the name of a builtin type whose instances are unhashable. + */ +string getUnhashableBuiltinName() { result = ["list", "set", "dict", "bytearray"] } + +/** + * Holds if `origin` is a local source node tracking an unhashable instance that + * flows to `node`, with `clsName` describing the class for the alert. + */ +predicate isUnhashable(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) { + exists(Class c | + isUnhashableUserClass(c) and + origin = classInstanceTracker(c) and + origin.flowsTo(node) and + clsName = c.getName() ) + or + clsName = getUnhashableBuiltinName() and + origin = API::builtin(clsName).getAnInstance().asSource() and + origin.flowsTo(node) +} + +predicate explicitly_hashed(DataFlow::Node node) { + node = API::builtin("hash").getACall().getArg(0) } -predicate unhashable_subscript(ControlFlowNode f, ClassValue c, ControlFlowNode origin) { - is_unhashable(f, c, origin) and - exists(SubscriptNode sub | sub.getIndex() = f | - exists(Value custom_getitem | - sub.getObject().(ControlFlowNodeWithPointsTo).pointsTo(custom_getitem) and - not has_custom_getitem(custom_getitem) - ) +/** + * Holds if the subscript object in `sub[...]` is known to use hashing for indexing, + * i.e. it does not have a custom `__getitem__` that could accept unhashable indices. + */ +predicate subscriptUsesHashing(Subscript sub) { + DataFlow::exprNode(sub.getObject()) = + API::builtin("dict").getAnInstance().getAValueReachableFromSource() + or + exists(Class cls | + classInstanceTracker(cls) + .(DataFlow::LocalSourceNode) + .flowsTo(DataFlow::exprNode(sub.getObject())) and + not DuckTyping::hasMethod(cls, "__getitem__") ) } -predicate is_unhashable(ControlFlowNodeWithPointsTo f, ClassValue cls, ControlFlowNode origin) { - exists(Value v | f.pointsTo(v, origin) and v.getClass() = cls | - not cls.hasAttribute("__hash__") and not cls.failedInference(_) and cls.isNewStyle() - or - cls.lookup("__hash__") = Value::named("None") +predicate unhashable_subscript(DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName) { + exists(Subscript sub | + node = DataFlow::exprNode(sub.getIndex()) and + subscriptUsesHashing(sub) + | + isUnhashable(origin, node, clsName) ) } /** - * Holds if `f` is inside a `try` that catches `TypeError`. For example: - * - * try: - * ... f ... - * except TypeError: - * ... - * - * This predicate is used to eliminate false positive results. If `hash` - * is called on an unhashable object then a `TypeError` will be thrown. - * But this is not a bug if the code catches the `TypeError` and handles - * it. + * Holds if `e` is inside a `try` that catches `TypeError`. */ -predicate typeerror_is_caught(ControlFlowNode f) { +predicate typeerror_is_caught(Expr e) { exists(Try try | - try.getBody().contains(f.getNode()) and - try.getAHandler().getType().(ExprWithPointsTo).pointsTo(ClassValue::typeError()) + try.getBody().contains(e) and + try.getAHandler().getType() = API::builtin("TypeError").getAValueReachableFromSource().asExpr() ) } -from ControlFlowNode f, ClassValue c, ControlFlowNode origin +from DataFlow::LocalSourceNode origin, DataFlow::Node node, string clsName where - not typeerror_is_caught(f) and + not typeerror_is_caught(node.asExpr()) and ( - explicitly_hashed(f) and is_unhashable(f, c, origin) + explicitly_hashed(node) and isUnhashable(origin, node, clsName) or - unhashable_subscript(f, c, origin) + unhashable_subscript(origin, node, clsName) ) -select f.getNode(), "This $@ of $@ is unhashable.", origin, "instance", c, c.getQualifiedName() +select node, "This $@ of $@ is unhashable.", origin, "instance", origin, clsName diff --git a/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected b/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected index 9589547056f7..536a7281d2d6 100644 --- a/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected +++ b/python/ql/test/query-tests/Expressions/general/HashedButNoHash.expected @@ -1 +1 @@ -| expressions_test.py:42:20:42:25 | unhash | This $@ of $@ is unhashable. | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | instance | file://:0:0:0:0 | builtin-class list | list | +| expressions_test.py:42:20:42:25 | ControlFlowNode for unhash | This $@ of $@ is unhashable. | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | instance | expressions_test.py:41:32:41:37 | ControlFlowNode for list() | list | From f64811ad4cc7265addd2589fed4bb9bd23cda734 Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 16:54:34 +0000 Subject: [PATCH 18/19] Python: Port UselessClass.ql No test changes. --- python/ql/src/Classes/UselessClass.ql | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/python/ql/src/Classes/UselessClass.ql b/python/ql/src/Classes/UselessClass.ql index 740c74bf96d9..229e42fd292f 100644 --- a/python/ql/src/Classes/UselessClass.ql +++ b/python/ql/src/Classes/UselessClass.ql @@ -13,7 +13,7 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch predicate fewer_than_two_public_methods(Class cls, int methods) { (methods = 0 or methods = 1) and @@ -25,13 +25,8 @@ predicate does_not_define_special_method(Class cls) { } predicate no_inheritance(Class c) { - not exists(ClassValue cls, ClassValue other | - cls.getScope() = c and - other != ClassValue::object() - | - other.getABaseType() = cls or - cls.getABaseType() = other - ) and + not exists(getADirectSubclass(c)) and + not exists(getADirectSuperclass(c)) and not exists(Expr base | base = c.getABase() | not base instanceof Name or base.(Name).getId() != "object" ) From cd4dbb4abe092ccdf1c62c9426731119aafd27df Mon Sep 17 00:00:00 2001 From: Taus Date: Fri, 20 Feb 2026 16:54:49 +0000 Subject: [PATCH 19/19] Python: Port ShouldBeContextManager.ql Only trivial test changes. --- python/ql/src/Classes/ShouldBeContextManager.ql | 8 +++++--- .../ShouldBeContextManager.expected | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/ql/src/Classes/ShouldBeContextManager.ql b/python/ql/src/Classes/ShouldBeContextManager.ql index 6aec0f0e0ab0..9a50d841a740 100644 --- a/python/ql/src/Classes/ShouldBeContextManager.ql +++ b/python/ql/src/Classes/ShouldBeContextManager.ql @@ -14,10 +14,12 @@ */ import python -private import LegacyPointsTo +private import semmle.python.dataflow.new.internal.DataFlowDispatch -from ClassValue c -where not c.isBuiltin() and not c.isContextManager() and exists(c.declaredAttribute("__del__")) +from Class c +where + not DuckTyping::isContextManager(c) and + DuckTyping::hasMethod(c, "__del__") select c, "Class " + c.getName() + " implements __del__ (presumably to release some resource). Consider making it a context manager." diff --git a/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected b/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected index 47c773804ae8..3cd6d92ff641 100644 --- a/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected +++ b/python/ql/test/query-tests/Classes/should-be-context-manager/ShouldBeContextManager.expected @@ -1,2 +1,2 @@ -| should_be_context_manager.py:3:1:3:22 | class MegaDel | Class MegaDel implements __del__ (presumably to release some resource). Consider making it a context manager. | -| should_be_context_manager.py:16:1:16:22 | class MiniDel | Class MiniDel implements __del__ (presumably to release some resource). Consider making it a context manager. | +| should_be_context_manager.py:3:1:3:22 | Class MegaDel | Class MegaDel implements __del__ (presumably to release some resource). Consider making it a context manager. | +| should_be_context_manager.py:16:1:16:22 | Class MiniDel | Class MiniDel implements __del__ (presumably to release some resource). Consider making it a context manager. |