diff --git a/its/ruling/src/test/resources/eclipse-jetty-similar-to-main/java-S5852.json b/its/ruling/src/test/resources/eclipse-jetty-similar-to-main/java-S8786.json similarity index 100% rename from its/ruling/src/test/resources/eclipse-jetty-similar-to-main/java-S5852.json rename to its/ruling/src/test/resources/eclipse-jetty-similar-to-main/java-S8786.json diff --git a/its/ruling/src/test/resources/eclipse-jetty/java-S5852.json b/its/ruling/src/test/resources/eclipse-jetty/java-S5852.json deleted file mode 100644 index 449afe7edd6..00000000000 --- a/its/ruling/src/test/resources/eclipse-jetty/java-S5852.json +++ /dev/null @@ -1,5 +0,0 @@ -{ -"org.eclipse.jetty:jetty-project:jetty-http/src/main/java/org/eclipse/jetty/http/HttpCompliance.java": [ -137 -] -} diff --git a/its/ruling/src/test/resources/eclipse-jetty/java-S8786.json b/its/ruling/src/test/resources/eclipse-jetty/java-S8786.json new file mode 100644 index 00000000000..07c07933d8f --- /dev/null +++ b/its/ruling/src/test/resources/eclipse-jetty/java-S8786.json @@ -0,0 +1,8 @@ +{ +"org.eclipse.jetty:jetty-project:jetty-http/src/main/java/org/eclipse/jetty/http/HttpCompliance.java": [ +137 +], +"org.eclipse.jetty:jetty-project:jetty-util/src/test/java/org/eclipse/jetty/util/ssl/SslContextFactoryTest.java": [ +366 +] +} diff --git a/its/ruling/src/test/resources/guava/java-S5852.json b/its/ruling/src/test/resources/guava/java-S8786.json similarity index 100% rename from its/ruling/src/test/resources/guava/java-S5852.json rename to its/ruling/src/test/resources/guava/java-S8786.json diff --git a/its/ruling/src/test/resources/regex-examples/java-S5852.json b/its/ruling/src/test/resources/regex-examples/java-S5852.json index 06e2259bc86..7788f50649a 100644 --- a/its/ruling/src/test/resources/regex-examples/java-S5852.json +++ b/its/ruling/src/test/resources/regex-examples/java-S5852.json @@ -1,770 +1,8 @@ { "org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase1.java": [ -33, -49, 
-145, -195, -205, -209, -211, -213, -241, -243, -271, -297, -311, -341, -355, -367, -385, -387, -389, -391, -397, -403, -407, -413, -463, -481, -487, -493, -639, -683, -695, -717, -721, -795, -805, -811, -813, -819, -829, -865, -875, -893, -901, -935, -949, -1007, -1045, -1063, -1119, -1121, -1123, -1233, -1235, -1253, -1255, -1257, -1259, -1261, -1263, -1321, -1331, -1333, -1339, -1433, -1515, -1517, -1519, -1525, -1537, -1541, -1551, -1587, -1589, -1593, -1607, -1621, -1631, -1653, -1677, -1741, -1767, -1769, -1771, -1789, -1791, -1797, -1853, -1855, -1949, -1961, -1973, -1975, -1981, -1989 +1433 ], "org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase2.java": [ -13, -15, -17, -25, -59, -67, -91, -99, -105, -109, -125, -127, -179, -215, -239, -241, -243, -271, -281, -291, -293, -345, -465, -473, -475, -479, -551, -557, -717, -737, -779, -881, -925, -953, -977, -1025, -1077, -1119, -1169, -1191, -1311, -1381, -1405, -1413, -1453, -1481, -1515, -1517, -1533, -1553, -1555, -1559, -1631, -1637, -1639, -1673, -1741, -1749, -1871, -1897, -1899, -1901, -1911, -1913, -1917, -1921, -1925, -1933, -1979 -], -"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase3.java": [ -11, -29, -115, -117, -119, -121, -123, -173, -207, -211, -219, -223, -231, -261, -263, -291, -303, -319, -321, -347, -355, -379, -389, -447, -451, -483, -501, -571, -577, -599, -609, -613, -627, -693, -791, -793, -801, -859, -875, -903, -905, -925, -961, -999, -1009, -1017, -1033, -1081, -1095, -1097, -1147, -1149, -1151, -1153, -1155, -1157, -1159, -1161, -1163, -1221, -1223, -1231, -1235, -1237, -1265, -1329, -1341, -1369, -1379, -1405, -1429, -1433, -1445, -1453, -1455, -1457, -1459, -1467, -1489, -1497, -1501, -1505, -1511, -1529, -1547, -1549, -1597, -1605, -1609, -1627, -1715, -1719, -1731, -1745, -1747, -1749, -1753, -1755, -1825, -1853, -1855, -1905, -1909, -1935, -1941, -1957, -1959, -1971, -1975, -1977, -1979, -1981, -1983, -1985, -1987, 
-1989, -1991, -1993, -1995, -1997, -2005, -2007 -], -"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase4.java": [ -9, -11, -17, -19, -21, -23, -29, -31, -33, -39, -41, -45, -55, -57, -59, -85, -147, -169, -171, -179, -181, -183, -185, -187, -189, -203, -211, -213, -221, -261, -281, -285, -291, -305, -341, -355, -357, -373, -385, -415, -421, -427, -463, -471, -473, -475, -477, -479, -481, -483, -489, -491, -493, -509, -511, -513, -547, -549, -551, -553, -555, -563, -565, -575, -587, -589, -603, -671, -677, -691, -693, -695, -697, -699, -701, -703, -705, -707, -789, -791, -819, -847, -855, -861, -871, -889, -891, -893, -895, -899, -901, -915, -919, -927, -931, -935, -941, -945, -949, -953, -993, -1017, -1019, -1021, -1107, -1109, -1111, -1113, -1115, -1117, -1119, -1123, -1235, -1237, -1239, -1241, -1245, -1247, -1301, -1343, -1345, -1347, -1373, -1383, -1387, -1389, -1403, -1405, -1447, -1473, -1481, -1521, -1523, -1525, -1527, -1529, -1531, -1533, -1535, -1537, -1539, -1583, -1585, -1589, -1623, -1639, -1657, -1681, -1801, -1803, -1811, -1813, -1819, -1905, -1995, -2007 -], -"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase5.java": [ -17, -21, -23, -65, -75, -91, -119, -121, -137, -177, -207, -229, -231, -235, -259, -281, -285, -309, -311, -315, -335, -365, -385, -393, -403, -405, -453, -577, -581, -599, -617, -643, -655, -705, -725, -729, -757, -761, -835, -875, -961, -963, -965, -1049, -1057, -1071, -1103, -1121, -1135, -1137, -1171, -1177, -1233, -1285, -1287, -1289, -1327, -1355, -1359, -1363, -1405, -1409, -1477, -1513, -1561, -1563, -1573, -1575, -1577, -1579, -1581, -1625, -1627, -1733, -1741, -1771, -1781, -1809, -1823, -1835, -1839, -1841, -1843, -1847, -1879, -1927, -1981, -1999 -], -"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase6.java": [ -39, -55, -61, -67, -71, -77, -79, -81, -103, -105, -115, -181, -207, -227, -253, -297, -335, -339, -341, -345, -347, 
-393, -461, -467, -473, -481, -495, -497, -509, -575, -601, -603, -615, -619, -623, -741, -771, -773, -805, -807, -809, -815, -817, -821, -859, -861, -891, -937, -951, -1005, -1021, -1023, -1027, -1037, -1051, -1123, -1149, -1233, -1255, -1305, -1389, -1395, -1435, -1445, -1447, -1455, -1519, -1523, -1533, -1543, -1549, -1553, -1555, -1663, -1673, -1739, -1743, -1823, -1825, -1827, -1829, -1839, -1841, -1843, -1895, -1925, -1929, -1933, -1945, -1949, -1957 -], -"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase7.java": [ -29, -43, -45, -51, -63, -71, -105, -121, -145, -147, -149, -209, -211, -247, -249, -267, -275, -277, -279, -351, -365, -389, -423, -427, -429, -431, -527, -539, -543, -609, -653, -663, -705, -707, -709, -711, -725, -1133, -1159, -1193, -1233, -1289, -1291, -1293, -1331, -1369, -1509, -1523, -1731, -1771, -1781, -1783, -1785, -1791, -1801, -1805, -1807, -1817, -1821, -1839, -1863, -1919, -1935, -1943, -2007 -], -"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase8.java": [ -11, -13, -37, -53, -55, -225, -227, -231, -275, -277, -285, -331, -347, -503, -505, -509, -517, -633, -637, -645, -649, -655, -657, -659, -665, -667, -669, -687, -691, -693, -695, -697, -699, -715, -791, -853, -861, -869, -873, -905, -907, -929, -933, -935, -941, -951, -953, -965, -975, -995, -1039, -1191, -1249, -1263, -1311, -1315, -1321, -1323, -1327, -1329, -1331, -1349, -1463, -1475, -1481, -1491, -1513 +345 ] } diff --git a/its/ruling/src/test/resources/regex-examples/java-S8786.json b/its/ruling/src/test/resources/regex-examples/java-S8786.json new file mode 100644 index 00000000000..6bb591f72b2 --- /dev/null +++ b/its/ruling/src/test/resources/regex-examples/java-S8786.json @@ -0,0 +1,768 @@ +{ +"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase1.java": [ +33, +49, +145, +195, +205, +209, +211, +213, +241, +243, +271, +297, +311, +341, +355, +367, +385, +387, +389, +391, +397, 
+403, +407, +413, +463, +481, +487, +493, +639, +683, +695, +717, +721, +795, +805, +811, +813, +819, +829, +865, +875, +893, +901, +935, +949, +1007, +1045, +1063, +1119, +1121, +1123, +1233, +1235, +1253, +1255, +1257, +1259, +1261, +1263, +1321, +1331, +1333, +1339, +1515, +1517, +1519, +1525, +1537, +1541, +1551, +1587, +1589, +1593, +1607, +1621, +1631, +1653, +1677, +1741, +1767, +1769, +1771, +1789, +1791, +1797, +1853, +1855, +1949, +1961, +1973, +1975, +1981, +1989 +], +"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase2.java": [ +13, +15, +17, +25, +59, +67, +91, +99, +105, +109, +125, +127, +179, +215, +239, +241, +243, +271, +281, +291, +293, +465, +473, +475, +479, +551, +557, +717, +737, +779, +881, +925, +953, +977, +1025, +1077, +1119, +1169, +1191, +1311, +1381, +1405, +1413, +1453, +1481, +1515, +1517, +1533, +1553, +1555, +1559, +1631, +1637, +1639, +1673, +1741, +1749, +1871, +1897, +1899, +1901, +1911, +1913, +1917, +1921, +1925, +1933, +1979 +], +"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase3.java": [ +11, +29, +115, +117, +119, +121, +123, +173, +207, +211, +219, +223, +231, +261, +263, +291, +303, +319, +321, +347, +355, +379, +389, +447, +451, +483, +501, +571, +577, +599, +609, +613, +627, +693, +791, +793, +801, +859, +875, +903, +905, +925, +961, +999, +1009, +1017, +1033, +1081, +1095, +1097, +1147, +1149, +1151, +1153, +1155, +1157, +1159, +1161, +1163, +1221, +1223, +1231, +1235, +1237, +1265, +1329, +1341, +1369, +1379, +1405, +1429, +1433, +1445, +1453, +1455, +1457, +1459, +1467, +1489, +1497, +1501, +1505, +1511, +1529, +1547, +1549, +1597, +1605, +1609, +1627, +1715, +1719, +1731, +1745, +1747, +1749, +1753, +1755, +1825, +1853, +1855, +1905, +1909, +1935, +1941, +1957, +1959, +1971, +1975, +1977, +1979, +1981, +1983, +1985, +1987, +1989, +1991, +1993, +1995, +1997, +2005, +2007 +], 
+"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase4.java": [ +9, +11, +17, +19, +21, +23, +29, +31, +33, +39, +41, +45, +55, +57, +59, +85, +147, +169, +171, +179, +181, +183, +185, +187, +189, +203, +211, +213, +221, +261, +281, +285, +291, +305, +341, +355, +357, +373, +385, +415, +421, +427, +463, +471, +473, +475, +477, +479, +481, +483, +489, +491, +493, +509, +511, +513, +547, +549, +551, +553, +555, +563, +565, +575, +587, +589, +603, +671, +677, +691, +693, +695, +697, +699, +701, +703, +705, +707, +789, +791, +819, +847, +855, +861, +871, +889, +891, +893, +895, +899, +901, +915, +919, +927, +931, +935, +941, +945, +949, +953, +993, +1017, +1019, +1021, +1107, +1109, +1111, +1113, +1115, +1117, +1119, +1123, +1235, +1237, +1239, +1241, +1245, +1247, +1301, +1343, +1345, +1347, +1373, +1383, +1387, +1389, +1403, +1405, +1447, +1473, +1481, +1521, +1523, +1525, +1527, +1529, +1531, +1533, +1535, +1537, +1539, +1583, +1585, +1589, +1623, +1639, +1657, +1681, +1801, +1803, +1811, +1813, +1819, +1905, +1995, +2007 +], +"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase5.java": [ +17, +21, +23, +65, +75, +91, +119, +121, +137, +177, +207, +229, +231, +235, +259, +281, +285, +309, +311, +315, +335, +365, +385, +393, +403, +405, +453, +577, +581, +599, +617, +643, +655, +705, +725, +729, +757, +761, +835, +875, +961, +963, +965, +1049, +1057, +1071, +1103, +1121, +1135, +1137, +1171, +1177, +1233, +1285, +1287, +1289, +1327, +1355, +1359, +1363, +1405, +1409, +1477, +1513, +1561, +1563, +1573, +1575, +1577, +1579, +1581, +1625, +1627, +1733, +1741, +1771, +1781, +1809, +1823, +1835, +1839, +1841, +1843, +1847, +1879, +1927, +1981, +1999 +], +"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase6.java": [ +39, +55, +61, +67, +71, +77, +79, +81, +103, +105, +115, +181, +207, +227, +253, +297, +335, +339, +341, +345, +347, +393, +461, +467, +473, +481, +495, +497, +509, +575, 
+601, +603, +615, +619, +623, +741, +771, +773, +805, +807, +809, +815, +817, +821, +859, +861, +891, +937, +951, +1005, +1021, +1023, +1027, +1037, +1051, +1123, +1149, +1233, +1255, +1305, +1389, +1395, +1435, +1445, +1447, +1455, +1519, +1523, +1533, +1543, +1549, +1553, +1555, +1663, +1673, +1739, +1743, +1823, +1825, +1827, +1829, +1839, +1841, +1843, +1895, +1925, +1929, +1933, +1945, +1949, +1957 +], +"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase7.java": [ +29, +43, +45, +51, +63, +71, +105, +121, +145, +147, +149, +209, +211, +247, +249, +267, +275, +277, +279, +351, +365, +389, +423, +427, +429, +431, +527, +539, +543, +609, +653, +663, +705, +707, +709, +711, +725, +1133, +1159, +1193, +1233, +1289, +1291, +1293, +1331, +1369, +1509, +1523, +1731, +1771, +1781, +1783, +1785, +1791, +1801, +1805, +1807, +1817, +1821, +1839, +1863, +1919, +1935, +1943, +2007 +], +"org.regex-examples:regex-examples:src/main/java/org/regex/examples/RegexDatabase8.java": [ +11, +13, +37, +53, +55, +225, +227, +231, +275, +277, +285, +331, +347, +503, +505, +509, +517, +633, +637, +645, +649, +655, +657, +659, +665, +667, +669, +687, +691, +693, +695, +697, +699, +715, +791, +853, +861, +869, +873, +905, +907, +929, +933, +935, +941, +951, +953, +965, +975, +995, +1039, +1191, +1249, +1263, +1311, +1315, +1321, +1323, +1327, +1329, +1331, +1349, +1463, +1475, +1481, +1491, +1513 +] +} diff --git a/its/ruling/src/test/resources/sonar-server/java-S5852.json b/its/ruling/src/test/resources/sonar-server/java-S8786.json similarity index 100% rename from its/ruling/src/test/resources/sonar-server/java-S5852.json rename to its/ruling/src/test/resources/sonar-server/java-S8786.json diff --git a/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckJava8.java b/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckJava8.java index b78cd77226b..66758b5dac8 100644 --- 
a/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckJava8.java +++ b/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckJava8.java @@ -38,16 +38,16 @@ void quadraticInJava9(String str) { } void alwaysQuadratic(String str) { - str.matches("x*\\w*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches(".*.*X"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*\\w*"); // Compliant - always quadratic, reported by S8786 + str.matches(".*.*X"); // Compliant - always quadratic, reported by S8786 } void fixedInJava9(String str) { - str.matches("(.?,)*X"); // Noncompliant {{Make sure the regex used here, which is vulnerable to exponential runtime due to backtracking, cannot lead to denial of service or make sure the code is only run using Java 9 or later.}} + str.matches("(.?,)*X"); // Noncompliant {{Make sure the regex used here, which is vulnerable to exponential runtime due to backtracking, cannot lead to denial of service.}} } void notFixedInJava9(String str) { - // The back reference prevents the Java 9+ optimization from being applied, so we shouldn't mention it + // The back reference prevents the Java 9+ optimization from being applied str.matches("(.?,)*\\1"); // Noncompliant {{Make sure the regex used here, which is vulnerable to exponential runtime due to backtracking, cannot lead to denial of service.}} } diff --git a/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckSample.java b/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckSample.java index dbdd3f2cb7c..9a60e6182fa 100644 --- a/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckSample.java +++ b/java-checks-test-sources/default/src/main/java/checks/regex/RedosCheckSample.java @@ -5,24 
+5,22 @@ public class RedosCheckSample { - @Email(regexp = "(.*-)*@.*") // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} -// ^^^^^ + @Email(regexp = "(.*-)*@.*") // Compliant - polynomial, reported by S8786 String email; - @jakarta.validation.constraints.Email(regexp = "(.*-)*@.*") // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} -// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + @jakarta.validation.constraints.Email(regexp = "(.*-)*@.*") // Compliant - polynomial, reported by S8786 String email2; void realWorldExamples(String str) { String cloudflareAttack = "(?:(?:\"|'|\\]|\\}|\\\\|\\d|(?:nan|infinity|true|false|null|undefined|symbol|math)|\\`|\\-|\\+)+[)]*;?((?:\\s|-|~|!|\\{\\}|\\|\\||\\+)*.*(?:.*=.*)))"; String stackOverflowAttack = "^[\\s\\u200c]+|[\\s\\u200c]+$"; - str.matches(cloudflareAttack); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.replaceAll(stackOverflowAttack, ""); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches(cloudflareAttack); // Compliant - always quadratic, reported by S8786 + str.replaceAll(stackOverflowAttack, ""); // Compliant - always quadratic, reported by S8786 } void fullAndPartialMatches(String str) { Pattern p1 = Pattern.compile("(.*,)*"); // Compliant because it's never used for a full match - Pattern p2 = Pattern.compile("(.*,)*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + Pattern p2 = Pattern.compile("(.*,)*"); // Compliant - polynomial on Java 9+, reported by S8786 p1.matcher(str).find(); p2.matcher(str).find(); 
p2.matcher(str).matches(); @@ -38,53 +36,53 @@ void alwaysExponential(String str) { } void polynomialInJava9(String str) { - str.matches("(.*,)*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*,)*.*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.split("(.*,)*X"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*,)*X"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*?,)+"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*?,){5,}"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("((.*,)*)*+"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("((.*,)*)?"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(?>(.*,)*)"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("((?>.*,)*)*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*,)* (.*,)*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime 
due to backtracking, cannot lead to denial of service.}} - str.split("(.*,)*$"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*,)*$"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*,)*(..)*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(.*,)*(.{2})*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("(.*,)*"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*,)*.*"); // Compliant - polynomial on Java 9+, reported by S8786 + str.split("(.*,)*X"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*,)*X"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*?,)+"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*?,){5,}"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("((.*,)*)*+"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("((.*,)*)?"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(?>(.*,)*)"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("((?>.*,)*)*"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*,)* (.*,)*"); // Compliant - polynomial on Java 9+, reported by S8786 + str.split("(.*,)*$"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*,)*$"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*,)*(..)*"); // Compliant - polynomial on Java 9+, reported by S8786 + str.matches("(.*,)*(.{2})*"); // Compliant - polynomial on Java 9+, 
reported by S8786 } void alwaysQuadratic(String str) { // Always polynomial when two non-possessive quantifiers overlap in a sequence - str.matches("x*\\w*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches(".*.*X"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("x*a*x*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*\\w*"); // Compliant - always quadratic, reported by S8786 + str.matches(".*.*X"); // Compliant - always quadratic, reported by S8786 + str.matches("x*a*x*"); // Compliant - always quadratic, reported by S8786 str.matches("x*,a*x*"); // Compliant, can fail between the two quantifiers - str.matches("x*(xy?)*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*(xy?)*"); // Compliant - always quadratic, reported by S8786 str.matches("(ab)*a(ba)*"); // False Negative :-( - str.matches("x*xx*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*xx*"); // Compliant - always quadratic, reported by S8786 str.matches("x*yx*"); // Compliant - str.matches("x*a*b*c*d*e*f*g*h*i*x*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*a*b*c*d*e*f*g*h*i*x*"); // Compliant - always quadratic, reported by S8786 str.matches("x*a*b*c*d*e*f*g*h*i*j*x*"); // FN because we forget about the first x* when the maximum number of tracked repetitions is exceeded - 
str.matches("x*a*b*c*d*e*f*g*h*i*j*x*x*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*a*b*c*d*e*f*g*h*i*j*x*x*"); // Compliant - always quadratic, reported by S8786 // Non-possessive followed by possessive quantifier is actually polynomial - str.matches(".*\\s*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches(".*\\s*+"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches(".*\\s*"); // Compliant - always quadratic, reported by S8786 + str.matches(".*\\s*+"); // Compliant - always quadratic, reported by S8786 str.matches(".*+\\s*"); // Compliant, other way (possessive then non-possessive) is fine str.matches(".*+\\s*+"); // Compliant, two possessives is fine str.matches(".*,\\s*+,"); // Compliant, can fail between the two quantifiers - str.matches("\\s*\\s*+,"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("\\s*\\s*+,"); // Compliant - always quadratic, reported by S8786 str.matches("a*\\s*+,"); // Compliant, no overlap - str.matches("[a\\s]*\\s*+,"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("[a\\s]*b*\\s*+,"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("[a\\s]*\\s*+,"); // Compliant - always quadratic, reported by S8786 + str.matches("[a\\s]*b*\\s*+,"); // Compliant - always quadratic, reported by S8786 str.matches("\\s*+[a\\s]*b*,"); // Compliant, possessive then 
non-possessive str.matches("\\s*+b*[a\\s]*,"); // Compliant, possessive then non-possessive // Implicit reluctant quantifier in partial match also leads to polynomial runtime - str.split("\\s*,"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.split("\\s*+,"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(?s:.*)\\s*,(?s:.*)"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} - str.matches("(?s:.*)\\s*+,(?s:.*)"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.split("\\s*,"); // Compliant - always quadratic, reported by S8786 + str.split("\\s*+,"); // Compliant - always quadratic, reported by S8786 + str.matches("(?s:.*)\\s*,(?s:.*)"); // Compliant - always quadratic, reported by S8786 + str.matches("(?s:.*)\\s*+,(?s:.*)"); // Compliant - always quadratic, reported by S8786 str.split(",\\s*+"); // Compliant str.split(",\\s*+,"); // Compliant str.split("\\s*+"); // Compliant @@ -92,15 +90,15 @@ void alwaysQuadratic(String str) { void differentPolynomials(String str) { // quadratic (O(n^2)) - str.matches("x*x*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*x*"); // Compliant - always quadratic, reported by S8786 // cubic (O(n^3)) - str.matches("x*x*x*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*x*x*"); // Compliant - always quadratic, reported by S8786 // O(n^4) - str.matches("x*x*x*x*"); // Noncompliant 
{{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*x*x*x*"); // Compliant - always quadratic, reported by S8786 // O(n^5) - str.matches("x*x*x*x*x*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("x*x*x*x*x*"); // Compliant - always quadratic, reported by S8786 // cubic - str.matches("[^=]*.*.*=.*"); // Noncompliant {{Make sure the regex used here, which is vulnerable to polynomial runtime due to backtracking, cannot lead to denial of service.}} + str.matches("[^=]*.*.*=.*"); // Compliant - always quadratic, reported by S8786 } void fixedInJava9(String str) { diff --git a/java-checks-test-sources/default/src/main/java/checks/regex/SuperLinearRegexCheckJava8.java b/java-checks-test-sources/default/src/main/java/checks/regex/SuperLinearRegexCheckJava8.java new file mode 100644 index 00000000000..fdc6a959f73 --- /dev/null +++ b/java-checks-test-sources/default/src/main/java/checks/regex/SuperLinearRegexCheckJava8.java @@ -0,0 +1,70 @@ +package checks.regex; + +import javax.validation.constraints.Email; + +public class SuperLinearRegexCheckJava8 { + + @Email(regexp = "(.*-)*@.*") // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + String email; + + @jakarta.validation.constraints.Email(regexp = "(.*-)*@.*") // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + String email2; + + void alwaysExponential(String str) { + str.matches("(.*,)*?"); // Compliant - always exponential, reported by S5852 + str.matches("(.?,)*?"); // Compliant - always exponential, reported by S5852 + str.matches("(a|.a)*?"); // Compliant - always exponential, reported by S5852 + str.matches("(.*,)*X\\1"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(.*,)*\\1"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, 
reported by S5852 + } + + void quadraticInJava9(String str) { + str.matches("(.*,)*"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(.*,)*.*"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.split("(.*,)*X"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(.*,)*X"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(.*?,)+"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(.*?,){5,}"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("((.*,)*)*+"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("((.*,)*)?"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(?>(.*,)*)"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("((?>.*,)*)*"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(.*,)* (.*,)*"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.split("(.*,)*$"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + str.matches("(.*,)*$"); // Compliant - QUADRATIC_WHEN_OPTIMIZED on Java 8, reported by S5852 + } + + void alwaysQuadratic(String str) { + str.matches("x*\\w*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches(".*.*X"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + } + + void fixedInJava9(String str) { + str.matches("(.?,)*X"); // Compliant - LINEAR_WHEN_OPTIMIZED on Java 8, reported by S5852 + } + + void notFixedInJava9(String str) { + // The back reference prevents the Java 9+ optimization from being applied + str.matches("(.?,)*\\1"); // Compliant - LINEAR_WHEN_OPTIMIZED + backref on 
Java 8, reported by S5852 + } + + void compliant(String str) { + str.split("(.*,)*"); + str.matches("(?s)(.*,)*.*"); + str.matches("(a|b)*"); + str.matches("(x*,){1,5}X"); + str.matches("((a|.a),)*"); + str.matches("(.*,)*[\\s\\S]*"); + str.matches("(?U)(.*,)*(.|\\s)*"); + str.matches("(x?,)?"); + str.matches("(?>.*,)*"); + str.matches("([^,]*+,)*"); + str.matches("(.*?,){5}"); + str.matches("(.*?,){1,5}"); + str.matches("([^,]*,)*"); + str.matches("(;?,)*"); + str.matches("(;*,)*"); + str.matches("(.*,)*("); // Rule is not applied to syntactically invalid regular expressions + } + +} diff --git a/java-checks-test-sources/default/src/main/java/checks/regex/SuperLinearRegexCheckSample.java b/java-checks-test-sources/default/src/main/java/checks/regex/SuperLinearRegexCheckSample.java new file mode 100644 index 00000000000..86194d35401 --- /dev/null +++ b/java-checks-test-sources/default/src/main/java/checks/regex/SuperLinearRegexCheckSample.java @@ -0,0 +1,144 @@ +package checks.regex; + +import java.util.regex.Pattern; +import javax.validation.constraints.Email; + +public class SuperLinearRegexCheckSample { + + @Email(regexp = "(.*-)*@.*") // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} +// ^^^^^ + String email; + + @jakarta.validation.constraints.Email(regexp = "(.*-)*@.*") // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + String email2; + + void realWorldExamples(String str) { + String cloudflareAttack = "(?:(?:\"|'|\\]|\\}|\\\\|\\d|(?:nan|infinity|true|false|null|undefined|symbol|math)|\\`|\\-|\\+)+[)]*;?((?:\\s|-|~|!|\\{\\}|\\|\\||\\+)*.*(?:.*=.*)))"; + String stackOverflowAttack = "^[\\s\\u200c]+|[\\s\\u200c]+$"; + str.matches(cloudflareAttack); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due 
to backtracking.}} + str.replaceAll(stackOverflowAttack, ""); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + } + + void fullAndPartialMatches(String str) { + Pattern p1 = Pattern.compile("(.*,)*"); // Compliant because it's never used for a full match + Pattern p2 = Pattern.compile("(.*,)*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + p1.matcher(str).find(); + p2.matcher(str).find(); + p2.matcher(str).matches(); + } + + void alwaysExponential(String str) { + str.matches("(.*,)*?"); // Compliant - always exponential, reported by S5852 + str.matches("(.?,)*?"); // Compliant - always exponential, reported by S5852 + str.matches("(a|.a)*?"); // Compliant - always exponential, reported by S5852 + str.matches("(?:.*,)*(X)\\1"); // Compliant - QUADRATIC_WHEN_OPTIMIZED + backref on Java 9+, reported by S5852 + str.matches("(.*,)*\\1"); // Compliant - QUADRATIC_WHEN_OPTIMIZED + backref on Java 9+, reported by S5852 + } + + void polynomialInJava9(String str) { + str.matches("(.*,)*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*,)*.*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.split("(.*,)*X"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*,)*X"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*?,)+"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*?,){5,}"); // Noncompliant {{Simplify this regular 
expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("((.*,)*)*+"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("((.*,)*)?"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(?>(.*,)*)"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("((?>.*,)*)*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*,)* (.*,)*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.split("(.*,)*$"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*,)*$"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*,)*(..)*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(.*,)*(.{2})*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + } + + void alwaysQuadratic(String str) { + // Always polynomial when two non-possessive quantifiers overlap in a sequence + str.matches("x*\\w*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches(".*.*X"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + 
str.matches("x*a*x*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("x*,a*x*"); // Compliant, can fail between the two quantifiers + str.matches("x*(xy?)*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(ab)*a(ba)*"); // False Negative :-( + str.matches("x*xx*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("x*yx*"); // Compliant + str.matches("x*a*b*c*d*e*f*g*h*i*x*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("x*a*b*c*d*e*f*g*h*i*j*x*"); // FN because we forget about the first x* when the maximum number of tracked repetitions is exceeded + str.matches("x*a*b*c*d*e*f*g*h*i*j*x*x*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + // Non-possessive followed by possessive quantifier is actually polynomial + str.matches(".*\\s*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches(".*\\s*+"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches(".*+\\s*"); // Compliant, other way (possessive then non-possessive) is fine + str.matches(".*+\\s*+"); // Compliant, two possessives is fine + str.matches(".*,\\s*+,"); // Compliant, can fail between the two quantifiers + str.matches("\\s*\\s*+,"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("a*\\s*+,"); // Compliant, no overlap + 
str.matches("[a\\s]*\\s*+,"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("[a\\s]*b*\\s*+,"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("\\s*+[a\\s]*b*,"); // Compliant, possessive then non-possessive + str.matches("\\s*+b*[a\\s]*,"); // Compliant, possessive then non-possessive + // Implicit reluctant quantifier in partial match also leads to polynomial runtime + str.split("\\s*,"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.split("\\s*+,"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(?s:.*)\\s*,(?s:.*)"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.matches("(?s:.*)\\s*+,(?s:.*)"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + str.split(",\\s*+"); // Compliant + str.split(",\\s*+,"); // Compliant + str.split("\\s*+"); // Compliant + } + + void differentPolynomials(String str) { + // quadratic (O(n^2)) + str.matches("x*x*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + // cubic (O(n^3)) + str.matches("x*x*x*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + // O(n^4) + str.matches("x*x*x*x*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + // O(n^5) + str.matches("x*x*x*x*x*"); // Noncompliant {{Simplify this regular expression to 
reduce its runtime, as it has super-linear performance due to backtracking.}} + // cubic + str.matches("[^=]*.*.*=.*"); // Noncompliant {{Simplify this regular expression to reduce its runtime, as it has super-linear performance due to backtracking.}} + } + + void fixedInJava9(String str) { + str.matches("(.?,)*X"); // Compliant - linear on Java 9+ + } + + void notFixedInJava9(String str) { + // The back reference prevents the Java 9+ optimization from being applied + str.matches("(.?,)*\\1"); // Compliant - LINEAR_WHEN_OPTIMIZED + backref on Java 9+, reported by S5852 + str.matches("(?:(.?)\\1,)*"); // FN because RegexTreeHelpers.intersects can't currently handle backreferences inside the repetition + } + + void compliant(String str) { + str.split("(.*,)*"); + str.matches("(?s)(.*,)*.*"); + str.matches("(.*,)*(?s:.)*"); + str.matches("(?s)(.*,)*(.?)*"); + str.matches("(a|b)*"); + str.matches("(x*,){1,5}X"); + str.matches("((a|.a),)*"); + str.matches("(.*,)*[\\s\\S]*"); + str.matches("(?U)(.*,)*(.|\\s)*"); + str.matches("(x?,)?"); + str.matches("(?>.*,)*"); + str.matches("([^,]*+,)*"); + str.matches("(.*?,){5}"); + str.matches("(.*?,){1,5}"); + str.matches("([^,]*,)*"); + str.matches("(;?,)*"); + str.matches("(;*,)*"); + str.matches("x*|x*"); + str.matches("a*b*"); + str.matches("a*a?b*"); + str.matches("a*(a?b)*"); + str.matches("a*(ab)*"); + str.split("x*x*"); + str.matches("(?s)x*.*"); + str.matches("x*(?s)*"); // Coverage + str.matches("(.*,)*("); // Rule is not applied to syntactically invalid regular expressions + } + +} diff --git a/java-checks/src/main/java/org/sonar/java/checks/regex/AbstractRedosCheck.java b/java-checks/src/main/java/org/sonar/java/checks/regex/AbstractRedosCheck.java new file mode 100644 index 00000000000..dfdbbe376ab --- /dev/null +++ b/java-checks/src/main/java/org/sonar/java/checks/regex/AbstractRedosCheck.java @@ -0,0 +1,327 @@ +/* + * SonarQube Java + * Copyright (C) SonarSource Sàrl + * mailto:info AT sonarsource DOT com + * + * 
You can redistribute and/or modify this program under the terms of + * the Sonar Source-Available License Version 1, as published by SonarSource Sàrl. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the Sonar Source-Available License for more details. + * + * You should have received a copy of the Sonar Source-Available License + * along with this program; if not, see https://sonarsource.com/license/ssal/ + */ +package org.sonar.java.checks.regex; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import javax.annotation.Nullable; +import org.sonar.plugins.java.api.tree.ExpressionTree; +import org.sonarsource.analyzer.commons.regex.MatchType; +import org.sonarsource.analyzer.commons.regex.RegexParseResult; +import org.sonarsource.analyzer.commons.regex.ast.AtomicGroupTree; +import org.sonarsource.analyzer.commons.regex.ast.AutomatonState; +import org.sonarsource.analyzer.commons.regex.ast.BackReferenceTree; +import org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree; +import org.sonarsource.analyzer.commons.regex.ast.DisjunctionTree; +import org.sonarsource.analyzer.commons.regex.ast.DotTree; +import org.sonarsource.analyzer.commons.regex.ast.GroupTree; +import org.sonarsource.analyzer.commons.regex.ast.RegexBaseVisitor; +import org.sonarsource.analyzer.commons.regex.ast.RegexTree; +import org.sonarsource.analyzer.commons.regex.ast.RepetitionTree; +import org.sonarsource.analyzer.commons.regex.helpers.IntersectAutomataChecker; +import org.sonarsource.analyzer.commons.regex.helpers.RegexReachabilityChecker; +import org.sonarsource.analyzer.commons.regex.helpers.SimplifiedRegexCharacterClass; +import 
org.sonarsource.analyzer.commons.regex.helpers.SubAutomaton; + +import static org.sonarsource.analyzer.commons.regex.helpers.RegexReachabilityChecker.canReachWithoutConsumingInput; +import static org.sonarsource.analyzer.commons.regex.helpers.RegexReachabilityChecker.canReachWithoutConsumingInputNorCrossingBoundaries; +import static org.sonarsource.analyzer.commons.regex.helpers.RegexTreeHelper.isAnchoredAtEnd; + +public abstract class AbstractRedosCheck extends AbstractRegexCheckTrackingMatchType { + + /** + * The maximum number of repetitions we keep track of in order to find overlapping consecutive repetitions. + * If a regex contains more repetitions than this, we will ignore some combinations of them to avoid performance + * problems (possibly causing FNs). + */ + private static final int MAX_TRACKED_REPETITIONS = 10; + + /** + * The maximum regex length that we analyze. If a regex contains more characters than this, we skip this rule to avoid + * performance problems. + */ + private static final int MAX_REGEX_LENGTH = 1000; + + protected boolean regexContainsBackReference; + protected BacktrackingType foundBacktrackingType; + + private final RegexReachabilityChecker reachabilityChecker = new RegexReachabilityChecker(false); + private final IntersectAutomataChecker intersectionChecker = new IntersectAutomataChecker(false); + + // Java 9 introduced a loop optimization that's applied to greedy repetitions in regexes that don't use capturing groups. + // Without this optimization any loop where for the same input multiple paths can be taken through the loop's body, + // has exponential runtime. With the optimization such loops, if they are greedy, have either quadratic runtime (if + // the paths go through an inner loop) or linear (i.e. safe) runtime. + // Consecutive (not nested) loops that can overlap each other cause quadratic runtime and are unaffected by this + // optimization. 
+ enum BacktrackingType { + ALWAYS_EXPONENTIAL, + QUADRATIC_WHEN_OPTIMIZED, + ALWAYS_QUADRATIC, + LINEAR_WHEN_OPTIMIZED, + NO_ISSUE + } + + protected boolean isJava9OrHigher() { + return context.getJavaVersion().isNotSet() || context.getJavaVersion().asInt() >= 9; + } + + abstract Optional buildMessage(); + + @Override + public void checkRegex(RegexParseResult regexForLiterals, ExpressionTree methodInvocationOrAnnotation, MatchType matchType) { + if (regexForLiterals.getResult().getText().length() > MAX_REGEX_LENGTH) { + return; + } + regexContainsBackReference = false; + foundBacktrackingType = BacktrackingType.NO_ISSUE; + reachabilityChecker.clearCache(); + intersectionChecker.clearCache(); + boolean isUsedForFullMatch = matchType == MatchType.FULL || matchType == MatchType.BOTH; + boolean isUsedForPartialMatch = matchType == MatchType.PARTIAL || matchType == MatchType.BOTH; + RedosFinder visitor = new RedosFinder(regexForLiterals.getStartState(), regexForLiterals.getFinalState(), isUsedForFullMatch, isUsedForPartialMatch); + visitor.visit(regexForLiterals); + buildMessage().ifPresent(message -> + reportIssue(methodOrAnnotationName(methodInvocationOrAnnotation), message, null, Collections.emptyList()) + ); + } + + private void addBacktracking(BacktrackingType newBacktrackingType) { + if (newBacktrackingType.ordinal() < foundBacktrackingType.ordinal()) { + foundBacktrackingType = newBacktrackingType; + } + } + + private class RedosFinder extends RegexBaseVisitor { + + private final Deque nonPossessiveRepetitions = new ArrayDeque<>(); + private final Map canFailCache = new HashMap<>(); + + private final AutomatonState startOfRegex; + private final AutomatonState endOfRegex; + private final boolean isUsedForFullMatch; + private final boolean isUsedForPartialMatch; + + public RedosFinder(AutomatonState startOfRegex, AutomatonState endOfRegex, boolean isUsedForFullMatch, boolean isUsedForPartialMatch) { + this.startOfRegex = startOfRegex; + this.endOfRegex = 
endOfRegex; + this.isUsedForFullMatch = isUsedForFullMatch; + this.isUsedForPartialMatch = isUsedForPartialMatch; + } + + @Override + public void visitRepetition(RepetitionTree tree) { + if (canFail(tree.continuation())) { + if (!tree.isPossessive() && tree.getQuantifier().isOpenEnded()) { + new BacktrackingFinder(tree.isReluctant(), tree.continuation()).visit(tree.getElement()); + } else { + super.visitRepetition(tree); + } + checkForOverlappingRepetitions(tree); + } + } + + private void checkForOverlappingRepetitions(RepetitionTree tree) { + if (tree.getQuantifier().isOpenEnded() && canFail(tree)) { + for (RepetitionTree repetition : nonPossessiveRepetitions) { + if (reachabilityChecker.canReach(repetition, tree)) { + SubAutomaton repetitionAuto = new SubAutomaton(repetition.getElement(), repetition.continuation(), false); + SubAutomaton continuationAuto = new SubAutomaton(repetition.continuation(), tree, false); + SubAutomaton treeAuto = new SubAutomaton(tree.getElement(), tree.continuation(), false); + if (subAutomatonCanConsume(repetitionAuto, continuationAuto) + && automatonIsEmptyOrIntersects(continuationAuto, treeAuto) + && intersectionChecker.check(repetitionAuto, treeAuto)) { + addBacktracking(BacktrackingType.ALWAYS_QUADRATIC); + } + } + } + if (overlapsWithImplicitMatchAlls(tree)) { + addBacktracking(BacktrackingType.ALWAYS_QUADRATIC); + } + addIfNonPossessive(tree); + } + } + + private boolean subAutomatonCanConsume(SubAutomaton auto1, SubAutomaton auto2) { + return canReachWithoutConsumingInputNorCrossingBoundaries(auto1.end, auto2.end) + || intersectionChecker.check(auto1, auto2); + } + + private boolean automatonIsEmptyOrIntersects(SubAutomaton auto1, SubAutomaton auto2) { + return canReachWithoutConsumingInputNorCrossingBoundaries(auto1.start, auto1.end) + || intersectionChecker.check(auto1, auto2); + } + + private void addIfNonPossessive(RepetitionTree tree) { + if (!tree.isPossessive()) { + nonPossessiveRepetitions.add(tree); + if 
(nonPossessiveRepetitions.size() > MAX_TRACKED_REPETITIONS) { + nonPossessiveRepetitions.removeFirst(); + } + } + } + + /** + * When used for partial matches, a regex acts as if it had `(?s:.*)` attached to its beginning and end unless anchored. + */ + private boolean overlapsWithImplicitMatchAlls(RepetitionTree tree) { + return isUsedForPartialMatch && canReachWithoutConsumingInputNorCrossingBoundaries(startOfRegex, tree); + } + + @Override + public void visitBackReference(BackReferenceTree tree) { + regexContainsBackReference = true; + } + + private boolean canFail(AutomatonState state) { + return canFail(state, !isUsedForFullMatch && !isAnchoredAtEnd(state)); + } + + private boolean canFail(AutomatonState state, boolean succeedOnEnd) { + if (canFailCache.containsKey(state)) { + return canFailCache.get(state); + } + canFailCache.put(state, true); + if (state.incomingTransitionType() != AutomatonState.TransitionType.EPSILON) { + return true; + } + if (canMatchAnything(state)) { + succeedOnEnd = true; + state = state.continuation(); + } + if ((succeedOnEnd && canReachWithoutConsumingInput(state, endOfRegex))) { + canFailCache.put(state, false); + return false; + } + for (AutomatonState successor : state.successors()) { + if (!canFail(successor, succeedOnEnd)) { + canFailCache.put(state, false); + return false; + } + } + return true; + } + + private boolean canMatchAnything(AutomatonState state) { + if (!(state instanceof RepetitionTree repetition)) { + return false; + } + return repetition.getQuantifier().getMinimumRepetitions() == 0 && repetition.getQuantifier().isOpenEnded() + && canMatchAnyCharacter(repetition.getElement()); + } + + private boolean canMatchAnyCharacter(RegexTree tree) { + SimplifiedRegexCharacterClass characterClass = new SimplifiedRegexCharacterClass(); + for (RegexTree singleCharacter : collectSingleCharacters(tree, new ArrayList<>())) { + if (singleCharacter.is(RegexTree.Kind.DOT)) { + characterClass.add((DotTree) singleCharacter); + } else { 
+ characterClass.add((CharacterClassElementTree) singleCharacter); + } + } + return characterClass.matchesAnyCharacter(); + } + + private List collectSingleCharacters(@Nullable RegexTree tree, List accumulator) { + if (tree == null) { + return accumulator; + } + if (tree instanceof CharacterClassElementTree || tree.is(RegexTree.Kind.DOT)) { + accumulator.add(tree); + } else if (tree.is(RegexTree.Kind.DISJUNCTION)) { + for (RegexTree alternative : ((DisjunctionTree) tree).getAlternatives()) { + collectSingleCharacters(alternative, accumulator); + } + } else if (tree instanceof GroupTree groupTree) { + collectSingleCharacters(groupTree.getElement(), accumulator); + } else if (tree.is(RegexTree.Kind.REPETITION)) { + RepetitionTree repetition = (RepetitionTree) tree; + if (repetition.getQuantifier().getMinimumRepetitions() <= 1) { + collectSingleCharacters(repetition.getElement(), accumulator); + } + } + return accumulator; + } + + } + + private class BacktrackingFinder extends RegexBaseVisitor { + + private final boolean isReluctant; + private final AutomatonState endOfLoop; + + public BacktrackingFinder(boolean isReluctant, AutomatonState endOfLoop) { + this.isReluctant = isReluctant; + this.endOfLoop = endOfLoop; + } + + @Override + public void visitAtomicGroup(AtomicGroupTree tree) { + new RedosFinder(tree, tree.continuation(), false, false).visit(tree); + } + + @Override + public void visitRepetition(RepetitionTree tree) { + if (tree.isPossessive()) { + new RedosFinder(tree, tree.continuation(), false, false).visit(tree); + } else if (containsIntersections(Arrays.asList(tree.getElement(), tree.continuation()))) { + BacktrackingType greedyComplexity = tree.getQuantifier().isOpenEnded() ? BacktrackingType.QUADRATIC_WHEN_OPTIMIZED : BacktrackingType.LINEAR_WHEN_OPTIMIZED; + addBacktracking(isReluctant ? 
BacktrackingType.ALWAYS_EXPONENTIAL : greedyComplexity); + super.visitRepetition(tree); + } else { + super.visitRepetition(tree); + } + } + + @Override + public void visitDisjunction(DisjunctionTree tree) { + if (containsIntersections(tree.getAlternatives())) { + addBacktracking(isReluctant ? BacktrackingType.ALWAYS_EXPONENTIAL : BacktrackingType.LINEAR_WHEN_OPTIMIZED); + } else { + super.visitDisjunction(tree); + } + } + + @Override + public void visitBackReference(BackReferenceTree tree) { + regexContainsBackReference = true; + } + + boolean containsIntersections(List alternatives) { + for (int i = 0; i < alternatives.size() - 1; i++) { + AutomatonState state1 = alternatives.get(i); + for (int j = i + 1; j < alternatives.size(); j++) { + AutomatonState state2 = alternatives.get(j); + SubAutomaton auto1 = new SubAutomaton(state1, endOfLoop, false); + SubAutomaton auto2 = new SubAutomaton(state2, endOfLoop, false); + if (intersectionChecker.check(auto1, auto2)) { + return true; + } + } + } + return false; + } + } + +} diff --git a/java-checks/src/main/java/org/sonar/java/checks/regex/RedosCheck.java b/java-checks/src/main/java/org/sonar/java/checks/regex/RedosCheck.java index a1d0e32fd22..c0251e561c2 100644 --- a/java-checks/src/main/java/org/sonar/java/checks/regex/RedosCheck.java +++ b/java-checks/src/main/java/org/sonar/java/checks/regex/RedosCheck.java @@ -16,343 +16,23 @@ */ package org.sonar.java.checks.regex; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Deque; -import java.util.HashMap; -import java.util.List; -import java.util.Map; import java.util.Optional; -import javax.annotation.Nullable; import org.sonar.check.Rule; -import org.sonar.plugins.java.api.tree.ExpressionTree; -import org.sonarsource.analyzer.commons.regex.MatchType; -import org.sonarsource.analyzer.commons.regex.RegexParseResult; -import org.sonarsource.analyzer.commons.regex.ast.AtomicGroupTree; 
-import org.sonarsource.analyzer.commons.regex.ast.AutomatonState; -import org.sonarsource.analyzer.commons.regex.ast.BackReferenceTree; -import org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree; -import org.sonarsource.analyzer.commons.regex.ast.DisjunctionTree; -import org.sonarsource.analyzer.commons.regex.ast.DotTree; -import org.sonarsource.analyzer.commons.regex.ast.GroupTree; -import org.sonarsource.analyzer.commons.regex.ast.RegexBaseVisitor; -import org.sonarsource.analyzer.commons.regex.ast.RegexTree; -import org.sonarsource.analyzer.commons.regex.ast.RepetitionTree; -import org.sonarsource.analyzer.commons.regex.helpers.IntersectAutomataChecker; -import org.sonarsource.analyzer.commons.regex.helpers.RegexReachabilityChecker; -import org.sonarsource.analyzer.commons.regex.helpers.SimplifiedRegexCharacterClass; -import org.sonarsource.analyzer.commons.regex.helpers.SubAutomaton; - -import static org.sonarsource.analyzer.commons.regex.helpers.RegexReachabilityChecker.canReachWithoutConsumingInput; -import static org.sonarsource.analyzer.commons.regex.helpers.RegexReachabilityChecker.canReachWithoutConsumingInputNorCrossingBoundaries; -import static org.sonarsource.analyzer.commons.regex.helpers.RegexTreeHelper.isAnchoredAtEnd; @Rule(key = "S5852") -public class RedosCheck extends AbstractRegexCheckTrackingMatchType { - - private static final String MESSAGE = "Make sure the regex used here, which is vulnerable to %s runtime due to backtracking," + - " cannot lead to denial of service%s."; - private static final String JAVA8_MESSAGE = " or make sure the code is only run using Java 9 or later"; - private static final String EXP = "exponential"; - private static final String POLY = "polynomial"; - - /** - * The maximum number of repetitions we keep track of in order to find overlapping consecutive repetitions. 
- * If a regex contains more repetitions than this, we will ignore some combinations of them to avoid performance - * problems (possibly causing FNs). - */ - private static final int MAX_TRACKED_REPETITIONS = 10; - - /** - * The maximum regex length that we analyze. If a regex contains more characters than this, we skip this rule to avoid - * performance problems. - */ - private static final int MAX_REGEX_LENGTH = 1000; - - private boolean regexContainsBackReference; - private BacktrackingType foundBacktrackingType; +public class RedosCheck extends AbstractRedosCheck { - private final RegexReachabilityChecker reachabilityChecker = new RegexReachabilityChecker(false); - private final IntersectAutomataChecker intersectionChecker = new IntersectAutomataChecker(false); - - // Java 9 introduced a loop optimization that's applied to greedy repetitions in regexes that don't use capturing groups. - // Without this optimization any loop where for the same input multiple paths can be taken through the loop's body, - // has exponential runtime. With the optimization such loops, if they are greedy, have either quadratic runtime (if - // the paths go through an inner loop) or linear (i.e. safe) runtime. - // Consecutive (not nested) loops that can overlap each other cause quadratic runtime and are unaffected by this - // optimization. - enum BacktrackingType { - ALWAYS_EXPONENTIAL, - QUADRATIC_WHEN_OPTIMIZED, - ALWAYS_QUADRATIC, - LINEAR_WHEN_OPTIMIZED, - NO_ISSUE - } - - private boolean isJava9OrHigher() { - return context.getJavaVersion().isNotSet() || context.getJavaVersion().asInt() >= 9; - } - - private Optional message() { - boolean canBeOptimized = !regexContainsBackReference; - boolean optimized = isJava9OrHigher() && canBeOptimized; - switch (foundBacktrackingType) { - case ALWAYS_EXPONENTIAL: - return Optional.of(String.format(MESSAGE, EXP, "")); - case QUADRATIC_WHEN_OPTIMIZED: - // We only suggest upgrading to Java 9+ when that would make the regex safe (i.e. 
linear runtime), not if it would - // merely improve it from exponential to quadratic. - return Optional.of(String.format(MESSAGE, optimized ? POLY : EXP, "")); - case LINEAR_WHEN_OPTIMIZED: - if (optimized) { - return Optional.empty(); - } else { - return Optional.of(String.format(MESSAGE, EXP, canBeOptimized ? JAVA8_MESSAGE : "")); - } - case ALWAYS_QUADRATIC: - return Optional.of(String.format(MESSAGE, POLY, "")); - case NO_ISSUE: - return Optional.empty(); - } - throw new IllegalStateException("This line is not actually reachable"); - } + private static final String MESSAGE = "Make sure the regex used here, which is vulnerable to " + + "exponential runtime due to backtracking, cannot lead to denial of service."; @Override - public void checkRegex(RegexParseResult regexForLiterals, ExpressionTree methodInvocationOrAnnotation, MatchType matchType) { - if (regexForLiterals.getResult().getText().length() > MAX_REGEX_LENGTH) { - return; - } - regexContainsBackReference = false; - foundBacktrackingType = BacktrackingType.NO_ISSUE; - reachabilityChecker.clearCache(); - intersectionChecker.clearCache(); - boolean isUsedForFullMatch = matchType == MatchType.FULL || matchType == MatchType.BOTH; - boolean isUsedForPartialMatch = matchType == MatchType.PARTIAL || matchType == MatchType.BOTH; - RedosFinder visitor = new RedosFinder(regexForLiterals.getStartState(), regexForLiterals.getFinalState(), isUsedForFullMatch, isUsedForPartialMatch); - visitor.visit(regexForLiterals); - message().ifPresent(message -> - reportIssue(methodOrAnnotationName(methodInvocationOrAnnotation), message, null, Collections.emptyList()) - ); - } - - private void addBacktracking(BacktrackingType newBacktrackingType) { - if (newBacktrackingType.ordinal() < foundBacktrackingType.ordinal()) { - foundBacktrackingType = newBacktrackingType; - } - } - - private class RedosFinder extends RegexBaseVisitor { - - - private final Deque nonPossessiveRepetitions = new ArrayDeque<>(); - private final Map 
canFailCache = new HashMap<>(); - - private final AutomatonState startOfRegex; - private final AutomatonState endOfRegex; - private final boolean isUsedForFullMatch; - private final boolean isUsedForPartialMatch; - - public RedosFinder(AutomatonState startOfRegex, AutomatonState endOfRegex, boolean isUsedForFullMatch, boolean isUsedForPartialMatch) { - this.startOfRegex = startOfRegex; - this.endOfRegex = endOfRegex; - this.isUsedForFullMatch = isUsedForFullMatch; - this.isUsedForPartialMatch = isUsedForPartialMatch; - } - - @Override - public void visitRepetition(RepetitionTree tree) { - if (canFail(tree.continuation())) { - if (!tree.isPossessive() && tree.getQuantifier().isOpenEnded()) { - new BacktrackingFinder(tree.isReluctant(), tree.continuation()).visit(tree.getElement()); - } else { - super.visitRepetition(tree); - } - checkForOverlappingRepetitions(tree); - } - } - - private void checkForOverlappingRepetitions(RepetitionTree tree) { - if (tree.getQuantifier().isOpenEnded() && canFail(tree)) { - for (RepetitionTree repetition : nonPossessiveRepetitions) { - if (reachabilityChecker.canReach(repetition, tree)) { - SubAutomaton repetitionAuto = new SubAutomaton(repetition.getElement(), repetition.continuation(), false); - SubAutomaton continuationAuto = new SubAutomaton(repetition.continuation(), tree, false); - SubAutomaton treeAuto = new SubAutomaton(tree.getElement(), tree.continuation(), false); - if (subAutomatonCanConsume(repetitionAuto, continuationAuto) - && automatonIsEmptyOrIntersects(continuationAuto, treeAuto) - && intersectionChecker.check(repetitionAuto, treeAuto)) { - addBacktracking(BacktrackingType.ALWAYS_QUADRATIC); - } - } - } - if (overlapsWithImplicitMatchAlls(tree)) { - addBacktracking(BacktrackingType.ALWAYS_QUADRATIC); - } - addIfNonPossessive(tree); - } - } - - private boolean subAutomatonCanConsume(SubAutomaton auto1, SubAutomaton auto2) { - return canReachWithoutConsumingInputNorCrossingBoundaries(auto1.end, auto2.end) - || 
intersectionChecker.check(auto1, auto2); - } - - private boolean automatonIsEmptyOrIntersects(SubAutomaton auto1, SubAutomaton auto2) { - return canReachWithoutConsumingInputNorCrossingBoundaries(auto1.start, auto1.end) - || intersectionChecker.check(auto1, auto2); - } - - private void addIfNonPossessive(RepetitionTree tree) { - if (!tree.isPossessive()) { - nonPossessiveRepetitions.add(tree); - if (nonPossessiveRepetitions.size() > MAX_TRACKED_REPETITIONS) { - nonPossessiveRepetitions.removeFirst(); - } - } - } - - /** - * When used for partial matches, a regex acts as if it had `(?s:.*)` attached to its beginning and end unless anchored. - */ - private boolean overlapsWithImplicitMatchAlls(RepetitionTree tree) { - return isUsedForPartialMatch && canReachWithoutConsumingInputNorCrossingBoundaries(startOfRegex, tree); - } - - @Override - public void visitBackReference(BackReferenceTree tree) { - regexContainsBackReference = true; - } - - private boolean canFail(AutomatonState state) { - return canFail(state, !isUsedForFullMatch && !isAnchoredAtEnd(state)); - } - - private boolean canFail(AutomatonState state, boolean succeedOnEnd) { - if (canFailCache.containsKey(state)) { - return canFailCache.get(state); - } - canFailCache.put(state, true); - if (state.incomingTransitionType() != AutomatonState.TransitionType.EPSILON) { - return true; - } - if (canMatchAnything(state)) { - succeedOnEnd = true; - state = state.continuation(); - } - if ((succeedOnEnd && canReachWithoutConsumingInput(state, endOfRegex))) { - canFailCache.put(state, false); - return false; - } - for (AutomatonState successor : state.successors()) { - if (!canFail(successor, succeedOnEnd)) { - canFailCache.put(state, false); - return false; - } - } - return true; - } - - private boolean canMatchAnything(AutomatonState state) { - if (!(state instanceof RepetitionTree repetition)) { - return false; - } - return repetition.getQuantifier().getMinimumRepetitions() == 0 && 
repetition.getQuantifier().isOpenEnded() - && canMatchAnyCharacter(repetition.getElement()); - } - - private boolean canMatchAnyCharacter(RegexTree tree) { - SimplifiedRegexCharacterClass characterClass = new SimplifiedRegexCharacterClass(); - for (RegexTree singleCharacter : collectSingleCharacters(tree, new ArrayList<>())) { - if (singleCharacter.is(RegexTree.Kind.DOT)) { - characterClass.add((DotTree) singleCharacter); - } else { - characterClass.add((CharacterClassElementTree) singleCharacter); - } - } - return characterClass.matchesAnyCharacter(); - } - - private List collectSingleCharacters(@Nullable RegexTree tree, List accumulator) { - if (tree == null) { - return accumulator; - } - if (tree instanceof CharacterClassElementTree || tree.is(RegexTree.Kind.DOT)) { - accumulator.add(tree); - } else if (tree.is(RegexTree.Kind.DISJUNCTION)) { - for (RegexTree alternative : ((DisjunctionTree) tree).getAlternatives()) { - collectSingleCharacters(alternative, accumulator); - } - } else if (tree instanceof GroupTree groupTree) { - collectSingleCharacters(groupTree.getElement(), accumulator); - } else if (tree.is(RegexTree.Kind.REPETITION)) { - RepetitionTree repetition = (RepetitionTree) tree; - if (repetition.getQuantifier().getMinimumRepetitions() <= 1) { - collectSingleCharacters(repetition.getElement(), accumulator); - } - } - return accumulator; - } - - } - - private class BacktrackingFinder extends RegexBaseVisitor { - - private final boolean isReluctant; - private final AutomatonState endOfLoop; - - public BacktrackingFinder(boolean isReluctant, AutomatonState endOfLoop) { - this.isReluctant = isReluctant; - this.endOfLoop = endOfLoop; - } - - @Override - public void visitAtomicGroup(AtomicGroupTree tree) { - new RedosFinder(tree, tree.continuation(), false, false).visit(tree); - } - - @Override - public void visitRepetition(RepetitionTree tree) { - if (tree.isPossessive()) { - new RedosFinder(tree, tree.continuation(), false, false).visit(tree); - } else if 
(containsIntersections(Arrays.asList(tree.getElement(), tree.continuation()))) { - BacktrackingType greedyComplexity = tree.getQuantifier().isOpenEnded() ? BacktrackingType.QUADRATIC_WHEN_OPTIMIZED : BacktrackingType.LINEAR_WHEN_OPTIMIZED; - addBacktracking(isReluctant ? BacktrackingType.ALWAYS_EXPONENTIAL : greedyComplexity); - super.visitRepetition(tree); - } else { - super.visitRepetition(tree); - } - } - - @Override - public void visitDisjunction(DisjunctionTree tree) { - if (containsIntersections(tree.getAlternatives())) { - addBacktracking(isReluctant ? BacktrackingType.ALWAYS_EXPONENTIAL : BacktrackingType.LINEAR_WHEN_OPTIMIZED); - } else { - super.visitDisjunction(tree); - } - } - - @Override - public void visitBackReference(BackReferenceTree tree) { - regexContainsBackReference = true; - } - - boolean containsIntersections(List alternatives) { - for (int i = 0; i < alternatives.size() - 1; i++) { - AutomatonState state1 = alternatives.get(i); - for (int j = i + 1; j < alternatives.size(); j++) { - AutomatonState state2 = alternatives.get(j); - SubAutomaton auto1 = new SubAutomaton(state1, endOfLoop, false); - SubAutomaton auto2 = new SubAutomaton(state2, endOfLoop, false); - if (intersectionChecker.check(auto1, auto2)) { - return true; - } - } - } - return false; - } + Optional buildMessage() { + boolean optimized = isJava9OrHigher() && !regexContainsBackReference; + return switch (foundBacktrackingType) { + case ALWAYS_EXPONENTIAL -> Optional.of(MESSAGE); + case QUADRATIC_WHEN_OPTIMIZED, LINEAR_WHEN_OPTIMIZED -> optimized ? 
Optional.empty() : Optional.of(MESSAGE); + default -> Optional.empty(); + }; } } diff --git a/java-checks/src/main/java/org/sonar/java/checks/regex/SuperLinearRegexCheck.java b/java-checks/src/main/java/org/sonar/java/checks/regex/SuperLinearRegexCheck.java new file mode 100644 index 00000000000..984be17b486 --- /dev/null +++ b/java-checks/src/main/java/org/sonar/java/checks/regex/SuperLinearRegexCheck.java @@ -0,0 +1,40 @@ +/* + * SonarQube Java + * Copyright (C) SonarSource Sàrl + * mailto:info AT sonarsource DOT com + * + * You can redistribute and/or modify this program under the terms of + * the Sonar Source-Available License Version 1, as published by SonarSource Sàrl. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the Sonar Source-Available License for more details. + * + * You should have received a copy of the Sonar Source-Available License + * along with this program; if not, see https://sonarsource.com/license/ssal/ + */ +package org.sonar.java.checks.regex; + +import java.util.Optional; +import org.sonar.check.Rule; + +@Rule(key = "S8786") +public class SuperLinearRegexCheck extends AbstractRedosCheck { + + private static final String MESSAGE = "Simplify this regular expression to reduce its runtime, " + + "as it has super-linear performance due to backtracking."; + + @Override + Optional buildMessage() { + boolean optimized = isJava9OrHigher() && !regexContainsBackReference; + return switch (foundBacktrackingType) { + case ALWAYS_EXPONENTIAL -> Optional.empty(); + case QUADRATIC_WHEN_OPTIMIZED -> optimized ? 
Optional.of(MESSAGE) : Optional.empty(); + case ALWAYS_QUADRATIC -> Optional.of(MESSAGE); + case LINEAR_WHEN_OPTIMIZED -> Optional.empty(); + case NO_ISSUE -> Optional.empty(); + }; + } + +} diff --git a/java-checks/src/test/java/org/sonar/java/checks/regex/SuperLinearRegexCheckTest.java b/java-checks/src/test/java/org/sonar/java/checks/regex/SuperLinearRegexCheckTest.java new file mode 100644 index 00000000000..9f05f093dac --- /dev/null +++ b/java-checks/src/test/java/org/sonar/java/checks/regex/SuperLinearRegexCheckTest.java @@ -0,0 +1,52 @@ +/* + * SonarQube Java + * Copyright (C) SonarSource Sàrl + * mailto:info AT sonarsource DOT com + * + * You can redistribute and/or modify this program under the terms of + * the Sonar Source-Available License Version 1, as published by SonarSource Sàrl. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the Sonar Source-Available License for more details. 
+ * + * You should have received a copy of the Sonar Source-Available License + * along with this program; if not, see https://sonarsource.com/license/ssal/ + */ +package org.sonar.java.checks.regex; + +import org.junit.jupiter.api.Test; +import org.sonar.java.checks.verifier.CheckVerifier; + +import static org.sonar.java.checks.verifier.TestUtils.mainCodeSourcesPath; + +class SuperLinearRegexCheckTest { + + @Test + void test_java_version_unset() { + CheckVerifier.newVerifier() + .onFile(mainCodeSourcesPath("checks/regex/SuperLinearRegexCheckSample.java")) + .withCheck(new SuperLinearRegexCheck()) + .verifyIssues(); + } + + @Test + void test_java_version_9() { + CheckVerifier.newVerifier() + .withJavaVersion(9) + .onFile(mainCodeSourcesPath("checks/regex/SuperLinearRegexCheckSample.java")) + .withCheck(new SuperLinearRegexCheck()) + .verifyIssues(); + } + + @Test + void test_java_version_8() { + CheckVerifier.newVerifier() + .withJavaVersion(8) + .onFile(mainCodeSourcesPath("checks/regex/SuperLinearRegexCheckJava8.java")) + .withCheck(new SuperLinearRegexCheck()) + .verifyIssues(); + } + +} diff --git a/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/S8786.html b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/S8786.html new file mode 100644 index 00000000000..f7ded426891 --- /dev/null +++ b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/S8786.html @@ -0,0 +1,42 @@ +

Regular expression engines rely on backtracking to evaluate patterns against input. Certain regex patterns lead to non-linear backtracking, where +the evaluation time grows polynomially with input size.

+

Why is this an issue?

+

Regular expression engines use backtracking to try all possible execution paths when evaluating a pattern against an input. In some cases, this +leads to non-linear backtracking where the worst-case evaluation time grows polynomially (e.g., O(n²) or O(n³)) with the input size. While not as +severe as catastrophic backtracking, such patterns can significantly degrade application performance when processing large or untrusted inputs.

+

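This rule reports regular expressions that exhibit polynomial (for example, quadratic) backtracking behavior. Regular expressions whose backtracking is exponential are reported by {rule:java:S5852} instead.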

+

How to fix it

+

To fix a regular expression with non-linear backtracking, consider the following strategies:

+
    +
  • Replace . with negated character classes to exclude separators where applicable (e.g., [^,]+ instead of .+ before ,).
  • +
  • Use bounded quantifiers such as {1,5} to limit repetitions.
  • +
  • Restructure alternations and quantifiers to eliminate ambiguity — avoid patterns where multiple alternatives can match the same character.
  • +
  • Use possessive quantifiers (++, *+, ?+) or atomic grouping to prevent the regex engine from keeping backtracking + positions.
  • +
+

Code examples

+

The following regular expression has polynomial backtracking: without a start anchor, the engine retries the pattern at every position, leading to +quadratic evaluation time when there is no match.

+

Noncompliant code example

+
+java.util.regex.Pattern.compile("a+b").matcher(input).find(); // Noncompliant - polynomial backtracking when the pattern does not match
+
+

Compliant solution

+

Adding a start anchor prevents the engine from retrying at every position:

+
+java.util.regex.Pattern.compile("^a+b").matcher(input).find(); // Compliant - anchor eliminates redundant backtracking positions
+
+

Resources

+

Articles & blog posts

+ +

Related rules

+
    +
  • {rule:java:S5852} - Regular expressions should not cause catastrophic backtracking
  • +
+ diff --git a/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/S8786.json b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/S8786.json new file mode 100644 index 00000000000..236ff5b14de --- /dev/null +++ b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/S8786.json @@ -0,0 +1,24 @@ +{ + "title": "Regular expressions should not cause non-linear backtracking", + "type": "CODE_SMELL", + "code": { + "impacts": { + "RELIABILITY": "MEDIUM" + }, + "attribute": "EFFICIENT" + }, + "status": "ready", + "quickfix": "unknown", + "remediation": { + "func": "Constant\/Issue", + "constantCost": "20min" + }, + "tags": [ + "regex", + "performance" + ], + "defaultSeverity": "Major", + "ruleSpecification": "RSPEC-8786", + "sqKey": "S8786", + "scope": "All" +} diff --git a/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_agentic_AI_profile.json b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_agentic_AI_profile.json index ce376d7f611..dae9c3b958b 100644 --- a/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_agentic_AI_profile.json +++ b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_agentic_AI_profile.json @@ -470,6 +470,7 @@ "S8696", "S8714", "S8715", - "S8745" + "S8745", + "S8786" ] } diff --git a/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_way_profile.json b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_way_profile.json index 31960db62d8..22ad6034de5 100644 --- a/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_way_profile.json +++ b/sonar-java-plugin/src/main/resources/org/sonar/l10n/java/rules/java/Sonar_way_profile.json @@ -538,6 +538,7 @@ "S8700", "S8714", "S8715", - "S8745" + "S8745", + "S8786" ] }