Skip to content

Commit 1b36fc2

Browse files
committed
Adding unix filename pattern matching to GNATCOLL
TN: T915-017 Change-Id: Ie1fabaccdaece97fc2d18a913172c19027a18409
1 parent 920985a commit 1b36fc2

File tree

2 files changed

+261
-1
lines changed

2 files changed

+261
-1
lines changed

src/gnatcoll-utils.adb

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,259 @@ package body GNATCOLL.Utils is
869869
return Str'Last;
870870
end Line_End;
871871

872+
-----------
873+
-- Match --
874+
-----------
875+
876+
function Match (Str : String; Pattern : String) return Boolean
877+
is
878+
879+
-- P_I and S_I are index to next character to read in respectively
880+
-- Pattern and String
881+
P_I : Natural := Pattern'First;
882+
S_I : Natural := Str'First;
883+
884+
-- P_C and S_C contain the current character to be processed for
885+
-- Pattern and String
886+
P_C : Character;
887+
S_C : Character;
888+
889+
procedure Next_Str_Char;
890+
-- Read the next chracter in String. Adjust S_C and S_I. When no
891+
-- character is available in String then S_C is set to ASCII.NUL.
892+
pragma Inline (Next_Str_Char);
893+
894+
procedure Next_Pattern_Char;
895+
-- Read the next chracter in Pattern. Adjust P_C and P_I. When no
896+
-- character is available in Pattern then P_C is set to ASCII.NUL.
897+
pragma Inline (Next_Pattern_Char);
898+
899+
function Lookahead_Pattern
900+
(N : Positive)
901+
return Character;
902+
-- Return character which is at N position of the current one without
903+
-- updating P_I
904+
905+
function Bracket_Match return Boolean;
906+
-- Return True if next character in Str match a bracket expression.
907+
908+
-------------------
909+
-- Bracket_Match --
910+
-------------------
911+
912+
function Bracket_Match return Boolean is
913+
Saved_P_I : constant Natural := P_I;
914+
Is_Negation : Boolean := False;
915+
Has_Closing_Bracket : Boolean := False;
916+
Result : Boolean := False;
917+
begin
918+
919+
if Lookahead_Pattern (1) in '^' | '!' then
920+
Is_Negation := True;
921+
Next_Pattern_Char;
922+
end if;
923+
924+
loop
925+
Next_Pattern_Char;
926+
case P_C is
927+
when ASCII.NUL =>
928+
exit;
929+
when ']' =>
930+
Has_Closing_Bracket := True;
931+
exit;
932+
when others =>
933+
-- This is either a character match or a range
934+
declare
935+
Range_Start : constant Character := P_C;
936+
Range_End : Character := P_C;
937+
begin
938+
if Lookahead_Pattern (1) = '-' and then
939+
Lookahead_Pattern (2) not in ASCII.NUL | ']'
940+
then
941+
-- this is a range
942+
Next_Pattern_Char;
943+
Next_Pattern_Char;
944+
Range_End := P_C;
945+
end if;
946+
if S_C in Range_Start .. Range_End then
947+
Result := True;
948+
exit;
949+
end if;
950+
end;
951+
end case;
952+
end loop;
953+
954+
if Result then
955+
-- Go to next closing bracket
956+
while P_C /= ASCII.NUL loop
957+
Next_Pattern_Char;
958+
if P_C = ']' then
959+
Has_Closing_Bracket := True;
960+
exit;
961+
end if;
962+
end loop;
963+
end if;
964+
965+
if not Has_Closing_Bracket then
966+
P_I := Saved_P_I;
967+
return S_C = '[';
968+
else
969+
if Is_Negation then
970+
return not Result;
971+
else
972+
return Result;
973+
end if;
974+
end if;
975+
end Bracket_Match;
976+
977+
-----------------------
978+
-- Lookahead_Pattern --
979+
-----------------------
980+
981+
function Lookahead_Pattern
982+
(N : Positive)
983+
return Character
984+
is
985+
begin
986+
987+
if P_I + N - 1 <= Pattern'Last then
988+
return Pattern (P_I + N - 1);
989+
else
990+
return ASCII.NUL;
991+
end if;
992+
end Lookahead_Pattern;
993+
994+
-----------------------
995+
-- Next_Pattern_Char --
996+
-----------------------
997+
998+
procedure Next_Pattern_Char is
999+
begin
1000+
if P_I <= Pattern'Last then
1001+
P_C := Pattern (P_I);
1002+
P_I := P_I + 1;
1003+
else
1004+
P_C := ASCII.NUL;
1005+
end if;
1006+
end Next_Pattern_Char;
1007+
1008+
-------------------
1009+
-- Next_Str_Char --
1010+
-------------------
1011+
1012+
procedure Next_Str_Char is
1013+
begin
1014+
if S_I <= Str'Last then
1015+
S_C := Str (S_I);
1016+
S_I := S_I + 1;
1017+
else
1018+
S_C := ASCII.NUL;
1019+
end if;
1020+
end Next_Str_Char;
1021+
1022+
begin
1023+
1024+
-- Handle special cases in which Pattern is an empty string
1025+
if Pattern'Length = 0 then
1026+
if Str'Length = 0 then
1027+
return True;
1028+
else
1029+
return False;
1030+
end if;
1031+
end if;
1032+
1033+
while P_I <= Pattern'Last loop
1034+
Next_Pattern_Char;
1035+
1036+
case P_C is
1037+
when '?' =>
1038+
-- A '?' is a pattern that shall match any character.
1039+
-- The only case when match fails is when there is no more
1040+
-- character to read from Str.
1041+
Next_Str_Char;
1042+
if S_C = ASCII.NUL then
1043+
return False;
1044+
end if;
1045+
1046+
when '\' =>
1047+
-- Special characters can be escaped to remove their special
1048+
-- meaning by preceding them with a '\' character.
1049+
-- This escaping '\' is discarded. The sequence "\\"
1050+
-- represents one literal '\'.
1051+
1052+
-- Trailing backslash in the pattern result in an invalid
1053+
-- pattern.
1054+
if P_I > Pattern'Last then
1055+
return False;
1056+
end if;
1057+
1058+
Next_Pattern_Char;
1059+
Next_Str_Char;
1060+
1061+
if S_C /= P_C then
1062+
return False;
1063+
end if;
1064+
1065+
when '[' =>
1066+
Next_Str_Char;
1067+
if not Bracket_Match then
1068+
return False;
1069+
end if;
1070+
1071+
when '*' =>
1072+
-- Collapse multiple successive '?' and '*'.
1073+
-- For successive ' * ' there is nothing to do.
1074+
-- For '?' only a character on Str is needed.
1075+
while Lookahead_Pattern (1) in '*' | '?' loop
1076+
1077+
Next_Pattern_Char;
1078+
1079+
if P_C = '?' then
1080+
if S_C = ASCII.NUL then
1081+
return False;
1082+
end if;
1083+
1084+
Next_Str_Char;
1085+
end if;
1086+
1087+
end loop;
1088+
1089+
-- If we reach the end of the Pattern then match is ensured.
1090+
if P_I > Pattern'Last then
1091+
return True;
1092+
end if;
1093+
1094+
-- Try all string suffixes against remaining part of the
1095+
-- pattern. This simulates how many characters are consumed
1096+
-- by the '*'. (0, 1, ...).
1097+
-- The recursion is not infinite as next character in the
1098+
-- pattern will consume a character from Str
1099+
-- (i.e next character in Pattern is not a ' * ').
1100+
1101+
for Index in S_I .. Str'Last loop
1102+
if Match
1103+
(Str (Index .. Str'Last),
1104+
Pattern (P_I .. Pattern'Last))
1105+
then
1106+
return True;
1107+
end if;
1108+
end loop;
1109+
1110+
return False;
1111+
1112+
when others =>
1113+
Next_Str_Char;
1114+
if S_C /= P_C then
1115+
return False;
1116+
end if;
1117+
end case;
1118+
1119+
end loop;
1120+
1121+
return S_I > Str'Last;
1122+
1123+
end Match;
1124+
8721125
---------------
8731126
-- Next_Line --
8741127
---------------

src/gnatcoll-utils.ads

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
------------------------------------------------------------------------------
22
-- G N A T C O L L --
33
-- --
4-
-- Copyright (C) 2008-2020, AdaCore --
4+
-- Copyright (C) 2008-2022, AdaCore --
55
-- --
66
-- This library is free software; you can redistribute it and/or modify it --
77
-- under terms of the GNU General Public License as published by the Free --
@@ -234,6 +234,13 @@ package GNATCOLL.Utils is
234234
-- languages). It doesn't check whether the identifier starts with an
235235
-- underscore for instance, just whether the characters would be valid.
236236

237+
function Match (Str : String; Pattern : String) return Boolean;
238+
-- Implementation of POSIX pattern matching
239+
--
240+
-- @param Str string to check
241+
-- @param Pattern a posix pattern
242+
-- @return True if Str matches Pattern, False otherwise
243+
237244
------------
238245
-- Expect --
239246
------------

0 commit comments

Comments
 (0)