|
1 | 1 | //! # string_pipeline |
2 | 2 | //! |
3 | | -//! A flexible, template-driven string transformation pipeline for Rust. |
| 3 | +//! A powerful string transformation CLI tool and Rust library that makes complex text processing simple. |
| 4 | +//! Transform data using intuitive **template syntax** — chain operations like **split**, **join**, **replace**, |
| 5 | +//! **filter**, and **20+ others** in a single readable expression. |
4 | 6 | //! |
5 | | -//! This library provides a way to define a sequence of string operations using a concise template syntax, |
6 | | -//! allowing for dynamic string manipulation based on user-defined templates. |
| 7 | +//! ## Features |
| 8 | +//! |
| 9 | +//! - **🔗 Chainable Operations**: Pipe operations together naturally |
| 10 | +//! - **🎯 Precise Control**: Python-like ranges with Rust syntax (`-2..`, `1..=3`) |
| 11 | +//! - **🗺️ Powerful Mapping**: Apply sub-pipelines to each list item |
| 12 | +//! - **🔍 Regex Support**: sed-like patterns for complex transformations |
| 13 | +//! - **🐛 Debug Mode**: Step-by-step operation visualization |
| 14 | +//! - **📥 Flexible I/O**: CLI tool + embeddable Rust library |
| 15 | +//! - **🦀 Performance Optimized**: Zero-copy operations where possible, efficient memory usage |
| 16 | +//! - **🌍 Unicode Support**: Full UTF-8 and Unicode character handling |
| 17 | +//! - **🛡️ Error Handling**: Comprehensive error reporting for invalid operations |
| 18 | +//! |
| 19 | +//! ## Quick Start |
7 | 20 | //! |
8 | | -//! # Quick start |
9 | 21 | //! ```rust |
10 | 22 | //! use string_pipeline::Template; |
11 | 23 | //! |
12 | | -//! // Define a template with operations |
| 24 | +//! // Split by comma, take first 2 items, join with " and " |
13 | 25 | //! let template = Template::parse("{split:,:0..2|join: and }").unwrap(); |
| 26 | +//! let result = template.format("apple,banana,cherry,date").unwrap(); |
| 27 | +//! assert_eq!(result, "apple and banana"); |
| 28 | +//! ``` |
14 | 29 | //! |
15 | | -//! // Format a string using the template |
16 | | -//! let result = template.format("a,b,c,d").unwrap(); |
| 30 | +//! ## Template Syntax Overview |
17 | 31 | //! |
18 | | -//! assert_eq!(result, "a and b"); |
| 32 | +//! Templates are enclosed in `{}` and consist of operations separated by `|`: |
| 33 | +//! |
| 34 | +//! ```text |
| 35 | +//! {operation1|operation2|operation3} |
19 | 36 | //! ``` |
20 | 37 | //! |
21 | | -//! A more in-depth view of the template syntax can be found in the [Template::parse](Template::parse) method documentation. |
| 38 | +//! ### Core Operations (20+ Available) |
| 39 | +//! |
| 40 | +//! **🔪 Text Splitting & Joining** |
| 41 | +//! - **`split:sep:range`** - Split text and optionally select range |
| 42 | +//! - **`join:sep`** - Join list items with separator |
| 43 | +//! - **`slice:range`** - Select list elements by range |
| 44 | +//! |
| 45 | +//! **✨ Text Transformation** |
| 46 | +//! - **`upper`**, **`lower`** - Case conversion |
| 47 | +//! - **`trim[:chars][:direction]`** - Remove whitespace or custom characters |
| 48 | +//! - **`append:text`**, **`prepend:text`** - Add text to the end (`append`) or the start (`prepend`) |
| 49 | +//! - **`pad:width[:char][:direction]`** - Pad string to width |
| 50 | +//! - **`substring:range`** - Extract characters from string |
| 51 | +//! |
| 52 | +//! **🔍 Pattern Matching & Replacement** |
| 53 | +//! - **`replace:s/pattern/replacement/flags`** - Regex find/replace (sed-like) |
| 54 | +//! - **`regex_extract:pattern[:group]`** - Extract with regex pattern |
| 55 | +//! - **`filter:pattern`** - Keep items matching regex |
| 56 | +//! - **`filter_not:pattern`** - Remove items matching regex |
| 57 | +//! |
| 58 | +//! **🗂️ List Processing** |
| 59 | +//! - **`sort[:asc|desc]`** - Sort items alphabetically |
| 60 | +//! - **`reverse`** - Reverse string or list order |
| 61 | +//! - **`unique`** - Remove duplicate list items |
| 62 | +//! - **`map:{operations}`** - Apply sub-pipeline to each list item |
| 63 | +//! |
| 64 | +//! **🧹 Utility Operations** |
| 65 | +//! - **`strip_ansi`** - Remove ANSI escape sequences |
| 66 | +//! |
| 67 | +//! ### Range Syntax |
| 68 | +//! |
| 69 | +//! Ranges use Rust-like syntax and support negative indexing: |
| 70 | +//! |
| 71 | +//! - **`N`** - Single index (`1` = second item) |
| 72 | +//! - **`N..M`** - Range exclusive (`1..3` = items 1,2) |
| 73 | +//! - **`N..=M`** - Range inclusive (`1..=3` = items 1,2,3) |
| 74 | +//! - **`N..`** - From N to end |
| 75 | +//! - **`..M`** - From start to M-1 |
| 76 | +//! - **`..`** - All items |
| 77 | +//! |
| 78 | +//! Negative indices count from the end (`-1` = last item). |
| 79 | +//! |
| 80 | +//! ### Debug Mode |
| 81 | +//! |
| 82 | +//! Add `!` after the opening `{` to enable detailed operation tracing: |
22 | 83 | //! |
23 | | -//! # More examples |
24 | | -//! Get the second item in a comma-separated list: |
25 | 84 | //! ```rust |
26 | 85 | //! use string_pipeline::Template; |
27 | 86 | //! |
28 | | -//! let template = Template::parse("{split:,:1}").unwrap(); |
29 | | -//! |
| 87 | +//! let template = Template::parse("{!split:,:..}").unwrap(); |
| 88 | +//! // Outputs debug information to stderr during processing |
30 | 89 | //! let result = template.format("a,b,c").unwrap(); |
| 90 | +//! assert_eq!(result, "a,b,c"); |
| 91 | +//! ``` |
| 92 | +//! |
| 93 | +//! ## Common Use Cases |
| 94 | +//! |
| 95 | +//! ### Basic Text Processing |
| 96 | +//! ```rust |
| 97 | +//! use string_pipeline::Template; |
| 98 | +//! |
| 99 | +//! // Clean and normalize text |
| 100 | +//! let cleaner = Template::parse("{trim|replace:s/\\s+/ /g|lower}").unwrap(); |
| 101 | +//! let result = cleaner.format(" Hello WORLD ").unwrap(); |
| 102 | +//! assert_eq!(result, "hello world"); |
| 103 | +//! ``` |
31 | 104 | //! |
32 | | -//! assert_eq!(result, "b"); |
| 105 | +//! ### Data Extraction |
| 106 | +//! ```rust |
| 107 | +//! use string_pipeline::Template; |
| 108 | +//! |
| 109 | +//! // Extract second field from space-separated data |
| 110 | +//! let extractor = Template::parse("{split: :1}").unwrap(); |
| 111 | +//! let result = extractor.format("user 1234 active").unwrap(); |
| 112 | +//! assert_eq!(result, "1234"); |
33 | 113 | //! ``` |
34 | 114 | //! |
35 | | -//! Replace all spaces with underscores and uppercase: |
| 115 | +//! ### List Processing with Map |
36 | 116 | //! ```rust |
37 | 117 | //! use string_pipeline::Template; |
38 | 118 | //! |
39 | | -//! let template = Template::parse("{replace:s/ /_/g|upper}").unwrap(); |
| 119 | +//! // Process each item in a list |
| 120 | +//! let processor = Template::parse("{split:,:..|map:{trim|upper}|join:\\|}").unwrap(); |
| 121 | +//! let result = processor.format(" apple, banana , cherry ").unwrap(); |
| 122 | +//! assert_eq!(result, "APPLE|BANANA|CHERRY"); |
| 123 | +//! ``` |
40 | 124 | //! |
41 | | -//! let result = template.format("foo bar baz").unwrap(); |
| 125 | +//! ### Advanced Data Processing |
| 126 | +//! ```rust |
| 127 | +//! use string_pipeline::Template; |
42 | 128 | //! |
43 | | -//! assert_eq!(result, "FOO_BAR_BAZ"); |
| 129 | +//! // Extract domains from URLs |
| 130 | +//! let domain_extractor = Template::parse("{split:,:..|map:{regex_extract://([^/]+):1|upper}}").unwrap(); |
| 131 | +//! let result = domain_extractor.format("https://github.com,https://google.com").unwrap(); |
| 132 | +//! assert_eq!(result, "GITHUB.COM,GOOGLE.COM"); |
44 | 133 | //! ``` |
45 | 134 | //! |
46 | | -//! Trim, split and append a suffix to each resulting item: |
| 135 | +//! ### Log Processing |
47 | 136 | //! ```rust |
48 | 137 | //! use string_pipeline::Template; |
49 | 138 | //! |
50 | | -//! let template = Template::parse("{split:,:..|map:{trim|append:!}}").unwrap(); |
| 139 | +//! // Extract timestamps from log entries |
| 140 | +//! let log_parser = Template::parse(r"{split:\n:..|map:{regex_extract:\d\d\d\d-\d\d-\d\d}|filter_not:^$|join:\n}").unwrap(); |
| 141 | +//! let logs = "2023-12-01 ERROR: Failed\n2023-12-02 INFO: Success\nInvalid line"; |
| 142 | +//! let result = log_parser.format(logs).unwrap(); |
| 143 | +//! assert_eq!(result, "2023-12-01\n2023-12-02"); |
| 144 | +//! ``` |
51 | 145 | //! |
52 | | -//! let result = template.format(" a, b,c , d , e ").unwrap(); |
| 146 | +//! ### Filter Operations |
| 147 | +//! ```rust |
| 148 | +//! use string_pipeline::Template; |
53 | 149 | //! |
54 | | -//! assert_eq!(result, "a!,b!,c!,d!,e!"); |
| 150 | +//! // Filter files by extension |
| 151 | +//! let py_filter = Template::parse("{split:,:..|filter:\\.py$|sort|join:\\n}").unwrap(); |
| 152 | +//! let files = "app.py,readme.md,test.py,data.json"; |
| 153 | +//! let result = py_filter.format(files).unwrap(); |
| 154 | +//! assert_eq!(result, "app.py\ntest.py"); |
55 | 155 | //! ``` |
56 | 156 | //! |
57 | | -//! Strip ANSI escape codes: |
| 157 | +//! ## Type System |
| 158 | +//! |
| 159 | +//! The pipeline system has a clear type system that distinguishes between: |
| 160 | +//! - **String operations**: Work only on strings (e.g., `upper`, `lower`, `trim`, `replace`) |
| 161 | +//! - **List operations**: Work only on lists (e.g., `sort`, `unique`, `slice`) |
| 162 | +//! - **Type-preserving operations**: Accept both types (e.g., `filter`, `reverse`) |
| 163 | +//! - **Type-converting operations**: Change between types (e.g., `split` converts string→list, `join` converts list→string) |
| 164 | +//! |
| 165 | +//! Use `map:{operation}` to apply string operations to each item in a list. |
| 166 | +//! |
| 167 | +//! ## Error Handling |
| 168 | +//! |
| 169 | +//! Both `Template::parse` and `Template::format` return a `Result` (`format` yields `Result<String, String>`) for comprehensive error handling: |
| 170 | +//! |
58 | 171 | //! ```rust |
59 | 172 | //! use string_pipeline::Template; |
60 | 173 | //! |
61 | | -//! let template = Template::parse("{strip_ansi}").unwrap(); |
| 174 | +//! // Invalid template syntax |
| 175 | +//! let result = Template::parse("{split:}"); |
| 176 | +//! assert!(result.is_err()); |
| 177 | +//! |
| 178 | +//! // Type mismatch errors are clear and helpful |
| 179 | +//! let template = Template::parse("{sort}").unwrap(); |
| 180 | +//! let result = template.format("not_a_list"); |
| 181 | +//! assert!(result.is_err()); |
| 182 | +//! // Error: "sort operation can only be applied to lists. Use split first." |
| 183 | +//! ``` |
| 184 | +//! |
| 185 | +//! ## Performance Notes |
| 186 | +//! |
| 187 | +//! - Templates are compiled once and can be reused efficiently |
| 188 | +//! - Operations use zero-copy techniques where possible |
| 189 | +//! - Large datasets are processed with optimized algorithms |
| 190 | +//! - Regex patterns are compiled and cached internally |
| 191 | +//! - Memory allocation is minimized for common operations |
| 192 | +//! - Early exit optimizations for string operations when patterns don't match |
62 | 193 | //! |
63 | | -//! let result = template.format("\x1b[31mHello\x1b[0m").unwrap(); |
| 194 | +//! For high-throughput applications, compile templates once and reuse them: |
64 | 195 | //! |
65 | | -//! assert_eq!(result, "Hello"); |
| 196 | +//! ```rust |
| 197 | +//! use string_pipeline::Template; |
| 198 | +//! |
| 199 | +//! // Compile once |
| 200 | +//! let template = Template::parse("{split:,:0}").unwrap(); |
| 201 | +//! |
| 202 | +//! // Reuse many times |
| 203 | +//! for input in &["a,b,c", "x,y,z", "1,2,3"] { |
| 204 | +//! let result = template.format(input).unwrap(); |
| 205 | +//! println!("{}", result); |
| 206 | +//! } |
66 | 207 | //! ``` |
| 208 | +//! |
| 209 | +//! For complete documentation including all operations, advanced features, and debugging techniques, |
| 210 | +//! see the [`Template`] documentation and the comprehensive guides in the `docs/` directory. |
67 | 211 |
|
68 | 212 | mod pipeline; |
69 | 213 |
|
|
0 commit comments