// nautilus_core/string/conversions.rs

// -------------------------------------------------------------------------------------------------
//  Copyright (C) 2015-2026 Nautech Systems Pty Ltd. All rights reserved.
//  https://nautechsystems.io
//
//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
//  You may not use this file except in compliance with the License.
//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
//
//  Unless required by applicable law or agreed to in writing, software
//  distributed under the License is distributed on an "AS IS" BASIS,
//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//  See the License for the specific language governing permissions and
//  limitations under the License.
// -------------------------------------------------------------------------------------------------

//! String case conversions (`snake_case`, Title Case).
17
18/// Converts a string from any common case to `snake_case`.
19///
20/// Word boundaries are detected at:
21/// - Non-alphanumeric characters (spaces, hyphens, underscores, colons, etc.)
22/// - Transitions from lowercase or digit to uppercase (`camelCase` -> `camel_case`)
23/// - Within consecutive uppercase letters, before the last if followed by lowercase
24///   (`XMLParser` -> `xml_parser`)
25#[must_use]
26pub fn to_snake_case(s: &str) -> String {
27    if s.is_ascii() {
28        to_snake_case_ascii(s.as_bytes())
29    } else {
30        to_snake_case_unicode(s)
31    }
32}
33
/// Byte-oriented `snake_case` conversion used for ASCII-only input.
///
/// Splits `bytes` into words, lowercases each word, and joins them with `_`.
/// Any byte that is not an ASCII letter or digit is treated as a separator and
/// dropped. The caller is expected to pass ASCII input (see the `is_ascii`
/// check in `to_snake_case`).
fn to_snake_case_ascii(bytes: &[u8]) -> String {
    // Case of the last cased character within the current alphanumeric run,
    // matching heck's word-boundary rules. Digits leave the mode untouched.
    #[derive(Clone, Copy, PartialEq)]
    enum Mode {
        Boundary,
        Lowercase,
        Uppercase,
    }

    let len = bytes.len();
    // Reserve a little extra room for the underscores inserted between words.
    let mut result = String::with_capacity(len + len / 4);
    let mut first_word = true;
    let mut mode = Mode::Boundary;
    // Index of the first byte of the word currently being scanned.
    let mut word_start = 0;

    for (i, &b) in bytes.iter().enumerate() {
        if !b.is_ascii_alphanumeric() {
            // Separator: flush the pending word (if any) and start afresh.
            if word_start < i {
                push_lower_ascii(&mut result, &bytes[word_start..i], &mut first_word);
            }
            word_start = i + 1;
            mode = Mode::Boundary;
            continue;
        }

        let next_mode = if b.is_ascii_lowercase() {
            Mode::Lowercase
        } else if b.is_ascii_uppercase() {
            Mode::Uppercase
        } else {
            mode
        };

        // Case-based boundaries only exist between two adjacent alphanumeric
        // bytes; a following separator (or end of input) is handled elsewhere.
        if let Some(&next) = bytes.get(i + 1).filter(|n| n.is_ascii_alphanumeric()) {
            if next_mode == Mode::Lowercase && next.is_ascii_uppercase() {
                // lower/digit-after-lower -> Upper: the word ends with `b`.
                push_lower_ascii(&mut result, &bytes[word_start..=i], &mut first_word);
                word_start = i + 1;
                mode = Mode::Boundary;
            } else if mode == Mode::Uppercase && b.is_ascii_uppercase() && next.is_ascii_lowercase()
            {
                // Upper run followed by lowercase: `b` starts the next word
                // (`XMLParser` splits before the `P`).
                if word_start < i {
                    push_lower_ascii(&mut result, &bytes[word_start..i], &mut first_word);
                }
                word_start = i;
                mode = Mode::Boundary;
            } else {
                mode = next_mode;
            }
        }
    }

    // Flush the trailing word, if the input did not end on a separator.
    if word_start < len && bytes[word_start].is_ascii_alphanumeric() {
        push_lower_ascii(&mut result, &bytes[word_start..], &mut first_word);
    }

    result
}

/// Appends the ASCII-lowercased `word` to `result`, preceded by `_` unless it
/// is the first word written.
///
/// `first_word` is cleared on every call, including when `word` is empty (in
/// which case nothing is appended).
fn push_lower_ascii(result: &mut String, word: &[u8], first_word: &mut bool) {
    if word.is_empty() {
        *first_word = false;
        return;
    }

    if !*first_word {
        result.push('_');
    }
    *first_word = false;

    result.extend(word.iter().map(|b| char::from(b.to_ascii_lowercase())));
}
112
/// `char`-based `snake_case` conversion used when the input contains
/// non-ASCII characters.
///
/// The input is split on every character that is not alphanumeric (per
/// `char::is_alphanumeric`); each resulting run is then split further on case
/// transitions, lowercased, and joined with `_`.
fn to_snake_case_unicode(s: &str) -> String {
    // Case of the last cased character seen in the current word. Characters
    // without case (digits, uncased letters) leave the mode untouched.
    #[derive(Clone, Copy, PartialEq)]
    enum Mode {
        Boundary,
        Lowercase,
        Uppercase,
    }

    // Reserve a little extra room for the underscores inserted between words.
    let mut result = String::with_capacity(s.len() + s.len() / 4);
    let mut first_word = true;

    for word in s.split(|c: char| !c.is_alphanumeric()) {
        let mut char_indices = word.char_indices().peekable();
        // Byte offset within `word` where the current sub-word starts.
        let mut init = 0;
        let mut mode = Mode::Boundary;

        while let Some((i, c)) = char_indices.next() {
            match char_indices.peek() {
                Some(&(next_i, next)) => {
                    let next_mode = if c.is_lowercase() {
                        Mode::Lowercase
                    } else if c.is_uppercase() {
                        Mode::Uppercase
                    } else {
                        mode
                    };

                    if next_mode == Mode::Lowercase && next.is_uppercase() {
                        // lower -> Upper: the sub-word ends with `c`.
                        push_lower_unicode(&mut result, &word[init..next_i], &mut first_word);
                        init = next_i;
                        mode = Mode::Boundary;
                    } else if mode == Mode::Uppercase && c.is_uppercase() && next.is_lowercase() {
                        // Upper run followed by lowercase: `c` starts the next sub-word.
                        push_lower_unicode(&mut result, &word[init..i], &mut first_word);
                        init = i;
                        mode = Mode::Boundary;
                    } else {
                        mode = next_mode;
                    }
                }
                // Last character of the run: flush whatever is pending.
                None => push_lower_unicode(&mut result, &word[init..], &mut first_word),
            }
        }
    }

    result
}

/// Appends the lowercased `word` to `result`, preceded by `_` unless it is the
/// first word written.
///
/// Lowercasing uses `char::to_lowercase`, so a single character may expand to
/// several. `first_word` is cleared on every call, including when `word` is
/// empty (in which case nothing is appended).
fn push_lower_unicode(result: &mut String, word: &str, first_word: &mut bool) {
    if word.is_empty() {
        *first_word = false;
        return;
    }

    if !*first_word {
        result.push('_');
    }
    *first_word = false;

    result.extend(word.chars().flat_map(char::to_lowercase));
}
177
/// Title-cases `s` by capitalizing the first letter of each alphabetic run.
///
/// Modeled on Python's `str.title()`: word boundaries fall at any non-alphabetic
/// character, the first letter of each run is uppercased, and the rest are
/// lowercased. Non-alphabetic characters are copied through unchanged.
///
/// Case mapping uses `char::to_uppercase` / `char::to_lowercase`, so a single
/// character may expand to several in the output, and alphabetic characters
/// that have no case are emitted as-is while still counting as part of a run.
///
/// # Examples
///
/// ```
/// use nautilus_core::string::conversions::title_case;
///
/// assert_eq!(title_case("example"), "Example");
/// assert_eq!(title_case("hello_world"), "Hello_World");
/// assert_eq!(title_case("hello world"), "Hello World");
/// assert_eq!(title_case(""), "");
/// ```
#[must_use]
pub fn title_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // Whether the previous character was alphabetic, i.e. we are inside a run.
    let mut prev_alpha = false;

    for ch in s.chars() {
        if ch.is_alphabetic() {
            if prev_alpha {
                out.extend(ch.to_lowercase());
            } else {
                // First letter of a new run.
                out.extend(ch.to_uppercase());
            }
            prev_alpha = true;
        } else {
            out.push(ch);
            prev_alpha = false;
        }
    }

    out
}
215
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    // Cases up to the `nautilus_model::...` path are pure ASCII and exercise the
    // byte-oriented fast path; the cases after it contain non-ASCII characters
    // and exercise the `char`-based path.
    #[rstest]
    #[case("CamelCase", "camel_case")]
    #[case("This is Human case.", "this_is_human_case")]
    #[case(
        "MixedUP CamelCase, with some Spaces",
        "mixed_up_camel_case_with_some_spaces"
    )]
    #[case(
        "mixed_up_ snake_case with some _spaces",
        "mixed_up_snake_case_with_some_spaces"
    )]
    #[case("kebab-case", "kebab_case")]
    #[case("SHOUTY_SNAKE_CASE", "shouty_snake_case")]
    #[case("snake_case", "snake_case")]
    #[case("XMLHttpRequest", "xml_http_request")]
    #[case("FIELD_NAME11", "field_name11")]
    #[case("99BOTTLES", "99bottles")]
    #[case("abc123def456", "abc123def456")]
    #[case("abc123DEF456", "abc123_def456")]
    #[case("abc123Def456", "abc123_def456")]
    #[case("abc123DEf456", "abc123_d_ef456")]
    #[case("ABC123def456", "abc123def456")]
    #[case("ABC123DEF456", "abc123def456")]
    #[case("ABC123Def456", "abc123_def456")]
    #[case("ABC123DEf456", "abc123d_ef456")]
    #[case("ABC123dEEf456FOO", "abc123d_e_ef456_foo")]
    #[case("abcDEF", "abc_def")]
    #[case("ABcDE", "a_bc_de")]
    #[case("", "")]
    #[case("A", "a")]
    #[case("AB", "ab")]
    #[case("PascalCase", "pascal_case")]
    #[case("camelCase", "camel_case")]
    #[case("getHTTPResponse", "get_http_response")]
    #[case("Level1", "level1")]
    #[case("OrderBookDelta", "order_book_delta")]
    #[case("IOError", "io_error")]
    #[case("SimpleHTTPServer", "simple_http_server")]
    #[case("version2Release", "version2_release")]
    #[case("ALLCAPS", "allcaps")]
    #[case("nautilus_model::data::bar::Bar", "nautilus_model_data_bar_bar")] // nautilus-import-ok
    #[case("ÉcoleNormale", "école_normale")]
    #[case("naïveCafé", "naïve_café")]
    #[case("héllo wörld", "héllo_wörld")]
    fn test_to_snake_case(#[case] input: &str, #[case] expected: &str) {
        assert_eq!(to_snake_case(input), expected);
    }

    #[rstest]
    #[case("", "")]
    #[case("a", "A")]
    #[case("example", "Example")]
    #[case("EXAMPLE", "Example")]
    #[case("hello_world", "Hello_World")]
    #[case("hello-world", "Hello-World")]
    #[case("hello world", "Hello World")]
    #[case("hELLO wORLD", "Hello World")]
    #[case("123abc", "123Abc")]
    #[case("_leading", "_Leading")]
    #[case("élan vital", "Élan Vital")]
    #[case("ÉCOLE", "École")]
    fn test_title_case(#[case] input: &str, #[case] expected: &str) {
        assert_eq!(title_case(input), expected);
    }
}