Skip to main content

nautilus_core/string/
parsing.rs

1// -------------------------------------------------------------------------------------------------
2//  Copyright (C) 2015-2026 Nautech Systems Pty Ltd. All rights reserved.
3//  https://nautechsystems.io
4//
5//  Licensed under the GNU Lesser General Public License Version 3.0 (the "License");
6//  You may not use this file except in compliance with the License.
7//  You may obtain a copy of the License at https://www.gnu.org/licenses/lgpl-3.0.en.html
8//
9//  Unless required by applicable law or agreed to in writing, software
10//  distributed under the License is distributed on an "AS IS" BASIS,
11//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12//  See the License for the specific language governing permissions and
13//  limitations under the License.
14// -------------------------------------------------------------------------------------------------
15
16//! Core parsing functions.
17
18/// Clamps a length to `u8::MAX` with optional debug logging.
19#[inline]
20#[must_use]
21#[expect(
22    clippy::cast_possible_truncation,
23    reason = "Intentional for parsing, value range validated"
24)]
25fn clamp_precision_with_log(len: usize, context: &str, input: &str) -> u8 {
26    if len > u8::MAX as usize {
27        log::debug!(
28            "{} precision clamped from {} to {} for input: {}",
29            context,
30            len,
31            u8::MAX,
32            input
33        );
34    }
35    len.min(u8::MAX as usize) as u8
36}
37
/// Parses the digits following `e-` in scientific notation and clamps the value to `u8::MAX`.
///
/// The exponent must consist solely of ASCII digits. Signs are rejected explicitly because
/// the integer `FromStr` implementation would otherwise accept a leading `+`, letting
/// malformed input such as `"1e-+5"` through.
///
/// Returns `Some(precision)` for a well-formed exponent, saturating at `u8::MAX` even when
/// the digits overflow `u64`. Returns `None` for an empty or non-numeric exponent when
/// `strict` is false.
///
/// # Panics
///
/// Panics when `strict` is true and the exponent is empty or contains a non-digit character.
#[inline]
#[must_use]
fn parse_scientific_exponent(exponent_str: &str, strict: bool) -> Option<u8> {
    // Empty string is invalid (not a large number that overflowed)
    if exponent_str.is_empty() {
        assert!(
            !strict,
            "Invalid scientific notation format: missing exponent after 'e-'"
        );
        return None;
    }

    // Validate before parsing so that a leading sign is treated as malformed input
    if !exponent_str.bytes().all(|b| b.is_ascii_digit()) {
        assert!(
            !strict,
            "Invalid scientific notation exponent '{exponent_str}': must be a valid number"
        );
        return None;
    }

    // Only digits remain, so a parse failure can only mean the value overflowed `u64`;
    // both that case and values above `u8::MAX` saturate to `u8::MAX`.
    let clamped = exponent_str
        .parse::<u64>()
        .ok()
        .and_then(|exp| u8::try_from(exp).ok())
        .unwrap_or(u8::MAX);

    Some(clamped)
}
72
73/// Returns the decimal precision inferred from the given string.
74///
75/// For scientific notation with large negative exponents (e.g., "1e-300", "1e-4294967296"),
76/// the precision is clamped to `u8::MAX` (255) since that represents the maximum representable
77/// precision in this system. This handles arbitrarily large exponents without panicking.
78///
79/// # Panics
80///
81/// Panics if the input string is malformed (e.g., "1e-" with no exponent value, or non-numeric
82/// exponents like "1e-abc").
83#[must_use]
84pub fn precision_from_str(s: &str) -> u8 {
85    let s = s.trim().to_ascii_lowercase();
86
87    // Check for scientific notation
88    if s.contains("e-") {
89        let exponent_str = s
90            .split("e-")
91            .nth(1)
92            .expect("Invalid scientific notation format: missing exponent after 'e-'");
93
94        return parse_scientific_exponent(exponent_str, true)
95            .expect("parse_scientific_exponent should return Some in strict mode");
96    }
97
98    // Check for decimal precision
99    if let Some((_, decimal_part)) = s.split_once('.') {
100        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
101    } else {
102        0
103    }
104}
105
106/// Returns the minimum increment precision inferred from the given string,
107/// ignoring trailing zeros.
108///
109/// For scientific notation with large negative exponents (e.g., "1e-300"), the precision
110/// is clamped to `u8::MAX` (255) to match the behavior of `precision_from_str`.
111#[must_use]
112pub fn min_increment_precision_from_str(s: &str) -> u8 {
113    let s = s.trim().to_ascii_lowercase();
114
115    // Check for scientific notation
116    if let Some(pos) = s.find('e')
117        && s[pos + 1..].starts_with('-')
118    {
119        let exponent_str = &s[pos + 2..];
120        // Use lenient parsing (returns 0 for invalid, doesn't panic)
121        return parse_scientific_exponent(exponent_str, false).unwrap_or(0);
122    }
123
124    // Check for decimal precision
125    if let Some(dot_pos) = s.find('.') {
126        let decimal_part = &s[dot_pos + 1..];
127        if decimal_part.chars().any(|c| c != '0') {
128            let trimmed_len = decimal_part.trim_end_matches('0').len();
129            return clamp_precision_with_log(trimmed_len, "Minimum increment", &s);
130        }
131        clamp_precision_with_log(decimal_part.len(), "Decimal", &s)
132    } else {
133        0
134    }
135}
136
137/// Returns a `usize` from the given bytes.
138///
139/// # Errors
140///
141/// Returns an error if there are not enough bytes to represent a `usize`.
142pub fn bytes_to_usize(bytes: &[u8]) -> anyhow::Result<usize> {
143    // Check bytes width
144    if bytes.len() >= std::mem::size_of::<usize>() {
145        let mut buffer = [0u8; std::mem::size_of::<usize>()];
146        buffer.copy_from_slice(&bytes[..std::mem::size_of::<usize>()]);
147
148        Ok(usize::from_le_bytes(buffer))
149    } else {
150        anyhow::bail!("Not enough bytes to represent a `usize`");
151    }
152}
153
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    // Plain decimals, scientific notation, signs, casing and surrounding whitespace
    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    fn test_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(precision_from_str(s), expected);
    }

    // Same inputs as above plus cases where trailing zeros must be ignored
    #[rstest]
    #[case("", 0)]
    #[case("0", 0)]
    #[case("1.0", 1)]
    #[case("1.00", 2)]
    #[case("1.23456789", 8)]
    #[case("123456.789101112", 9)]
    #[case("0.000000001", 9)]
    #[case("1e-1", 1)]
    #[case("1e-2", 2)]
    #[case("1e-3", 3)]
    #[case("1e8", 0)]
    #[case("-1.23", 2)]
    #[case("-1e-2", 2)]
    #[case("1E-2", 2)]
    #[case("  1.23", 2)]
    #[case("1.23  ", 2)]
    #[case("1.010", 2)]
    #[case("1.00100", 3)]
    #[case("0.0001000", 4)]
    #[case("1.000000000", 9)]
    fn test_min_increment_precision_from_str(#[case] s: &str, #[case] expected: u8) {
        assert_eq!(min_increment_precision_from_str(s), expected);
    }

    #[rstest]
    fn test_bytes_to_usize_empty() {
        let payload: [u8; 0] = [];
        let outcome = bytes_to_usize(&payload);
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        assert_eq!(message, "Not enough bytes to represent a `usize`");
    }

    #[rstest]
    fn test_bytes_to_usize_invalid() {
        // Three bytes is fewer than the width asserted by `test_bytes_to_usize_valid`
        let payload = [0x01_u8, 0x02, 0x03];
        let outcome = bytes_to_usize(&payload);
        assert!(outcome.is_err());

        let message = outcome.unwrap_err().to_string();
        assert_eq!(message, "Not enough bytes to represent a `usize`");
    }

    #[rstest]
    fn test_bytes_to_usize_valid() {
        // Little-endian: the first byte is the least significant
        let payload = [0x01_u8, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08];
        let value = bytes_to_usize(&payload).unwrap();
        assert_eq!(value, 0x0807_0605_0403_0201);
        assert_eq!(value, 578_437_695_752_307_201);
    }

    #[rstest]
    fn test_precision_from_str_large_exponent_clamped() {
        // 999 exceeds u8::MAX (255) and must saturate
        assert_eq!(precision_from_str("1e-999"), 255);
    }

    #[rstest]
    fn test_precision_from_str_very_large_exponent_clamped() {
        // Every exponent beyond u8::MAX saturates to the same value
        for input in ["1e-300", "1e-1000000"] {
            assert_eq!(precision_from_str(input), 255);
        }
    }

    #[rstest]
    #[should_panic(expected = "Invalid scientific notation exponent")]
    fn test_precision_from_str_invalid_exponent_not_numeric() {
        let _ = precision_from_str("1e-abc");
    }

    #[rstest]
    #[should_panic(expected = "missing exponent after 'e-'")]
    fn test_precision_from_str_malformed_scientific_notation() {
        // An empty exponent after "e-" fails fast instead of guessing
        let _ = precision_from_str("1e-");
    }

    #[rstest]
    fn test_precision_from_str_edge_case_max_u8() {
        // Exactly u8::MAX passes through unchanged
        assert_eq!(precision_from_str("1e-255"), 255);
    }

    #[rstest]
    fn test_precision_from_str_just_above_max_u8() {
        // One past u8::MAX is the first value that saturates
        assert_eq!(precision_from_str("1e-256"), 255);
    }

    #[rstest]
    fn test_precision_from_str_u32_overflow() {
        // 4294967296 does not fit in u32 and still saturates to 255
        assert_eq!(precision_from_str("1e-4294967296"), 255);
    }

    #[rstest]
    fn test_precision_from_str_u64_overflow() {
        // Digits that overflow u64 also saturate to 255
        assert_eq!(precision_from_str("1e-99999999999999999999"), 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_large_exponent() {
        // Saturates to u8::MAX (255) rather than falling back to 0
        assert_eq!(min_increment_precision_from_str("1e-300"), 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_very_large_exponent() {
        // Digits that overflow u64 saturate to 255 as well
        assert_eq!(
            min_increment_precision_from_str("1e-99999999999999999999"),
            255
        );
    }

    #[rstest]
    fn test_min_increment_precision_from_str_consistency() {
        // Both entry points must agree on large exponents
        let input = "1e-1000";
        let full = precision_from_str(input);
        let minimal = min_increment_precision_from_str(input);
        assert_eq!(full, minimal);
        assert_eq!(full, 255);
    }

    #[rstest]
    fn test_min_increment_precision_from_str_empty_exponent() {
        // Lenient parsing maps an empty exponent to 0, not u8::MAX
        assert_eq!(min_increment_precision_from_str("1e-"), 0);
    }
}