logicaffeine_base/
intern.rs

1//! String interning for O(1) equality comparison.
2//!
3//! Symbols are lightweight integer handles that point to interned strings.
4//! By storing each unique string exactly once and comparing integer handles,
5//! we achieve O(1) equality checks regardless of string length.
6//!
7//! ## Example
8//!
9//! ```
10//! use logicaffeine_base::{Interner, Symbol};
11//!
12//! let mut interner = Interner::new();
13//!
14//! let s1 = interner.intern("hello");
15//! let s2 = interner.intern("hello");  // Same string
16//! let s3 = interner.intern("world");  // Different string
17//!
18//! // Same strings produce same symbols (O(1) comparison)
19//! assert_eq!(s1, s2);
20//! assert_ne!(s1, s3);
21//!
22//! // Resolve back to strings when needed
23//! assert_eq!(interner.resolve(s1), "hello");
24//! ```
25//!
26//! ## Use Cases
27//!
28//! - **Variable names**: Compared frequently during scope lookup
29//! - **Keywords**: Compared during lexing and parsing
30//! - **Type names**: Compared during type checking
31
32use std::collections::HashMap;
33
34/// A lightweight handle to an interned string.
35///
36/// Symbols are `Copy` and compare in O(1) time regardless of string length.
37/// Use [`Interner::resolve`] to retrieve the original string.
38#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
39pub struct Symbol(u32);
40
41impl Symbol {
42    /// The empty string symbol, always at index 0.
43    pub const EMPTY: Symbol = Symbol(0);
44
45    /// Returns the internal index of this symbol.
46    ///
47    /// Useful for dense storage (e.g., indexing into a `Vec` instead of a `HashMap`).
48    pub fn index(self) -> usize {
49        self.0 as usize
50    }
51}
52
53impl Default for Symbol {
54    fn default() -> Self {
55        Self::EMPTY
56    }
57}
58
59/// A string interner providing O(1) equality comparison via [`Symbol`] handles.
60///
61/// Each unique string is stored exactly once. Interning the same string twice
62/// returns the same symbol, enabling fast equality checks by comparing integers.
63pub struct Interner {
64    map: HashMap<String, Symbol>,
65    vec: Vec<String>,
66}
67
68impl Interner {
69    /// Creates an interner with only the empty string pre-interned.
70    pub fn new() -> Self {
71        let mut interner = Interner {
72            map: HashMap::new(),
73            vec: Vec::new(),
74        };
75        interner.vec.push(String::new());
76        interner
77    }
78
79    /// Interns a string, returning its symbol.
80    ///
81    /// Returns the existing symbol if the string was already interned.
82    pub fn intern(&mut self, s: &str) -> Symbol {
83        if let Some(&sym) = self.map.get(s) {
84            return sym;
85        }
86        let sym = Symbol(self.vec.len() as u32);
87        self.vec.push(s.to_string());
88        self.map.insert(s.to_string(), sym);
89        sym
90    }
91
92    /// Returns the string for the given symbol.
93    ///
94    /// # Panics
95    ///
96    /// Panics if `sym` was not created by this interner.
97    pub fn resolve(&self, sym: Symbol) -> &str {
98        &self.vec[sym.0 as usize]
99    }
100
101    /// Looks up an existing interned string without creating a new entry.
102    ///
103    /// Returns `None` if the string has not been interned.
104    pub fn lookup(&self, s: &str) -> Option<Symbol> {
105        self.map.get(s).copied()
106    }
107
108    /// Returns the number of interned strings, including the empty string.
109    pub fn len(&self) -> usize {
110        self.vec.len()
111    }
112
113    /// Returns `true` if no strings have been interned (only the empty string is present).
114    pub fn is_empty(&self) -> bool {
115        self.vec.len() <= 1
116    }
117}
118
119impl Default for Interner {
120    fn default() -> Self {
121        Self::new()
122    }
123}
124
125/// Convenience trait for comparing a [`Symbol`] to a string literal.
126///
127/// Avoids the need to call `interner.resolve(sym) == "..."` repeatedly.
128pub trait SymbolEq {
129    /// Returns `true` if this symbol resolves to the given string.
130    fn is(&self, interner: &Interner, s: &str) -> bool;
131}
132
133impl SymbolEq for Symbol {
134    #[inline]
135    fn is(&self, interner: &Interner, s: &str) -> bool {
136        interner.resolve(*self) == s
137    }
138}
139
140#[cfg(test)]
141mod tests {
142    use super::*;
143
144    #[test]
145    fn intern_returns_same_symbol_for_same_string() {
146        let mut interner = Interner::new();
147        let s1 = interner.intern("hello");
148        let s2 = interner.intern("hello");
149        assert_eq!(s1, s2);
150    }
151
152    #[test]
153    fn intern_returns_different_symbols_for_different_strings() {
154        let mut interner = Interner::new();
155        let s1 = interner.intern("hello");
156        let s2 = interner.intern("world");
157        assert_ne!(s1, s2);
158    }
159
160    #[test]
161    fn resolve_returns_original_string() {
162        let mut interner = Interner::new();
163        let sym = interner.intern("test");
164        assert_eq!(interner.resolve(sym), "test");
165    }
166
167    #[test]
168    fn empty_symbol_resolves_to_empty_string() {
169        let interner = Interner::new();
170        assert_eq!(interner.resolve(Symbol::EMPTY), "");
171    }
172
173    #[test]
174    fn symbols_are_copy() {
175        let mut interner = Interner::new();
176        let s1 = interner.intern("copy_test");
177        let s2 = s1;
178        assert_eq!(s1, s2);
179        assert_eq!(interner.resolve(s1), interner.resolve(s2));
180    }
181
182    #[test]
183    fn symbol_equality_is_fast() {
184        let mut interner = Interner::new();
185        let s1 = interner.intern("a_very_long_string_that_would_be_slow_to_compare");
186        let s2 = interner.intern("a_very_long_string_that_would_be_slow_to_compare");
187        assert_eq!(s1, s2);
188    }
189
190    #[test]
191    fn len_tracks_interned_count() {
192        let mut interner = Interner::new();
193        assert_eq!(interner.len(), 1);
194        interner.intern("first");
195        assert_eq!(interner.len(), 2);
196        interner.intern("second");
197        assert_eq!(interner.len(), 3);
198        interner.intern("first");
199        assert_eq!(interner.len(), 3);
200    }
201
202    #[test]
203    fn is_empty_after_new() {
204        let interner = Interner::new();
205        assert!(interner.is_empty());
206    }
207
208    #[test]
209    fn not_empty_after_intern() {
210        let mut interner = Interner::new();
211        interner.intern("something");
212        assert!(!interner.is_empty());
213    }
214
215    #[test]
216    fn symbol_index_matches_position() {
217        let mut interner = Interner::new();
218        let s1 = interner.intern("first");
219        let s2 = interner.intern("second");
220        assert_eq!(s1.index(), 1);
221        assert_eq!(s2.index(), 2);
222    }
223
224    #[test]
225    fn symbol_is_matches_interned_string() {
226        let mut interner = Interner::new();
227        let sym = interner.intern("test");
228        assert!(sym.is(&interner, "test"));
229    }
230
231    #[test]
232    fn symbol_is_rejects_different_string() {
233        let mut interner = Interner::new();
234        let sym = interner.intern("hello");
235        assert!(!sym.is(&interner, "world"));
236    }
237
238    #[test]
239    fn symbol_is_case_sensitive() {
240        let mut interner = Interner::new();
241        let sym = interner.intern("Test");
242        assert!(!sym.is(&interner, "test"));
243        assert!(sym.is(&interner, "Test"));
244    }
245
246    #[test]
247    fn symbol_empty_is_empty_string() {
248        let interner = Interner::new();
249        assert!(Symbol::EMPTY.is(&interner, ""));
250    }
251}