// logicaffeine_language/token.rs
//! Token types for the LOGOS lexer and parser.
//!
//! This module defines the vocabulary of the LOGOS language at the token level.
//! Tokens represent the atomic syntactic units produced by the lexer and consumed
//! by the parser.
//!
//! ## Token Categories
//!
//! | Category | Examples | Description |
//! |----------|----------|-------------|
//! | **Quantifiers** | every, some, no | Bind variables over domains |
//! | **Determiners** | the, a, this | Select referents |
//! | **Nouns** | cat, philosopher | Predicates over individuals |
//! | **Verbs** | runs, loves | Relations between arguments |
//! | **Adjectives** | red, happy | Modify noun denotations |
//! | **Connectives** | and, or, implies | Combine propositions |
//! | **Pronouns** | he, she, it | Resolve to antecedents |
//!
//! ## Block Types
//!
//! LOGOS uses markdown-style block headers for structured documents:
//!
//! - `## Theorem`: Declares a proposition to be proved
//! - `## Proof`: Contains the proof steps
//! - `## Definition`: Introduces new terminology
//! - `## Main`: Program entry point

use logicaffeine_base::Symbol;
use logicaffeine_lexicon::{Aspect, Case, Definiteness, Gender, Number, Time, VerbClass};

/// Location of a token in the source text, as a pair of byte offsets.
///
/// NOTE(review): whether `end` is inclusive or exclusive is not visible in
/// this file — confirm against the lexer before relying on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    /// Builds a span from the given start and end offsets.
    pub fn new(start: usize, end: usize) -> Self {
        Span { end, start }
    }
}

/// Presupposition-trigger verb classes (see the "Presupposition Triggers
/// (Austin/Strawson)" category of [`TokenType`]).
///
/// Each variant names a verb whose use presupposes part of its complement,
/// e.g. "stop V-ing" presupposes the subject had been V-ing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PresupKind {
    /// "stop" — presupposes the activity was previously ongoing.
    Stop,
    /// "start" — presupposes the activity was not previously ongoing.
    Start,
    /// "regret" — factive: presupposes its complement is true.
    Regret,
    /// "continue" — presupposes the activity was already ongoing.
    Continue,
    /// "realize" — factive: presupposes its complement is true.
    Realize,
    /// "know" — factive: presupposes its complement is true.
    Know,
}

/// Focus-sensitive particles (see the "Focus Particles (Rooth)" category of
/// [`TokenType`]), interpreted via alternative semantics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FocusKind {
    /// "only" — excludes the focus alternatives.
    Only,
    /// "even" — scalar additive particle.
    Even,
    /// "just" — restrictive particle, akin to "only".
    Just,
}

/// Degree words measuring mass-noun quantity (see the "Mass Noun Measure"
/// category of [`TokenType`]), e.g. "much water", "little sand".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MeasureKind {
    /// "much" — large quantity of a mass noun.
    Much,
    /// "little" — small quantity of a mass noun.
    Little,
}

/// Document structure block type markers.
///
/// LOGOS uses markdown-style `## Header` syntax to delimit different
/// sections of a program or proof document. Carried by
/// `TokenType::BlockHeader`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BlockType {
    /// `## Theorem` - Declares a proposition to be proved.
    Theorem,
    /// `## Main` - Program entry point for imperative code.
    Main,
    /// `## Definition` - Introduces new terminology or type definitions.
    Definition,
    /// `## Proof` - Contains proof steps for a theorem.
    Proof,
    /// `## Example` - Illustrative examples.
    Example,
    /// `## Logic` - Direct logical notation input.
    Logic,
    /// `## Note` - Explanatory documentation.
    Note,
    /// `## To` - Function definition block.
    Function,
    /// Inline type definition: `## A Point has:` or `## A Color is one of:`.
    TypeDef,
    /// `## Policy` - Security policy rule definitions.
    Policy,
}

/// The complete token vocabulary of LOGOS.
///
/// Variants are grouped by syntactic/semantic category (see the section
/// comments below). Keyword variants are unit-like; content words carry
/// interned [`Symbol`]s and grammatical features from `logicaffeine_lexicon`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    // Document Structure
    /// Markdown-style `## Header` introducing a document section.
    BlockHeader { block_type: BlockType },

    // Quantifiers
    All,
    No,
    Some,
    Any,
    Both, // Correlative conjunction marker: "both X and Y"
    Most,
    Few,
    Many,
    /// Exact count: "three cats".
    Cardinal(u32),
    /// Lower bound: "at least three cats".
    AtLeast(u32),
    /// Upper bound: "at most three cats".
    AtMost(u32),

    // Negative Polarity Items (NPIs)
    Anything,
    Anyone,
    Nothing,
    Nobody,
    NoOne,
    Nowhere,
    Ever,
    Never,

    // Logical Connectives
    And,
    Or,
    If,
    Then,
    Not,
    Iff,
    Because,

    // Modal Operators
    Must,
    Shall,
    Should,
    Can,
    May,
    Cannot,
    Would,
    Could,
    Might,
    /// Grouped with the modal operators, but NOTE(review): not included in
    /// [`TokenType::MODALS`] — confirm whether that omission is intentional.
    Had,

    // Imperative Statement Keywords
    Let,
    Set,
    Return,
    Be,
    While,
    Repeat,
    For,
    In,
    From,
    Assert,
    /// Documented assertion with justification string.
    Trust,
    Otherwise,
    Call,
    /// Constructor keyword for struct instantiation.
    New,
    /// Sum type definition keyword.
    Either,
    /// Pattern matching statement keyword.
    Inspect,
    /// Native function modifier for FFI bindings.
    Native,

    // Theorem Keywords
    /// Premise marker in theorem blocks.
    Given,
    /// Goal marker in theorem blocks.
    Prove,
    /// Automatic proof strategy directive.
    Auto,

    // IO Keywords
    /// "Read input from..."
    Read,
    /// "Write x to file..."
    Write,
    /// "...from the console"
    Console,
    /// "...from file..." or "...to file..."
    File,

    // Ownership Keywords (Move/Borrow Semantics)
    /// Move ownership: "Give x to processor"
    Give,
    /// Immutable borrow: "Show x to console"
    Show,

    // Collection Operations
    /// "Push x to items"
    Push,
    /// "Pop from items"
    Pop,
    /// "copy of slice" → slice.to_vec()
    Copy,
    /// "items 1 through 3" → inclusive slice
    Through,
    /// "length of items" → items.len()
    Length,
    /// "items at i" → `items[i]`
    At,

    // Set Operations
    /// "Add x to set" (insert)
    Add,
    /// "Remove x from set"
    Remove,
    /// "set contains x"
    Contains,
    /// "a union b"
    Union,
    /// "a intersection b"
    Intersection,

    // Memory Management (Zones)
    /// "Inside a new zone..."
    Inside,
    /// "...zone called..."
    Zone,
    /// "...called 'Scratch'"
    Called,
    /// "...of size 1 MB"
    Size,
    /// "...mapped from 'file.bin'"
    Mapped,

    // Structured Concurrency
    /// "Attempt all of the following:" → concurrent (async, I/O-bound)
    Attempt,
    /// "the following"
    Following,
    /// "Simultaneously:" → parallel (CPU-bound)
    Simultaneously,

    // Agent System (Actor Model)
    /// "Spawn a Worker called 'w1'" → create agent
    Spawn,
    /// "Send Ping to 'agent'" → send message to agent
    Send,
    /// "Await response from 'agent' into result" → receive message
    Await,

    // Serialization
    /// "A Message is Portable and has:" → serde derives
    Portable,

    // Sipping Protocol
    /// "the manifest of Zone" → FileSipper manifest
    Manifest,
    /// "the chunk at N in Zone" → FileSipper chunk
    Chunk,

    // CRDT Keywords
    /// "A Counter is Shared and has:" → CRDT struct
    Shared,
    /// "Merge remote into local" → CRDT merge
    Merge,
    /// "Increase x's count by 10" → GCounter increment
    Increase,

    // Extended CRDT Keywords
    /// "Decrease x's count by 5" → PNCounter decrement
    Decrease,
    /// "which is a Tally" → PNCounter type
    Tally,
    /// "which is a SharedSet of T" → ORSet type
    SharedSet,
    /// "which is a SharedSequence of T" → RGA type
    SharedSequence,
    /// "which is a CollaborativeSequence of T" → YATA type
    CollaborativeSequence,
    /// "which is a SharedMap from K to V" → ORMap type
    SharedMap,
    /// "which is a Divergent T" → MVRegister type
    Divergent,
    /// "Append x to seq" → RGA append
    Append,
    /// "Resolve x to value" → MVRegister resolve
    Resolve,
    /// "(RemoveWins)" → ORSet bias
    RemoveWins,
    /// "(AddWins)" → ORSet bias (default)
    AddWins,
    /// "(YATA)" → Sequence algorithm
    YATA,
    /// "x's values" → MVRegister values accessor
    Values,

    // Security Keywords
    /// "Check that user is admin" → mandatory runtime guard
    Check,

    // P2P Networking Keywords
    /// "Listen on \[addr\]" → bind to network address
    Listen,
    /// "Connect to \[addr\]" → dial a peer (NetConnect to avoid conflict)
    NetConnect,
    /// "Sleep N." → pause execution for N milliseconds
    Sleep,

    // GossipSub Keywords
    /// "Sync x on 'topic'" → automatic CRDT replication
    Sync,

    // Persistence Keywords
    /// "Mount x at \[path\]" → load/create persistent CRDT from journal
    Mount,
    /// "Persistent Counter" → type wrapped with journaling
    Persistent,
    /// "x combined with y" → string concatenation
    Combined,

    // Go-like Concurrency Keywords
    /// "Launch a task to..." → spawn green thread
    Launch,
    /// "a task" → identifier for task context
    Task,
    /// "Pipe of Type" → channel creation
    Pipe,
    /// "Receive from pipe" → recv from channel
    Receive,
    /// "Stop handle" → abort task
    Stop,
    /// "Try to send/receive" → non-blocking variant
    Try,
    /// "Send value into pipe" → channel send
    Into,
    /// "Await the first of:" → select statement
    First,
    /// "After N seconds:" → timeout branch
    After,

    // Block Scoping
    Colon,
    Indent,
    Dedent,
    Newline,

    // Content Words
    Noun(Symbol),
    Adjective(Symbol),
    NonIntersectiveAdjective(Symbol),
    Adverb(Symbol),
    ScopalAdverb(Symbol),
    TemporalAdverb(Symbol),
    /// Verb with its lemma and grammatical features.
    Verb {
        lemma: Symbol,
        time: Time,
        aspect: Aspect,
        class: VerbClass,
    },
    ProperName(Symbol),

    /// Lexically ambiguous token (e.g., "fish" as noun or verb).
    ///
    /// The parser tries the primary interpretation first, then alternatives
    /// if parsing fails. Used for parse forest generation.
    Ambiguous {
        primary: Box<TokenType>,
        alternatives: Vec<TokenType>,
    },

    // Speech Acts (Performatives)
    Performative(Symbol),
    Exclamation,

    // Articles (Definiteness)
    Article(Definiteness),

    // Temporal Auxiliaries
    Auxiliary(Time),

    // Copula & Functional
    Is,
    Are,
    Was,
    Were,
    That,
    Who,
    What,
    Where,
    When,
    Why,
    Does,
    Do,

    // Identity & Reflexive (FOL)
    Identity,
    Equals,
    Reflexive,
    Reciprocal,
    /// Pairwise list coordination: "A and B respectively love C and D"
    Respectively,

    // Pronouns (Discourse)
    /// Pronoun with its agreement features, for antecedent resolution.
    Pronoun {
        gender: Gender,
        number: Number,
        case: Case,
    },

    // Prepositions (for N-ary relations)
    Preposition(Symbol),

    // Phrasal Verb Particles (up, down, out, in, off, on, away)
    Particle(Symbol),

    // Comparatives & Superlatives (Pillar 3 - Degree Semantics)
    Comparative(Symbol),
    Superlative(Symbol),
    Than,

    // Control Verbs (Chomsky's Control Theory)
    To,

    // Presupposition Triggers (Austin/Strawson)
    PresupTrigger(PresupKind),

    // Focus Particles (Rooth)
    Focus(FocusKind),

    // Mass Noun Measure
    Measure(MeasureKind),

    // Numeric Literals (prover-ready: stores raw string for symbolic math)
    Number(Symbol),

    /// String literal: `"hello world"`
    StringLiteral(Symbol),

    /// Character literal: `x` (backtick syntax)
    CharLiteral(Symbol),

    // Index Access (1-indexed)
    Item,
    Items,

    // Possession (Genitive Case)
    Possessive,

    // Punctuation
    LParen,
    RParen,
    LBracket,
    RBracket,
    Comma,
    Period,

    // Arithmetic Operators
    Plus,
    Minus,
    Star,
    Slash,
    Percent, // Modulo operator

    // Comparison Operators
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    LtEq,
    /// `>=`
    GtEq,
    /// `==`
    EqEq,
    /// `!=`
    NotEq,

    /// Arrow for return type syntax: `->`
    Arrow,

    /// End-of-input marker.
    EOF,
}

478#[derive(Debug, Clone)]
479pub struct Token {
480 pub kind: TokenType,
481 pub lexeme: Symbol,
482 pub span: Span,
483}
484
485impl Token {
486 pub fn new(kind: TokenType, lexeme: Symbol, span: Span) -> Self {
487 Token { kind, lexeme, span }
488 }
489}
490
impl TokenType {
    /// The wh-question words, for interrogative detection.
    pub const WH_WORDS: &'static [TokenType] = &[
        TokenType::Who,
        TokenType::What,
        TokenType::Where,
        TokenType::When,
        TokenType::Why,
    ];

    /// The modal auxiliary tokens.
    ///
    /// NOTE(review): [`TokenType::Had`] is grouped with the modal operators
    /// in the enum but is absent from this list — confirm the omission is
    /// intentional (e.g. "had" acting as a temporal auxiliary, not a modal).
    pub const MODALS: &'static [TokenType] = &[
        TokenType::Must,
        TokenType::Shall,
        TokenType::Should,
        TokenType::Can,
        TokenType::May,
        TokenType::Cannot,
        TokenType::Would,
        TokenType::Could,
        TokenType::Might,
    ];
}

#[cfg(test)]
mod tests {
    use super::*;

    // Span::new stores the offsets it is given, unmodified.
    #[test]
    fn span_new_stores_positions() {
        let span = Span::new(5, 10);
        assert_eq!(span.start, 5);
        assert_eq!(span.end, 10);
    }

    // The derived Default yields the zero span.
    #[test]
    fn span_default_is_zero() {
        let span = Span::default();
        assert_eq!(span.start, 0);
        assert_eq!(span.end, 0);
    }

    // Token carries its span through construction.
    #[test]
    fn token_has_span_field() {
        use logicaffeine_base::Interner;
        let mut interner = Interner::new();
        let lexeme = interner.intern("test");
        let token = Token::new(TokenType::Noun(lexeme), lexeme, Span::new(0, 4));
        assert_eq!(token.span.start, 0);
        assert_eq!(token.span.end, 4);
    }

    // WH_WORDS covers exactly the five wh-question tokens.
    #[test]
    fn wh_words_contains_all_wh_tokens() {
        assert_eq!(TokenType::WH_WORDS.len(), 5);
        assert!(TokenType::WH_WORDS.contains(&TokenType::Who));
        assert!(TokenType::WH_WORDS.contains(&TokenType::What));
        assert!(TokenType::WH_WORDS.contains(&TokenType::Where));
        assert!(TokenType::WH_WORDS.contains(&TokenType::When));
        assert!(TokenType::WH_WORDS.contains(&TokenType::Why));
    }

    // MODALS covers the nine modal auxiliaries (note: `Had` is excluded).
    #[test]
    fn modals_contains_all_modal_tokens() {
        assert_eq!(TokenType::MODALS.len(), 9);
        assert!(TokenType::MODALS.contains(&TokenType::Must));
        assert!(TokenType::MODALS.contains(&TokenType::Shall));
        assert!(TokenType::MODALS.contains(&TokenType::Should));
        assert!(TokenType::MODALS.contains(&TokenType::Can));
        assert!(TokenType::MODALS.contains(&TokenType::May));
        assert!(TokenType::MODALS.contains(&TokenType::Cannot));
        assert!(TokenType::MODALS.contains(&TokenType::Would));
        assert!(TokenType::MODALS.contains(&TokenType::Could));
        assert!(TokenType::MODALS.contains(&TokenType::Might));
    }
}
564}