diff --git a/forge-script-lang/src/pratt/mod.rs b/forge-script-lang/src/pratt/mod.rs index 80bf7ef391fddd11ee8e8547bdc63455ffc77c24..369e4b9c217415ba202104b014cc00f4c6a2aa53 100644 --- a/forge-script-lang/src/pratt/mod.rs +++ b/forge-script-lang/src/pratt/mod.rs @@ -2,3 +2,8 @@ mod parser; #[cfg(test)] mod test_cases; + +pub use parser::{ + PositionState, Scanner, ScannerError, ScannerErrorKind, ScannerIter, ScannerResult, + ScannerToken, TokenSpan, +}; diff --git a/forge-script-lang/src/pratt/parser.rs b/forge-script-lang/src/pratt/parser.rs index 93f49fee14767448ce30f1eecd9ed6a5b2d1d9c3..632948ee46d7b685dbb19ff62b5f58bb63543816 100644 --- a/forge-script-lang/src/pratt/parser.rs +++ b/forge-script-lang/src/pratt/parser.rs @@ -90,8 +90,19 @@ pub struct Scanner<'a> { pub enum ScannerErrorKind { BadIdentifier, UnexpectedEof, - UnexpectedToken { span: TokenSpan }, - InvalidLiteral { ltype: &'static str }, + UnexpectedToken { + found: ScriptTokenType, + }, + ExpectedDifferentToken { + found: ScriptTokenType, + expected: Vec<ScriptTokenType>, + }, + UnexpectedChar { + character: char, + }, + InvalidLiteral { + ltype: &'static str, + }, } #[derive(Clone, Debug, PartialEq)] @@ -163,7 +174,7 @@ impl<'a> Scanner<'a> { self.position.offset == self.source.len() } - pub fn scan_token(&'a mut self) -> ScannerResult { + pub fn scan_token(&mut self) -> ScannerResult { self.scan_state = ScanningState::new(self.position.offset); if self.is_finished() { @@ -208,8 +219,8 @@ impl<'a> Scanner<'a> { Ok(self.tokenise(ScriptTokenType::DoubleAmpersand)) } else { Err(ScannerError { - kind: ScannerErrorKind::UnexpectedToken { - span: gen_token_span(self), + kind: ScannerErrorKind::UnexpectedChar { + character: self.peek().unwrap_or('\0'), }, position: self.position, }) @@ -220,8 +231,8 @@ impl<'a> Scanner<'a> { Ok(self.tokenise(ScriptTokenType::DoublePipe)) } else { Err(ScannerError { - kind: ScannerErrorKind::UnexpectedToken { - span: gen_token_span(self), + kind: ScannerErrorKind::UnexpectedChar { + character: self.peek().unwrap_or('\0'), }, position: self.position, }) @@ -254,11 +265,8 @@ impl<'a> Scanner<'a> { Ok(self.tokenise(val)) } else { Err(ScannerError { + kind: ScannerErrorKind::UnexpectedChar { character: other }, position: self.position, - - kind: ScannerErrorKind::UnexpectedToken { - span: gen_token_span(self), - }, }) } } @@ -511,3 +519,52 @@ impl<'a> Scanner<'a> { } } } + +pub struct ScannerIter<'a> { + scanner: Scanner<'a>, + has_finished: bool, +} + +impl<'a> ScannerIter<'a> { + pub fn position(&self) -> PositionState { + self.scanner.position + } + + pub fn span(&self) -> TokenSpan { + gen_token_span(&self.scanner) + } +} + +impl<'a> Iterator for ScannerIter<'a> { + type Item = ScannerResult; + + fn next(&mut self) -> Option<Self::Item> { + if self.has_finished { + None + } else { + let next = self.scanner.scan_token(); + if matches!( + next, + Ok(ScannerToken { + token: ScriptTokenType::Eof, + .. + }) + ) { + self.has_finished = true; + } + Some(next) + } + } +} + +impl<'a> IntoIterator for Scanner<'a> { + type Item = ScannerResult; + type IntoIter = ScannerIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + ScannerIter { + scanner: self, + has_finished: false, + } + } +} diff --git a/forge-script-lang/src/runtime/vm/compile.rs b/forge-script-lang/src/runtime/vm/compile.rs new file mode 100644 index 0000000000000000000000000000000000000000..f985cd1e2d6890b6d1f3084d06dcfde3276ae69a --- /dev/null +++ b/forge-script-lang/src/runtime/vm/compile.rs @@ -0,0 +1,153 @@ +use crate::lexer::ScriptTokenType; +use crate::pratt::{Scanner, ScannerError, ScannerErrorKind, ScannerIter, ScannerToken}; +use crate::rule_table; +use crate::runtime::vm::Chunk; +use crate::runtime::vm::{ParsingPrecedence, ParsingRuleType, ParsingRules}; + +#[derive(Clone, Default)] +pub struct ParsingState { + previous: Option<ScannerToken>, + current: Option<ScannerToken>, +} + +impl ParsingState { + pub fn with_current(token: ScannerToken) -> Self { + Self { + previous: None, + current: Some(token), + } + } + pub fn set_current(&mut self, token: Option<ScannerToken>) { + self.previous = std::mem::take(&mut self.current); + self.current = token; + } + pub fn was_previously(&self, tok: ScriptTokenType) -> bool { + self.previous + .as_ref() + .map(|s| s.token == tok) + .unwrap_or(false) + } + pub fn is_currently(&self, tok: ScriptTokenType) -> bool { + self.current + .as_ref() + .map(|s| s.token == tok) + .unwrap_or(false) + } + + pub fn previous(&self) -> &Option<ScannerToken> { + &self.previous + } + pub fn current(&self) -> &Option<ScannerToken> { + &self.current + } +} + +pub struct Compiler<'a> { + chunk: Chunk, + source: ScannerIter<'a>, + state: ParsingState, +} + +type CompilerOp = Result<(), ScannerError>; + +impl<'a> Compiler<'a> { + pub fn new(source: &'a str) -> Compiler<'a> { + Self { + chunk: Chunk::default(), + source: Scanner::new(source).into_iter(), + state: ParsingState::default(), + } + } + + pub fn init_new(source: &'a str) -> Result<Compiler<'a>, ScannerError> { + let mut cmp = Self::new(source); + cmp.advance()?; + Ok(cmp) + } + + pub fn as_parsing_table(&mut self) -> ParsingRules { + rule_table!(( + ParsingRuleType::LeftParen, + Some(Box::new(compile_expression)), + None, + RulePrecedence::None, + ),) + } + + pub fn chunk(&self) -> &Chunk { + &self.chunk + } + + pub fn chunk_mut(&mut self) -> &mut Chunk { + &mut self.chunk + } + + pub fn compile(&mut self) -> Result<&Chunk, ScannerError> { + self.compile_expression()?; + self.assert_next(ScriptTokenType::Eof)?; + Ok(self.chunk()) + } + + fn process_precedence(&mut self, precedence: ParsingPrecedence) -> CompilerOp { + Ok(()) + } + + fn compile_expression(&mut self) -> CompilerOp { + compile_expression(self) + } + + fn advance(&mut self) -> CompilerOp { + if let Some(tok) = self.source.next() { + self.state.set_current(Some(tok?)); + } + Ok(()) + } + + fn assert_next(&mut self, next: ScriptTokenType) -> CompilerOp { + self.advance()?; + match self.current() { + Some(val) => { + if next == val.token { + Ok(()) + } else { + Err(ScannerError { + kind: ScannerErrorKind::ExpectedDifferentToken { + found: val.token.clone(), + expected: vec![next], + }, + position: self.source.position(), + }) + } + } + None => Err(ScannerError { + kind: ScannerErrorKind::UnexpectedEof, + position: self.source.position(), + }), + } + } + + fn discard_until(&mut self, until: ScriptTokenType) -> Result<(), ScannerError> { + for tok in self.source.by_ref() { + let tok = tok?; + if until == tok.token { + return Ok(()); + } + } + + Err(ScannerError { + kind: ScannerErrorKind::UnexpectedEof, + position: self.source.position(), + }) + } + + pub fn current(&self) -> &Option<ScannerToken> { + &self.state.current + } + pub fn previous(&self) -> &Option<ScannerToken> { + &self.state.previous + } +} + +fn compile_expression(compiler: &mut Compiler) -> CompilerOp { + Ok(()) +} diff --git a/forge-script-lang/src/runtime/vm/mod.rs b/forge-script-lang/src/runtime/vm/mod.rs index 3486e51db982c37d2ee167f2017bd183dbb9918a..80eb0893fb139c835889c0e9e648ef9a4f8d41b2 100644 --- a/forge-script-lang/src/runtime/vm/mod.rs +++ b/forge-script-lang/src/runtime/vm/mod.rs @@ -1,10 +1,14 @@ mod chunk_builder; mod chunks; +mod compile; mod const_data; mod machine; mod opcode; +mod parsing_rules; pub use chunks::{Chunk, ChunkOps, ChunkRef}; +pub use compile::Compiler; pub use const_data::{ConstData, ConstDataRef}; pub use machine::{Forge, VmError, VmResult}; pub use opcode::{OpCode, OpCodeError}; +pub use parsing_rules::{ParsingPrecedence, ParsingRule, ParsingRuleType, ParsingRules}; diff --git a/forge-script-lang/src/runtime/vm/parsing_rules.rs b/forge-script-lang/src/runtime/vm/parsing_rules.rs new file mode 100644 index 0000000000000000000000000000000000000000..3753ce7facc0b474818de36930681f94a59881f9 --- /dev/null +++ b/forge-script-lang/src/runtime/vm/parsing_rules.rs @@ -0,0 +1,111 @@ +use crate::runtime::vm::Compiler; +use std::collections::HashMap; + +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug, Hash)] +pub enum ParsingRuleType { + LeftParen, + RightParen, + LeftBrace, + RightBrace, + Comma, + Dot, + Semicolon, + Bang, + Minus, + Asterisk, + Slash, + Plus, + BangEqual, + Equal, + EqualEqual, + Greater, + GreaterEqual, + Less, + LessEqual, + DoublePipe, + DoubleAmpersand, + Modulo, + Caret, + Identifier, + String, + OwnedString, + Integer, + Float, + Boolean, + Class, + Else, + Function, + For, + If, + Null, + Print, + Return, + Super, + This, + Let, + While, + Export, + Import, + Alias, + From, + Typeof, + Finally, +} + +#[repr(u8)] +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug, Hash)] +/// A given precedence level represents parsing for that level and all levels above it +pub enum ParsingPrecedence { + None = 0, + /// Direct assignment statements + Assignment = 1, + /// Boolean operators || and && + Boolean = 2, + /// Equal / Not Equal operators == and != + Equality = 3, + /// Ordering Comparison Operators >, <, <=, and >= + Ordering = 4, + /// Lower precedence binary operators + (add) and - (subtract) + BinaryTerm = 5, + /// Higher precedence binary operators * (multiply) and / (divide) + BinaryFactor = 6, + /// Unary operators ! (negation) and - (inversion) + Unary = 7, + /// Operators for data or process drilling . (property lookup) and `()` (function call) + CallLookup = 8, + Primary = 100, +} + +pub struct ParsingRule { + pub precedence: ParsingPrecedence, + pub unary_rule: Option<Box<dyn Fn(&mut Compiler)>>, + pub binary_rule: Option<Box<dyn Fn(&mut Compiler)>>, +} + +pub type ParsingRules = HashMap<ParsingRuleType, ParsingRule>; + +macro_rules! _inr_replace { + ($_t:tt $out:expr) => { + $expr + }; +} +macro_rules! _inr_count { + ($($tts:tt)*) => {<[()]>::len(&[$(_int_replace!($tts ())),*])}; +} + +#[macro_export] +macro_rules! rule_table { + ($($token: expr, $unary_rule: expr, $binary_rule: expr, $precedence: expr),+) => { + { + let table = HashMap::with_capacity(_inr_count!($($token)*)); + $( + table.insert($token, ParsingRule { + precedence: $precedence, + unary_rule: $unary_rule, + binary_rule: $binary_rule, + }); + )+; + table + } + }; +}