From 73b53885a6b976bcd3b4fe3acf6d158812954aed Mon Sep 17 00:00:00 2001
From: Louis Capitanchik <contact@louiscap.co>
Date: Sun, 4 Jun 2023 14:22:04 +0100
Subject: [PATCH] Skeleton scanner impl

---
 forge-script-lang/src/lib.rs              |   1 +
 forge-script-lang/src/pratt/mod.rs        |   4 +
 forge-script-lang/src/pratt/parser.rs     | 185 ++++++++++++++++++++++
 forge-script-lang/src/pratt/test_cases.rs |  13 +
 4 files changed, 203 insertions(+)
 create mode 100644 forge-script-lang/src/pratt/mod.rs
 create mode 100644 forge-script-lang/src/pratt/parser.rs
 create mode 100644 forge-script-lang/src/pratt/test_cases.rs

diff --git a/forge-script-lang/src/lib.rs b/forge-script-lang/src/lib.rs
index 4052630..37382eb 100644
--- a/forge-script-lang/src/lib.rs
+++ b/forge-script-lang/src/lib.rs
@@ -1,6 +1,7 @@
 mod error;
 mod lexer;
 mod parser;
+mod pratt;
 pub mod runtime;
 mod utilities;
 
diff --git a/forge-script-lang/src/pratt/mod.rs b/forge-script-lang/src/pratt/mod.rs
new file mode 100644
index 0000000..80bf7ef
--- /dev/null
+++ b/forge-script-lang/src/pratt/mod.rs
@@ -0,0 +1,4 @@
+mod parser;
+
+#[cfg(test)]
+mod test_cases;
diff --git a/forge-script-lang/src/pratt/parser.rs b/forge-script-lang/src/pratt/parser.rs
new file mode 100644
index 0000000..be77554
--- /dev/null
+++ b/forge-script-lang/src/pratt/parser.rs
@@ -0,0 +1,185 @@
+//! Skeleton scanner for the `pratt` module.
+//!
+//! Only end-of-input detection is implemented so far; the other types describe the positions,
+//! spans, tokens and errors the finished scanner will produce.
+
+use crate::lexer::ScriptTokenType;
+
+/// Cursor location within the source text.
+///
+/// `offset` and `column` start at 0 and `line` starts at 1.
+#[derive(Clone, Copy, PartialEq, Debug)]
+pub struct PositionState {
+    offset: usize,
+    line: usize,
+    column: usize,
+}
+
+impl Default for PositionState {
+    /// Start of input: offset 0, column 0, line 1.
+    fn default() -> Self {
+        Self {
+            offset: 0,
+            column: 0,
+            line: 1,
+        }
+    }
+}
+
+impl PositionState {
+    /// Moves `amount` forward on the current line; offset and column grow together.
+    pub fn advance(&mut self, amount: usize) {
+        self.offset += amount;
+        self.column += amount;
+    }
+
+    /// Steps over one line break: offset + 1, line + 1, column back to 0.
+    pub fn new_line(&mut self) {
+        self.offset += 1;
+        self.line += 1;
+        self.column = 0;
+    }
+
+    /// Offset from the start of the source.
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+
+    /// Current line number.
+    pub fn line(&self) -> usize {
+        self.line
+    }
+
+    /// Current column within the line.
+    pub fn column(&self) -> usize {
+        self.column
+    }
+}
+
+/// Start and current end of the lexeme being scanned.
+#[derive(Clone, Copy, PartialEq, Debug, Default)]
+pub struct ScanningState {
+    lexeme_start: usize,
+    lexeme_current: usize,
+}
+
+impl ScanningState {
+    /// Empty lexeme starting at `idx`.
+    pub fn new(idx: usize) -> Self {
+        ScanningState {
+            lexeme_start: idx,
+            lexeme_current: idx,
+        }
+    }
+
+    /// Lexeme starting at `idx` that already spans `width`.
+    pub fn with_width(idx: usize, width: usize) -> Self {
+        ScanningState {
+            lexeme_start: idx,
+            lexeme_current: idx + width,
+        }
+    }
+
+    /// Extends the lexeme by `amount`.
+    pub fn advance(&mut self, amount: usize) {
+        self.lexeme_current += amount;
+    }
+
+    /// Index where the lexeme starts.
+    pub fn start(&self) -> usize {
+        self.lexeme_start
+    }
+
+    /// Index the scan has reached within the lexeme.
+    pub fn current(&self) -> usize {
+        self.lexeme_current
+    }
+}
+
+/// Token scanner over a borrowed source string.
+#[derive(Clone, Copy, Debug)]
+pub struct Scanner<'a> {
+    source: &'a str,
+    position: PositionState,
+    scan_state: ScanningState,
+}
+
+/// Reasons a scan can fail.
+#[derive(Clone, Debug, PartialEq)]
+pub enum ScannerErrorKind<'a> {
+    UnexpectedEof,
+    UnexpectedCharacter { span: TokenSpan<'a> },
+}
+
+/// Error returned by [`Scanner::scan_token`].
+#[derive(Clone, Debug, PartialEq)]
+pub struct ScannerError<'a> {
+    pub kind: ScannerErrorKind<'a>,
+}
+
+/// Where a token sits: its position, its length and the source it came from.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct TokenSpan<'a> {
+    pub position: PositionState,
+    pub length: usize,
+    pub source: &'a str,
+}
+
+impl<'a> From<Scanner<'a>> for TokenSpan<'a> {
+    /// Captures the scanner's position and the width of its current lexeme.
+    fn from(value: Scanner<'a>) -> TokenSpan<'a> {
+        TokenSpan {
+            source: value.source,
+            position: value.position,
+            length: value
+                .scan_state
+                .lexeme_current
+                .saturating_sub(value.scan_state.lexeme_start),
+        }
+    }
+}
+
+/// A scanned token together with its location.
+#[derive(Clone, Debug)]
+pub struct ScannerToken<'a> {
+    pub location: TokenSpan<'a>,
+    pub token: ScriptTokenType<'a>,
+}
+
+/// Outcome of scanning a single token.
+pub type ScannerResult<'a> = Result<ScannerToken<'a>, ScannerError<'a>>;
+
+impl<'a> Scanner<'a> {
+    /// Creates a scanner positioned at the start of `source`.
+    pub fn new(source: &'a str) -> Self {
+        Scanner {
+            source,
+            position: PositionState::default(),
+            scan_state: ScanningState::default(),
+        }
+    }
+
+    /// Returns `true` once the position has reached (or passed) the end of the source.
+    pub fn is_finished(&self) -> bool {
+        self.position.offset >= self.source.len()
+    }
+
+    /// Scans the next token.
+    ///
+    /// Skeleton: yields `Eof` at the end of input and an `UnexpectedEof` error otherwise.
+    ///
+    /// The error borrows from the source (`'a`), not from `&mut self`, so holding on to it does
+    /// not keep the scanner mutably borrowed.
+    pub fn scan_token(&mut self) -> ScannerResult<'a> {
+        if self.is_finished() {
+            Ok(ScannerToken {
+                token: ScriptTokenType::Eof,
+                location: TokenSpan::from(*self),
+            })
+        } else {
+            Err(ScannerError {
+                kind: ScannerErrorKind::UnexpectedEof,
+            })
+        }
+    }
+}
diff --git a/forge-script-lang/src/pratt/test_cases.rs b/forge-script-lang/src/pratt/test_cases.rs
new file mode 100644
index 0000000..619c04b
--- /dev/null
+++ b/forge-script-lang/src/pratt/test_cases.rs
@@ -0,0 +1,13 @@
+use crate::lexer::ScriptTokenType;
+use crate::pratt::parser::{Scanner, ScannerError, ScannerResult};
+use test_case::test_case;
+
+// Returns the type of the first token scanned from `source`.
+#[test_case("1 + 2" => Ok(ScriptTokenType::Integer(1)) ; "expects integer")]
+#[test_case("print 1 + 2" => Ok(ScriptTokenType::Print) ; "expects print")]
+#[test_case("\"Foo\"" => matches Ok(ScriptTokenType::OwnedString(_)) ; "expects string")]
+#[test_case("" => Ok(ScriptTokenType::Eof) ; "expects eof")]
+fn next_token(source: &'static str) -> Result<ScriptTokenType<'static>, ScannerError<'static>> {
+    let mut scanner = Scanner::new(source);
+    scanner.scan_token().map(|t| t.token)
+}
-- 
GitLab