From 73b53885a6b976bcd3b4fe3acf6d158812954aed Mon Sep 17 00:00:00 2001
From: Louis Capitanchik <contact@louiscap.co>
Date: Sun, 4 Jun 2023 14:22:04 +0100
Subject: [PATCH] Skeleton scanner impl

---
 forge-script-lang/src/lib.rs              |   1 +
 forge-script-lang/src/pratt/mod.rs        |   4 +
 forge-script-lang/src/pratt/parser.rs     | 147 ++++++++++++++++++++++
 forge-script-lang/src/pratt/test_cases.rs |  12 ++
 4 files changed, 164 insertions(+)
 create mode 100644 forge-script-lang/src/pratt/mod.rs
 create mode 100644 forge-script-lang/src/pratt/parser.rs
 create mode 100644 forge-script-lang/src/pratt/test_cases.rs

diff --git a/forge-script-lang/src/lib.rs b/forge-script-lang/src/lib.rs
index 4052630..37382eb 100644
--- a/forge-script-lang/src/lib.rs
+++ b/forge-script-lang/src/lib.rs
@@ -1,6 +1,7 @@
 mod error;
 mod lexer;
 mod parser;
+mod pratt;
 pub mod runtime;
 mod utilities;
 
diff --git a/forge-script-lang/src/pratt/mod.rs b/forge-script-lang/src/pratt/mod.rs
new file mode 100644
index 0000000..80bf7ef
--- /dev/null
+++ b/forge-script-lang/src/pratt/mod.rs
@@ -0,0 +1,4 @@
+mod parser;
+
+#[cfg(test)]
+mod test_cases;
diff --git a/forge-script-lang/src/pratt/parser.rs b/forge-script-lang/src/pratt/parser.rs
new file mode 100644
index 0000000..be77554
--- /dev/null
+++ b/forge-script-lang/src/pratt/parser.rs
@@ -0,0 +1,147 @@
+use crate::lexer::ScriptTokenType;
+
+#[derive(Clone, Copy, PartialEq, Debug)]
+pub struct PositionState { // Cursor bookkeeping: where in the source text the scanner currently is.
+	offset: usize, // Running total moved so far; both `advance` and `new_line` add to it.
+	line: usize, // Line counter; incremented by `new_line`.
+	column: usize, // Position within the current line; `new_line` resets it to 0.
+}
+
+impl Default for PositionState {
+	fn default() -> Self { // Starting cursor: nothing consumed yet, sitting on line 1.
+		PositionState {
+			line: 1,
+			column: 0,
+			offset: 0,
+		}
+	}
+}
+
+impl PositionState {
+	pub fn advance(&mut self, amount: usize) { // Move `amount` forward on the current line.
+		let Self { offset, column, .. } = self;
+		*offset += amount;
+		*column += amount;
+	}
+	pub fn new_line(&mut self) { // Step over a line break: +1 offset, next line, column 0.
+		let Self { offset, line, column } = self;
+		*offset += 1;
+		*line += 1;
+		*column = 0;
+	}
+	pub fn offset(&self) -> usize { // Read-only view of the running offset.
+		self.offset
+	}
+	pub fn line(&self) -> usize { // Read-only view of the line counter.
+		self.line
+	}
+	pub fn column(&self) -> usize { // Read-only view of the column.
+		self.column
+	}
+}
+
+#[derive(Clone, Copy, PartialEq, Debug, Default)]
+pub struct ScanningState { // Bounds of the lexeme being scanned; the derived default is 0 for both.
+	lexeme_start: usize, // Index where the lexeme begins; set by `new` / `with_width`.
+	lexeme_current: usize, // Index scanning has reached; `advance` moves it forward.
+}
+
+impl ScanningState {
+	/// Begins an empty lexeme at `idx` (start and current index coincide).
+	pub fn new(idx: usize) -> Self {
+		Self::with_width(idx, 0)
+	}
+
+	/// Begins a lexeme at `idx` whose current index is already `width` further on.
+	pub fn with_width(idx: usize, width: usize) -> Self {
+		Self {
+			lexeme_start: idx,
+			lexeme_current: idx + width,
+		}
+	}
+
+	/// Moves the current index `amount` forward; the start index is untouched.
+	pub fn advance(&mut self, amount: usize) {
+		self.lexeme_current += amount;
+	}
+
+	pub fn start(&self) -> usize { // Index where the lexeme begins.
+		self.lexeme_start
+	}
+	pub fn current(&self) -> usize { // Index scanning has reached.
+		self.lexeme_current
+	}
+}
+
+#[derive(Clone, Copy, Debug)]
+pub struct Scanner<'a> { // Scanner over a borrowed source string; `Copy`, so it can be snapshotted by value.
+	source: &'a str, // The text being scanned.
+	position: PositionState, // Cursor position; its offset is what `is_finished` inspects.
+	scan_state: ScanningState, // Start/current indices of the lexeme in progress.
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum ScannerErrorKind<'a> { // Reasons a scan can fail.
+	UnexpectedEof, // NOTE(review): presumably "input ended too early"; its only visible use is `scan_token`'s not-finished branch — confirm.
+	UnexpectedCharacter { span: TokenSpan<'a> }, // Carries the span of the offending text; not constructed anywhere in view.
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub struct ScannerError<'a> { // Error value produced by the scanner.
+	pub kind: ScannerErrorKind<'a>, // What went wrong.
+}
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct TokenSpan<'a> { // Location of a piece of text within a source string.
+	pub position: PositionState, // Cursor position; copied from the scanner when built via `From<Scanner>`.
+	pub length: usize, // Lexeme length; `From<Scanner>` computes it as current minus start index, saturating at 0.
+	pub source: &'a str, // `From<Scanner>` stores the scanner's whole source string here, not just the lexeme text.
+}
+
+impl<'a> From<Scanner<'a>> for TokenSpan<'a> {
+	/// Snapshots the scanner's source, cursor position and lexeme length (saturating at 0).
+	fn from(value: Scanner<'a>) -> TokenSpan<'a> {
+		let lexeme = value.scan_state;
+		let length = lexeme.current().saturating_sub(lexeme.start());
+		TokenSpan {
+			position: value.position,
+			length,
+			source: value.source,
+		}
+	}
+}
+
+#[derive(Clone, Debug)]
+pub struct ScannerToken<'a> { // A scanned token paired with where it was found.
+	pub location: TokenSpan<'a>, // Span describing where the token sits in the source.
+	pub token: ScriptTokenType<'a>, // Token kind; the type comes from `crate::lexer`.
+}
+
+pub type ScannerResult<'a> = Result<ScannerToken<'a>, ScannerError<'a>>; // A token on success, a scanner error on failure.
+
+impl<'a> Scanner<'a> {
+	pub fn new(source: &'a str) -> Scanner<'a> { // Starts at the beginning of `source`.
+		Scanner {
+			source,
+			position: PositionState::default(),
+			scan_state: ScanningState::default(),
+		}
+	}
+	/// True once the cursor offset has reached (or passed) the end of the source.
+	pub fn is_finished(&self) -> bool {
+		self.position.offset >= self.source.len()
+	}
+	/// Yields an `Eof` token when the input is exhausted; any remaining input currently produces an error.
+	pub fn scan_token(&mut self) -> ScannerResult<'a> { // Error lifetime follows the source, not this `&mut self` borrow.
+		if self.is_finished() {
+			Ok(ScannerToken {
+				token: ScriptTokenType::Eof,
+				location: TokenSpan::from(*self),
+			})
+		} else {
+			Err(ScannerError {
+				kind: ScannerErrorKind::UnexpectedEof, // NOTE(review): skeleton placeholder — input remains here, so this kind looks wrong; confirm.
+			})
+		}
+	}
+}
diff --git a/forge-script-lang/src/pratt/test_cases.rs b/forge-script-lang/src/pratt/test_cases.rs
new file mode 100644
index 0000000..619c04b
--- /dev/null
+++ b/forge-script-lang/src/pratt/test_cases.rs
@@ -0,0 +1,12 @@
+use crate::lexer::ScriptTokenType;
+use crate::pratt::parser::{Scanner, ScannerError, ScannerResult};
+use test_case::test_case;
+
+#[test_case("1 + 2" => Ok(ScriptTokenType::Integer(1)) ; "expects integer")] // NOTE: the visible `scan_token` only succeeds on empty input, so only "expects eof" can pass yet.
+#[test_case("print 1 + 2" => Ok(ScriptTokenType::Print) ; "expects print")]
+#[test_case("\"Foo\"" => matches Ok(ScriptTokenType::OwnedString(_)) ; "expects string")]
+#[test_case("" => Ok(ScriptTokenType::Eof) ; "expects eof")]
+fn next_token(source: &'static str) -> Result<ScriptTokenType<'static>, ScannerError<'static>> {
+	let mut scanner = Scanner::new(source); // Fresh scanner per case; only the first token is examined.
+	scanner.scan_token().map(|scanned| scanned.token) // Keep the token kind; the span is not under test.
+}
-- 
GitLab