Verified Commit 6396ba26 authored by Louis

Hand parsing basic tokens

parent 73b53885
@@ -6,7 +6,7 @@ use std::error::Error;
 use std::fmt::{Display, Formatter};
 use std::process::{ExitCode, Termination};

-pub type LalrError<'a> = BaseLalrError<usize, ScriptTokenType<'a>, TokenError<'a>>;
+pub type LalrError<'a> = BaseLalrError<usize, ScriptTokenType, TokenError<'a>>;

 #[derive(Debug)]
 pub enum TokenErrorKind<'a> {
@@ -72,12 +72,12 @@ pub enum ForgeErrorKind<'a> {
         expected: Vec<String>,
     },
     UnrecognizedToken {
-        token: ScriptTokenType<'a>,
+        token: ScriptTokenType,
         span: ErrorSpan,
         expected: Vec<String>,
     },
     ExpectedEof {
-        token: ScriptTokenType<'a>,
+        token: ScriptTokenType,
         span: ErrorSpan,
     },
     Custom(String),

@@ -19,7 +19,7 @@ pub fn token_ident(input: Span) -> IResult<Span, ScriptToken> {
         input,
         ScriptToken {
             position: pos,
-            token_type: ScriptTokenType::Identifier(value.fragment()),
+            token_type: ScriptTokenType::Identifier(value.fragment().to_string()),
         },
     ))
 }
@@ -147,7 +147,10 @@ mod parsing_tests {
         for expected in positive_cases {
             map_result!(token_ident(Span::new(expected)), |_, token: ScriptToken| {
-                assert_eq!(token.token_type, ScriptTokenType::Identifier(expected))
+                assert_eq!(
+                    token.token_type,
+                    ScriptTokenType::Identifier(expected.to_string())
+                )
             });
         }
     }

@@ -3,7 +3,7 @@ use std::error::Error;
 use std::fmt::{format, Debug, Display, Formatter};

 #[derive(PartialEq, Clone, Debug)]
-pub enum ScriptTokenType<'a> {
+pub enum ScriptTokenType {
     // Structural Tokens
     LeftParen,
     RightParen,
@@ -34,8 +34,8 @@ pub enum ScriptTokenType<'a> {
     Caret,

     // Literals
-    Identifier(&'a str),
-    String(&'a str),
+    Identifier(String),
+    String(String),
     OwnedString(String),
     Integer(i64),
     Float(f64),
@@ -65,7 +65,7 @@ pub enum ScriptTokenType<'a> {
     Eof,
 }

-impl<'a> ScriptTokenType<'a> {
+impl ScriptTokenType {
     pub fn len(&self) -> usize {
         match self {
             ScriptTokenType::LeftParen => 1,
@@ -126,7 +126,7 @@ impl<'a> ScriptTokenType<'a> {
     }
 }

-impl<'a> Display for ScriptTokenType<'a> {
+impl Display for ScriptTokenType {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
             ScriptTokenType::LeftParen => write!(f, "("),
@@ -192,7 +192,7 @@ impl<'a> Display for TokenFromStringError<'a> {
 }

 impl<'a> Error for TokenFromStringError<'a> {}

-impl<'a> TryFrom<&'a str> for ScriptTokenType<'a> {
+impl<'a> TryFrom<&'a str> for ScriptTokenType {
     type Error = TokenFromStringError<'a>;

     fn try_from(value: &'a str) -> Result<Self, Self::Error> {
@@ -248,7 +248,7 @@ impl<'a> TryFrom<&'a str> for ScriptTokenType<'a> {
 #[derive(Clone, PartialEq)]
 pub struct ScriptToken<'a> {
     pub position: Span<'a>,
-    pub token_type: ScriptTokenType<'a>,
+    pub token_type: ScriptTokenType,
 }

 impl<'a> Display for ScriptToken<'a> {

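Not part of the commit: the hunks above drop the `'a` lifetime from `ScriptTokenType` by owning each lexeme as a `String`. A minimal, self-contained sketch of that trade-off, using illustrative type names rather than the crate's:

// Illustrative sketch only; these are not the crate's types. BorrowedToken
// mirrors the old ScriptTokenType<'a>, OwnedToken mirrors the new enum.
#[allow(dead_code)]
#[derive(Debug)]
enum BorrowedToken<'a> {
    Identifier(&'a str),
}

#[derive(Debug)]
enum OwnedToken {
    Identifier(String),
}

fn scan_owned(source: &str) -> OwnedToken {
    // Copying the lexeme means the token no longer borrows from `source`.
    OwnedToken::Identifier(source.to_string())
}

fn main() {
    let token = {
        let source = String::from("foo");
        // A BorrowedToken::Identifier(&source) could not leave this block,
        // since it would outlive `source`; the owned token can.
        scan_owned(&source)
    };
    println!("{:?}", token); // prints: Identifier("foo")
}

The cost is one allocation per identifier or string literal; the gain is that tokens, errors, and spans no longer need to carry the source lifetime.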
@@ -190,9 +190,9 @@ ListOf<T>: Vec<T> = {
 extern {
     type Location = usize;
-    type Error = crate::TokenError<'a>;
+    type Error = String;

-    enum ScriptTokenType<'a> {
+    enum ScriptTokenType {
         "(" => ScriptTokenType::LeftParen,
         ")" => ScriptTokenType::RightParen,
         "{" => ScriptTokenType::LeftBrace,
@@ -238,7 +238,7 @@ extern {
         "float" => ScriptTokenType::Float(<f64>),
         "integer" => ScriptTokenType::Integer(<i64>),
         "owned_string" => ScriptTokenType::OwnedString(<String>),
-        "string" => ScriptTokenType::String(<&'a str>),
-        "identifier" => ScriptTokenType::Identifier(<&'a str>),
+        "string" => ScriptTokenType::String(<String>),
+        "identifier" => ScriptTokenType::Identifier(<String>),
     }
 }
\ No newline at end of file
@@ -4,13 +4,14 @@ use crate::parser::ast::{Expression, Program};
 use crate::TokenError;
 use peg::Parse;

-pub type InputSpan<'a, Loc, Tok> = Result<(Loc, Tok, Loc), TokenError<'a>>;
-type ExprSpan<'a> = InputSpan<'a, usize, ScriptTokenType<'a>>;
+pub type InputSpan<'a, Loc, Tok> = Result<(Loc, Tok, Loc), String>;
+type ExprSpan<'a> = InputSpan<'a, usize, ScriptTokenType>;

 macro_rules! export_grammar_fn {
     ($name:ident = $output:ty => $part: tt) => {
-        pub fn $name(source: &str) -> ForgeResult<$output> {
-            let tokens = script_to_tokens(source)?
+        pub fn $name(source: &str) -> Result<$output, String> {
+            let tokens = script_to_tokens(source)
+                .map_err(|e| format!("{}", e))?
                 .iter()
                 .map(|tok| {
                     Ok((
@@ -21,8 +22,9 @@ macro_rules! export_grammar_fn {
                 })
                 .collect::<Vec<ExprSpan>>();

-            let value =
-                super::forge_grammar::$part::new().parse::<ExprSpan, Vec<ExprSpan>>(tokens)?;
+            let value = super::forge_grammar::$part::new()
+                .parse::<ExprSpan, Vec<ExprSpan>>(tokens)
+                .map_err(|e| format!("{}", e))?;

             Ok(value)
         }
@@ -46,7 +48,7 @@ mod grammar_test {
     #[test_case("null" => matches Ok(_) ; "Parse literal null")]
     #[test_case("if foo {}" => matches Ok(_) ; "Parse conditional")]
     #[test_case("2 * 4 - 3" => matches Ok(_) ; "Parse arithmetic")]
-    fn expression_parsing(prog: &str) -> ForgeResult<Expression> {
+    fn expression_parsing(prog: &str) -> Result<Expression, String> {
         parse_expression(prog)
     }
 }

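Not part of the commit: the macro above now flattens both lexer and parser failures into plain `String`s via `map_err(|e| format!("{}", e))?`. A self-contained sketch of that pattern, with a standard-library error standing in for the crate's error types:

// Sketch of the "stringly-typed" error surface: any error that implements
// Display is flattened into a String with map_err, so callers only need
// Result<_, String>.
use std::num::ParseIntError;

fn parse_count(input: &str) -> Result<i64, String> {
    let value: i64 = input
        .trim()
        .parse()
        .map_err(|e: ParseIntError| format!("{}", e))?;
    Ok(value)
}

fn main() {
    assert_eq!(parse_count(" 42 "), Ok(42));
    assert!(parse_count("forty-two").is_err());
}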
@@ -134,19 +134,19 @@ peg::parser! {
             = base:simple_identifier() "as" alias:simple_identifier() { IdentifierAlias(base.0, alias.0) }

         rule simple_identifier() -> Identifier
-            = [ScriptToken { token_type: ScriptTokenType::Identifier(vl), .. }] { Identifier(String::from(*vl)) }
+            = [ScriptToken { token_type: ScriptTokenType::Identifier(vl), .. }] { Identifier(String::from(vl)) }

         rule literal() -> LiteralNode
             = "true" { LiteralNode::Boolean(true) }
             / "false" { LiteralNode::Boolean(false) }
             / "null" { LiteralNode::Null }
-            / [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { LiteralNode::String(String::from(*vl)) }
+            / [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { LiteralNode::String(String::from(vl)) }
             / [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { LiteralNode::String(vl.clone()) }
             / [ScriptToken { token_type: ScriptTokenType::Integer(vl), .. }] { LiteralNode::Number(Number::Integer(*vl)) }
             / [ScriptToken { token_type: ScriptTokenType::Float(vl), .. }] { LiteralNode::Number(Number::Float(*vl)) }

         rule string_value() -> String
-            = [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { String::from(*vl) }
+            = [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { String::from(vl) }
             / [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { vl.clone() }

         rule eof() = ![_]

@@ -61,6 +61,12 @@ impl ScanningState {
         }
     }

+    pub fn skip_first_n(&mut self, amount: usize) {
+        self.lexeme_start = self
+            .lexeme_current
+            .min(self.lexeme_start.saturating_add(amount));
+    }
+
     pub fn advance(&mut self, amount: usize) {
         self.lexeme_current += amount;
     }
@@ -81,27 +87,28 @@ pub struct Scanner<'a> {
 }

 #[derive(Clone, Debug, PartialEq)]
-pub enum ScannerErrorKind<'a> {
+pub enum ScannerErrorKind {
+    BadIdentifier,
     UnexpectedEof,
-    UnexpectedCharacter { span: TokenSpan<'a> },
+    UnexpectedToken { span: TokenSpan },
+    InvalidLiteral { ltype: &'static str },
 }

 #[derive(Clone, Debug, PartialEq)]
-pub struct ScannerError<'a> {
-    pub kind: ScannerErrorKind<'a>,
+pub struct ScannerError {
+    pub kind: ScannerErrorKind,
+    pub position: PositionState,
 }

 #[derive(Clone, Copy, Debug, PartialEq)]
-pub struct TokenSpan<'a> {
+pub struct TokenSpan {
     pub position: PositionState,
     pub length: usize,
-    pub source: &'a str,
 }

-impl<'a> From<Scanner<'a>> for TokenSpan<'a> {
-    fn from(value: Scanner<'a>) -> TokenSpan<'a> {
+impl<'a, 'b: 'a> From<&'b Scanner<'a>> for TokenSpan {
+    fn from(value: &Scanner) -> TokenSpan {
         TokenSpan {
-            source: value.source,
             position: value.position,
             length: value
                 .scan_state
@@ -111,16 +118,40 @@ impl<'a> From<Scanner<'a>> for TokenSpan<'a> {
     }
 }

+fn gen_token_span(scanner: &Scanner) -> TokenSpan {
+    TokenSpan {
+        position: scanner.position,
+        length: scanner
+            .scan_state
+            .lexeme_current
+            .saturating_sub(scanner.scan_state.lexeme_start),
+    }
+}
+
 #[derive(Clone, Debug)]
-pub struct ScannerToken<'a> {
-    pub location: TokenSpan<'a>,
-    pub token: ScriptTokenType<'a>,
+pub struct ScannerToken {
+    pub location: TokenSpan,
+    pub token: ScriptTokenType,
 }

-pub type ScannerResult<'a> = Result<ScannerToken<'a>, ScannerError<'a>>;
+pub type ScannerResult = Result<ScannerToken, ScannerError>;

+macro_rules! next_match {
+    ($target: expr, $val: expr) => {{
+        if $target.is_finished() {
+            false
+        } else if $target.source.chars().nth($target.position.offset) == Some($val) {
+            $target.position.advance(1);
+            $target.scan_state.advance(1);
+            true
+        } else {
+            false
+        }
+    }};
+}
+
 impl<'a> Scanner<'a> {
-    pub fn new(source: &str) -> Scanner {
+    pub fn new<'b>(source: &'b str) -> Scanner<'b> {
         Scanner {
             source,
             position: PositionState::default(),
@@ -132,16 +163,330 @@ impl<'a> Scanner<'a> {
         self.position.offset == self.source.len()
     }

-    pub fn scan_token(&mut self) -> Result<ScannerToken<'a>, ScannerError> {
+    pub fn scan_token(&'a mut self) -> ScannerResult {
+        self.scan_state = ScanningState::new(self.position.offset);
         if self.is_finished() {
-            Ok(ScannerToken {
+            return Ok(ScannerToken {
                 token: ScriptTokenType::Eof,
-                location: TokenSpan::from(*self),
+                location: gen_token_span(self),
-            })
+            });
+        }
+
+        let next_char = self.source.chars().nth(self.position.offset);
+        self.position.advance(1);
+        self.scan_state.advance(1);
+
+        match next_char {
+            Some('=') => {
+                if next_match!(self, '=') {
+                    Ok(self.tokenise(ScriptTokenType::EqualEqual))
+                } else {
+                    Ok(self.tokenise(ScriptTokenType::Equal))
+                }
+            }
+            Some('%') => Ok(self.tokenise(ScriptTokenType::Modulo)),
+            Some('^') => Ok(self.tokenise(ScriptTokenType::Caret)),
+            Some('!') => {
+                if next_match!(self, '=') {
+                    Ok(self.tokenise(ScriptTokenType::BangEqual))
+                } else {
+                    Ok(self.tokenise(ScriptTokenType::Bang))
+                }
+            }
+            Some('(') => Ok(self.tokenise(ScriptTokenType::LeftParen)),
+            Some(')') => Ok(self.tokenise(ScriptTokenType::RightParen)),
+            Some('{') => Ok(self.tokenise(ScriptTokenType::LeftBrace)),
+            Some('}') => Ok(self.tokenise(ScriptTokenType::RightBrace)),
+            Some(',') => Ok(self.tokenise(ScriptTokenType::Comma)),
+            Some('.') => Ok(self.tokenise(ScriptTokenType::Dot)),
+            Some('-') => Ok(self.tokenise(ScriptTokenType::Minus)),
+            Some('+') => Ok(self.tokenise(ScriptTokenType::Plus)),
+            Some(';') => Ok(self.tokenise(ScriptTokenType::Semicolon)),
+            Some('/') => Ok(self.tokenise(ScriptTokenType::Slash)),
+            Some('*') => Ok(self.tokenise(ScriptTokenType::Asterisk)),
+            Some('<') => {
+                if next_match!(self, '=') {
+                    Ok(self.tokenise(ScriptTokenType::LessEqual))
+                } else {
+                    Ok(self.tokenise(ScriptTokenType::Less))
+                }
+            }
+            Some('>') => {
+                if next_match!(self, '=') {
+                    Ok(self.tokenise(ScriptTokenType::GreaterEqual))
+                } else {
+                    Ok(self.tokenise(ScriptTokenType::Greater))
+                }
+            }
+            Some('"') => {
+                let val = self.capture_string()?;
+                Ok(self.tokenise(val))
+            }
+            Some(other) => {
+                if other.is_numeric() {
+                    let val = self.capture_number()?;
+                    Ok(self.tokenise(val))
+                } else if other.is_alphabetic() {
+                    let val = self.capture_keyword_or_ident()?;
+                    Ok(self.tokenise(val))
+                } else {
+                    Err(ScannerError {
+                        position: self.position,
+                        kind: ScannerErrorKind::UnexpectedToken {
+                            span: gen_token_span(self),
+                        },
+                    })
+                }
+            }
+            None => Err(ScannerError {
+                position: self.position,
+                kind: ScannerErrorKind::UnexpectedEof,
+            }),
+        }
+    }
+
+    pub fn consume_ws(&mut self) {
+        loop {
+            match self.peek() {
+                Some('\n') => self.increment_line(),
+                Some('/') => {
+                    if self.peek_nth(1) == Some('/') {
+                        while !self.is_finished() && self.peek() != Some('\n') {
+                            self.increment_cursor();
+                        }
+                    }
+                }
+                Some(other) => {
+                    if other.is_whitespace() {
+                        self.increment_cursor()
+                    } else {
+                        break;
+                    }
+                }
+                None => break,
+            }
+        }
+    }
+
+    fn capture_string(&mut self) -> Result<ScriptTokenType, ScannerError> {
+        loop {
+            if self.is_finished() {
+                return Err(ScannerError {
+                    position: self.position,
+                    kind: ScannerErrorKind::UnexpectedEof,
+                });
+            }
+
+            match self.peek() {
+                Some('\"') => {
+                    self.scan_state.skip_first_n(1); // Don't include first quote in capture
+                    let tok = ScriptTokenType::String(String::from(
+                        &self.source[self.scan_state.lexeme_start..self.scan_state.lexeme_current],
+                    ));
+                    self.increment_cursor();
+                    return Ok(tok);
+                }
+                Some('\n') => self.increment_line(),
+                Some(_) => self.increment_cursor(),
+                _ => {}
+            }
+        }
+    }
+
+    fn capture_number(&mut self) -> Result<ScriptTokenType, ScannerError> {
+        let mut can_have_dot = true;
+        let mut can_have_underscore = true;
+
+        loop {
+            if self.is_finished() {
+                break;
+            }
+
+            let char = self.peek().ok_or(ScannerError {
+                position: self.position,
+                kind: ScannerErrorKind::UnexpectedEof,
+            })?;
+
+            if char.is_numeric() {
+                self.increment_cursor();
+                can_have_underscore = true;
+            } else if char == '_' && can_have_underscore {
+                self.increment_cursor();
+                can_have_underscore = false;
+            } else if char == '.' && can_have_dot {
+                self.increment_cursor();
+                can_have_dot = false;
+                can_have_underscore = false;
+            } else {
+                break;
+            }
+        }
+
+        let result = if can_have_dot {
+            // We haven't consumed a dot, it's an int
+            (&self.source[self.scan_state.lexeme_start..self.scan_state.lexeme_current])
+                .replace('_', "")
+                .parse::<i64>()
+                .map_err(|e| ScannerError {
+                    position: self.position,
+                    kind: ScannerErrorKind::InvalidLiteral { ltype: "integer" },
+                })
+                .map(ScriptTokenType::Integer)
         } else {
+            // We have consumed a dot, it's a float
+            (&self.source[self.scan_state.lexeme_start..self.scan_state.lexeme_current])
+                .replace('_', "")
+                .parse::<f64>()
+                .map_err(|e| ScannerError {
+                    position: self.position,
+                    kind: ScannerErrorKind::InvalidLiteral { ltype: "float" },
+                })
+                .map(ScriptTokenType::Float)
+        };
+        self.increment_cursor();
+        result
+    }
+
+    fn capture_keyword_or_ident(&mut self) -> Result<ScriptTokenType, ScannerError> {
+        while self
+            .peek()
+            .map(|c| c.is_alphanumeric() || c == '_')
+            .unwrap_or(false)
+        {
+            self.increment_cursor();
+        }
+
+        let val = self.peek_current_slice().ok_or(ScannerError {
+            position: self.position,
+            kind: ScannerErrorKind::BadIdentifier,
+        })?;
+
+        let bad_kw = || {
             Err(ScannerError {
-                kind: ScannerErrorKind::UnexpectedEof,
+                position: self.position,
+                kind: ScannerErrorKind::BadIdentifier,
             })
+        };
+        let ident = || Ok(ScriptTokenType::Identifier(String::from(val)));
+
+        eprintln!("Found ident {}", val);
+
+        let mut val_chars = val.chars();
+        match val_chars.next() {
+            Some('a') => self.check_ident(1, val, "s", ScriptTokenType::Alias),
+            Some('e') => match val_chars.next() {
+                Some('l') => self.check_ident(2, val, "se", ScriptTokenType::Else),
+                Some('x') => self.check_ident(2, val, "port", ScriptTokenType::Export),
+                _ => ident(),
+            },
+            Some('f') => match val_chars.next() {
+                Some('n') => self.check_ident(2, val, "", ScriptTokenType::Function),
+                Some('r') => self.check_ident(2, val, "om", ScriptTokenType::From),
+                Some('o') => self.check_ident(2, val, "r", ScriptTokenType::For),
+                Some('i') => self.check_ident(2, val, "nally", ScriptTokenType::Finally),
+                _ => ident(),
+            },
+            Some('i') => match val_chars.next() {
+                Some('m') => self.check_ident(2, val, "port", ScriptTokenType::Import),
+                Some('f') => self.check_ident(2, val, "", ScriptTokenType::If),
+                _ => ident(),
+            },
+            Some('l') => self.check_ident(1, val, "et", ScriptTokenType::Let),
+            Some('n') => self.check_ident(1, val, "ull", ScriptTokenType::Null),
+            Some('p') => self.check_ident(1, val, "rint", ScriptTokenType::Print),
+            Some('r') => self.check_ident(1, val, "eturn", ScriptTokenType::Return),
+            Some('s') => match val_chars.next() {
+                Some('t') => self.check_ident(2, val, "ruct", ScriptTokenType::Class),
+                Some('u') => self.check_ident(2, val, "per", ScriptTokenType::Super),
+                _ => ident(),
+            },
+            Some('t') => match val_chars.next() {
+                Some('h') => self.check_ident(2, val, "is", ScriptTokenType::This),
+                Some('y') => self.check_ident(2, val, "peof", ScriptTokenType::Typeof),
+                _ => ident(),
+            },
+            Some('w') => self.check_ident(1, val, "hile", ScriptTokenType::While),
+            Some(_) => ident(),
+            None => bad_kw(),
+        }
+    }
+
+    fn check_ident(
+        &self,
+        start: usize,
+        val: &str,
+        expected: &str,
+        success: ScriptTokenType,
+    ) -> Result<ScriptTokenType, ScannerError> {
+        let sub = val.get(start..).ok_or(ScannerError {
+            position: self.position,
+            kind: ScannerErrorKind::BadIdentifier,
+        })?;
+        if sub == expected {
+            Ok(success)
+        } else {
+            Ok(ScriptTokenType::Identifier(String::from(val)))
+        }
+    }
+
+    #[inline(always)]
+    fn peek(&self) -> Option<char> {
+        self.peek_nth(0)
+    }
+
+    fn peek_nth(&self, n: usize) -> Option<char> {
+        self.source.chars().nth(self.position.offset + n)
+    }
+
+    fn peek_current_slice(&self) -> Option<&str> {
+        self.source
+            .get(self.scan_state.lexeme_start..self.scan_state.lexeme_current)
+    }
+
+    fn increment_cursor(&mut self) {
+        self.position.advance(1);
+        self.scan_state.advance(1);
+    }
+
+    fn increment_line(&mut self) {
+        self.position.new_line();
+        self.scan_state.advance(1);
+    }
+
+    fn next_matches(&mut self, ch: char) -> bool {
+        if self.is_finished() {
+            false
+        } else if self.source.chars().nth(self.position.offset) == Some(ch) {
+            self.position.advance(1);
+            self.scan_state.advance(1);
+            true
+        } else {
+            false
+        }
+    }
+
+    fn next(&mut self) -> Option<char> {
+        let ch = self.source.chars().nth(self.position.offset);
+        self.position.advance(1);
+        self.scan_state.advance(1);
+        ch
+    }
+
+    fn next_checked(&mut self) -> Result<char, ScannerError> {
+        self.next().ok_or(ScannerError {
+            position: self.position,
+            kind: ScannerErrorKind::UnexpectedEof,
+        })
+    }
+
+    fn tokenise(&self, inner: ScriptTokenType) -> ScannerToken {
+        ScannerToken {
+            token: inner,
+            location: gen_token_span(self),
         }
     }
 }

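Not part of the commit: a rough sketch of how a caller might drive this scanner, looping `consume_ws` and `scan_token` until it sees `Eof`. It assumes the `Scanner`, `ScannerToken`, `ScannerError`, and `ScriptTokenType` items exactly as they appear in this diff, except that `scan_token` is taken as a plain `&mut self` (the `&'a mut self` receiver above ties the borrow to the scanner's whole lifetime, which would block repeated calls). The `tokenize` helper itself is hypothetical.

// Hypothetical driver; the scanner types are assumed from the diff above.
fn tokenize(source: &str) -> Result<Vec<ScannerToken>, ScannerError> {
    let mut scanner = Scanner::new(source);
    let mut tokens = Vec::new();
    loop {
        // Skip whitespace and // comments before each token.
        scanner.consume_ws();
        let token = scanner.scan_token()?;
        let done = token.token == ScriptTokenType::Eof;
        tokens.push(token);
        if done {
            return Ok(tokens);
        }
    }
}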
@@ -2,11 +2,56 @@ use crate::lexer::ScriptTokenType;
 use crate::pratt::parser::{Scanner, ScannerError, ScannerResult};
 use test_case::test_case;

-#[test_case("1 + 2" => Ok(ScriptTokenType::Integer(1)) ; "expects integer")]
-#[test_case("print 1 + 2" => Ok(ScriptTokenType::Print) ; "expects print")]
-#[test_case("\"Foo\"" => matches Ok(ScriptTokenType::OwnedString(_)) ; "expects string")]
-#[test_case("" => Ok(ScriptTokenType::Eof) ; "expects eof")]
-fn next_token(source: &'static str) -> Result<ScriptTokenType<'static>, ScannerError> {
-    let mut scanner = Scanner::new(source);
-    scanner.scan_token().map(|t| t.token)
+#[test_case("", Ok(ScriptTokenType::Eof) ; "expects eof")]
+#[test_case("%", Ok(ScriptTokenType::Modulo) ; "Expects ScriptTokenType::Modulo")]
+#[test_case("^", Ok(ScriptTokenType::Caret) ; "Expects ScriptTokenType::Caret")]
+#[test_case("!", Ok(ScriptTokenType::Bang) ; "Expects ScriptTokenType::Bang")]
+#[test_case("(", Ok(ScriptTokenType::LeftParen) ; "Expects ScriptTokenType::LeftParen")]
+#[test_case(")", Ok(ScriptTokenType::RightParen) ; "Expects ScriptTokenType::RightParen")]
+#[test_case("{", Ok(ScriptTokenType::LeftBrace) ; "Expects ScriptTokenType::LeftBrace")]
+#[test_case("}", Ok(ScriptTokenType::RightBrace) ; "Expects ScriptTokenType::RightBrace")]
+#[test_case(",", Ok(ScriptTokenType::Comma) ; "Expects ScriptTokenType::Comma")]
+#[test_case(".", Ok(ScriptTokenType::Dot) ; "Expects ScriptTokenType::Dot")]
+#[test_case("-", Ok(ScriptTokenType::Minus) ; "Expects ScriptTokenType::Minus")]
+#[test_case("+", Ok(ScriptTokenType::Plus) ; "Expects ScriptTokenType::Plus")]
+#[test_case(";", Ok(ScriptTokenType::Semicolon) ; "Expects ScriptTokenType::Semicolon")]
+#[test_case("/", Ok(ScriptTokenType::Slash) ; "Expects ScriptTokenType::Slash")]
+#[test_case("*", Ok(ScriptTokenType::Asterisk) ; "Expects ScriptTokenType::Asterisk")]
+#[test_case("=", Ok(ScriptTokenType::Equal) ; "Expects ScriptTokenType::Equal")]
+#[test_case("!=", Ok(ScriptTokenType::BangEqual) ; "Expects ScriptTokenType::BangEqual")]
+#[test_case("==", Ok(ScriptTokenType::EqualEqual) ; "Expects ScriptTokenType::EqualEqual")]
+#[test_case(">", Ok(ScriptTokenType::Greater) ; "Expects ScriptTokenType::Greater")]
+#[test_case(">=", Ok(ScriptTokenType::GreaterEqual) ; "Expects ScriptTokenType::GreaterEqual")]
+#[test_case("<", Ok(ScriptTokenType::Less) ; "Expects ScriptTokenType::Less")]
+#[test_case("<=", Ok(ScriptTokenType::LessEqual) ; "Expects ScriptTokenType::LessEqual")]
+#[test_case("\"Foo\"", Ok(ScriptTokenType::String(String::from("Foo"))) ; "Expects ScriptTokenType::String")]
+#[test_case("123", Ok(ScriptTokenType::Integer(123)) ; "Expects ScriptTokenType::Integer")]
+#[test_case("1_2_3", Ok(ScriptTokenType::Integer(123)) ; "Expects ScriptTokenType::Integer underscored")]
+#[test_case("123.123", Ok(ScriptTokenType::Float(123.123)) ; "Expects ScriptTokenType::Float")]
+#[test_case("12_3.1_23", Ok(ScriptTokenType::Float(123.123)) ; "Expects ScriptTokenType::Float underscored")]
+#[test_case("struct", Ok(ScriptTokenType::Class) ; "Expects ScriptTokenType::Class")]
+#[test_case("else", Ok(ScriptTokenType::Else) ; "Expects ScriptTokenType::Else")]
+#[test_case("fn", Ok(ScriptTokenType::Function) ; "Expects ScriptTokenType::Function")]
+#[test_case("for", Ok(ScriptTokenType::For) ; "Expects ScriptTokenType::For")]
+#[test_case("if", Ok(ScriptTokenType::If) ; "Expects ScriptTokenType::If")]
+#[test_case("null", Ok(ScriptTokenType::Null) ; "Expects ScriptTokenType::Null")]
+#[test_case("print", Ok(ScriptTokenType::Print) ; "Expects ScriptTokenType::Print")]
+#[test_case("return", Ok(ScriptTokenType::Return) ; "Expects ScriptTokenType::Return")]
+#[test_case("super", Ok(ScriptTokenType::Super) ; "Expects ScriptTokenType::Super")]
+#[test_case("this", Ok(ScriptTokenType::This) ; "Expects ScriptTokenType::This")]
+#[test_case("let", Ok(ScriptTokenType::Let) ; "Expects ScriptTokenType::Let")]
+#[test_case("while", Ok(ScriptTokenType::While) ; "Expects ScriptTokenType::While")]
+#[test_case("export", Ok(ScriptTokenType::Export) ; "Expects ScriptTokenType::Export")]
+#[test_case("import", Ok(ScriptTokenType::Import) ; "Expects ScriptTokenType::Import")]
+#[test_case("as", Ok(ScriptTokenType::Alias) ; "Expects ScriptTokenType::Alias")]
+#[test_case("from", Ok(ScriptTokenType::From) ; "Expects ScriptTokenType::From")]
+#[test_case("typeof", Ok(ScriptTokenType::Typeof) ; "Expects ScriptTokenType::Typeof")]
+#[test_case("finally", Ok(ScriptTokenType::Finally) ; "Expects ScriptTokenType::Finally")]
+#[test_case("foo", Ok(ScriptTokenType::Identifier(String::from("foo"))) ; "Expects ScriptTokenType::Identifier foo")]
+#[test_case("foo_bar", Ok(ScriptTokenType::Identifier(String::from("foo_bar"))) ; "Expects ScriptTokenType::Identifier foo_bar")]
+#[test_case("fo2ob4r", Ok(ScriptTokenType::Identifier(String::from("fo2ob4r"))) ; "Expects ScriptTokenType::Identifier fo2ob4r")]
+fn next_token(source: &'static str, expected: Result<ScriptTokenType, ScannerError>) {
+    let mut scanner = Scanner::<'static>::new(source);
+    let token = scanner.scan_token().map(|t| t.token);
+    assert_eq!(token, expected);
 }

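Not part of the commit: the `"Foo"` string case above yields `String("Foo")` without the surrounding quotes only because `skip_first_n` moves the lexeme start past the opening quote before the slice is taken. A self-contained sketch of that two-cursor idea, with illustrative names rather than the crate's:

// Illustrative two-cursor lexeme tracking: `start` marks where the current
// lexeme begins, `current` is one past the last consumed byte. Mirrors the
// diff's ScanningState::{advance, skip_first_n} but is not the crate's code.
struct Cursors {
    start: usize,
    current: usize,
}

impl Cursors {
    fn advance(&mut self, amount: usize) {
        self.current += amount;
    }

    // Move the start of the lexeme forward, e.g. to exclude an opening quote.
    fn skip_first_n(&mut self, amount: usize) {
        self.start = self.current.min(self.start.saturating_add(amount));
    }
}

fn main() {
    let source = "\"Foo\"";
    let mut cur = Cursors { start: 0, current: 0 };
    cur.advance(1); // consume the opening quote
    cur.advance(3); // consume F, o, o (ASCII here, so bytes == chars)
    cur.skip_first_n(1); // drop the opening quote from the capture
    assert_eq!(&source[cur.start..cur.current], "Foo");
}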
@@ -19,8 +19,7 @@ impl ChunkBuilder {
     pub fn parse(code: &str) -> Result<Chunk, VmError> {
         let mut builder = ChunkBuilder::new();
-        let program =
-            parse_program(code).map_err(|fe| VmError::ast_parser(format_forge_error(code, &fe)))?;
+        let program = parse_program(code)?; //.map_err(|fe| VmError::ast_parser(format_forge_error(code, &fe)))?;
         builder.evaluate_program(&program)?;
         Ok(builder.take_chunk())
     }

@@ -60,6 +60,12 @@ impl From<UnsupportedOperation> for VmError {
     }
 }

+impl From<String> for VmError {
+    fn from(value: String) -> Self {
+        VmError::CompilerError(CompilerErrorKind::Chunkbuilder(value))
+    }
+}
+
 impl Display for VmError {
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
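Not part of the commit: a self-contained sketch of the conversion this `From<String>` impl enables. With it in place, `parse_program(code)?` in `ChunkBuilder::parse` can hand a `String` error straight to the `?` operator, which calls `From::from` to build the `VmError`. The names below are illustrative stand-ins, not the crate's types.

// Stand-ins for the crate's VmError/CompilerErrorKind; illustrative only.
#[derive(Debug)]
enum DemoError {
    Compiler(String),
}

impl From<String> for DemoError {
    fn from(value: String) -> Self {
        DemoError::Compiler(value)
    }
}

// Plays the role of parse_program: a parser that reports errors as String.
fn parse(source: &str) -> Result<usize, String> {
    if source.is_empty() {
        return Err(String::from("empty program"));
    }
    Ok(source.len())
}

fn compile(source: &str) -> Result<usize, DemoError> {
    // `?` converts the String error into DemoError via the From impl above.
    let program_len = parse(source)?;
    Ok(program_len)
}

fn main() {
    assert!(matches!(compile(""), Err(DemoError::Compiler(_))));
    assert_eq!(compile("print 1;").map_err(|_| ()), Ok(8));
}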