Skip to content
Snippets Groups Projects
Verified Commit 34808b2b authored by Louis's avatar Louis :fire:
Browse files

Update keyword parser to avoid collisions

parent 97edbf3f
No related branches found
No related tags found
No related merge requests found
Pipeline #512 passed with stages
in 2 minutes and 35 seconds
Showing with 262 additions and 786 deletions
use crate::lexer::{ScriptTokenType, Span};
use crate::parse::ScriptToken;
use crate::pratt::ScannerError;
use crate::utilities::offset_to_line_column;
use lalrpop_util::ParseError as BaseLalrError;
use std::error::Error;
......@@ -7,6 +8,8 @@ use std::fmt::{Display, Formatter};
use std::process::{ExitCode, Termination};
/// LALRPOP parse error with `usize` locations, `ScriptTokenType` tokens and a
/// `TokenError` as the user-error payload.
pub type LalrError<'a> = BaseLalrError<usize, ScriptTokenType, TokenError<'a>>;
/// LALRPOP parse error with `usize` locations, `ScriptTokenType` tokens and a
/// `ScannerError` as the user-error payload.
pub type LalrScannerError<'a> = BaseLalrError<usize, ScriptTokenType, ScannerError>;
/// LALRPOP parse error with `usize` locations and `ScriptTokenType` tokens,
/// generic over the user-error payload `Err`.
pub type AnyLalrError<'a, Err> = BaseLalrError<usize, ScriptTokenType, Err>;
#[derive(Debug)]
pub enum TokenErrorKind<'a> {
......@@ -135,27 +138,29 @@ impl<'a> From<TokenError<'a>> for ForgeError<'a> {
}
}
impl<'a> From<LalrError<'a>> for ForgeError<'a> {
fn from(value: LalrError<'a>) -> Self {
impl<'a, T: Display> From<AnyLalrError<'a, T>> for ForgeError<'a> {
fn from(value: AnyLalrError<'a, T>) -> Self {
match value {
LalrError::InvalidToken { location } => {
AnyLalrError::InvalidToken { location } => {
ForgeErrorKind::InvalidToken { location }.into()
}
LalrError::UnrecognizedEof { expected, .. } => {
AnyLalrError::UnrecognizedEof { expected, .. } => {
ForgeErrorKind::UnexpectedEof { expected }.into()
}
LalrError::UnrecognizedToken { token, expected } => ForgeErrorKind::UnrecognizedToken {
expected,
token: token.1,
span: (token.0, token.2),
AnyLalrError::UnrecognizedToken { token, expected } => {
ForgeErrorKind::UnrecognizedToken {
expected,
token: token.1,
span: (token.0, token.2),
}
.into()
}
.into(),
LalrError::ExtraToken { token } => ForgeErrorKind::ExpectedEof {
AnyLalrError::ExtraToken { token } => ForgeErrorKind::ExpectedEof {
token: token.1,
span: (token.0, token.2),
}
.into(),
LalrError::User { error } => ForgeErrorKind::Custom(format!("{}", error)).into(),
AnyLalrError::User { error } => ForgeErrorKind::Custom(format!("{}", error)).into(),
}
}
}
......
use nom::bytes::complete::tag;
use nom::character::complete::{char, multispace0, multispace1, one_of};
use nom::combinator::value;
use nom::branch::alt;
use nom::bytes::complete::{is_not, tag};
use nom::character::complete::{alphanumeric1, char, multispace0, multispace1, none_of, one_of};
use nom::combinator::{not, peek, value};
use nom::multi::{many0, many1};
use nom::sequence::{delimited, terminated};
use nom::IResult;
......@@ -34,5 +35,9 @@ pub fn raw_decimal(input: Span) -> IResult<Span, OwnedSpan> {
}
}
/// Matches `tag_val` as a whole word, skipping any leading whitespace.
///
/// `multispace0` consumes optional leading whitespace, `tag` matches the
/// literal, and the trailing `not(...)` lookahead rejects the match when the
/// next input is an alphanumeric run or an underscore. The lookahead consumes
/// nothing and `delimited` returns only the span matched by the tag.
///
/// As a result a keyword at end of input or directly before punctuation is
/// accepted, while a longer word that merely starts with the keyword
/// (e.g. `iffy` for `if`) is not.
pub fn tag_ws<'a, 'b: 'a>(tag_val: &'b str, sp: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
    delimited(
        multispace0,
        tag(tag_val),
        not(alt((alphanumeric1, tag("_")))),
    )(sp)
}
use crate::lexer::atoms::tag_ws;
use crate::lexer::{ScriptToken, ScriptTokenType, Span};
use nom::bytes::complete::tag;
use crate::parse_ops;
use nom::IResult;
use nom_locate::position;
/// Lexes the `struct` keyword into a [`ScriptTokenType::Class`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_struct(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("struct", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Class,
    };
    Ok((rest, token))
}
/// Lexes the `else` keyword into a [`ScriptTokenType::Else`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_else(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("else", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Else,
    };
    Ok((rest, token))
}
/// Lexes the `fn` keyword into a [`ScriptTokenType::Function`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_function(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("fn", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Function,
    };
    Ok((rest, token))
}
/// Lexes the `for` keyword into a [`ScriptTokenType::For`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_for(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("for", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::For,
    };
    Ok((rest, token))
}
/// Lexes the `if` keyword into a [`ScriptTokenType::If`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_if(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("if", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::If,
    };
    Ok((rest, token))
}
pub fn token_null(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("null")(input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: ScriptTokenType::Null,
},
))
}
/// Lexes the `print` keyword into a [`ScriptTokenType::Print`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_print(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("print", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Print,
    };
    Ok((rest, token))
}
/// Lexes the `return` keyword into a [`ScriptTokenType::Return`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_return(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("return", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Return,
    };
    Ok((rest, token))
}
/// Lexes the `super` keyword into a [`ScriptTokenType::Super`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_super(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("super", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Super,
    };
    Ok((rest, token))
}
/// Lexes the `this` keyword into a [`ScriptTokenType::This`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_this(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("this", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::This,
    };
    Ok((rest, token))
}
/// Lexes the `let` keyword into a [`ScriptTokenType::Let`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_let(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("let", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Let,
    };
    Ok((rest, token))
}
/// Lexes the `while` keyword into a [`ScriptTokenType::While`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_while(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("while", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::While,
    };
    Ok((rest, token))
}
/// Lexes the `export` keyword into a [`ScriptTokenType::Export`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_export(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("export", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Export,
    };
    Ok((rest, token))
}
/// Lexes the `import` keyword into a [`ScriptTokenType::Import`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_import(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("import", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Import,
    };
    Ok((rest, token))
}
/// Lexes the `as` keyword into a [`ScriptTokenType::Alias`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_alias(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("as", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Alias,
    };
    Ok((rest, token))
}
/// Lexes the `from` keyword into a [`ScriptTokenType::From`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_from(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("from", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::From,
    };
    Ok((rest, token))
}
/// Lexes the `typeof` keyword into a [`ScriptTokenType::Typeof`] token.
///
/// The token position is captured before `tag_ws` attempts the match; parser
/// errors are propagated to the caller.
pub fn token_typeof(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _keyword) = tag_ws("typeof", after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Typeof,
    };
    Ok((rest, token))
}
pub fn token_finally(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag_ws("finally", input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: ScriptTokenType::Finally,
},
))
// Keyword lexer table. Each row names a lexer function, the keyword text it
// matches, and the token type it produces; `tag_ws` is passed as the matcher.
// NOTE(review): the `parse_ops!` definition is not visible here — presumably it
// expands each row into a `pub fn` of the same shape as the hand-written
// lexers it replaces; confirm against the macro.
parse_ops!(
tag_ws ;
(token_struct, "struct" => ScriptTokenType::Class),
(token_else, "else" => ScriptTokenType::Else),
(token_function, "fn" => ScriptTokenType::Function),
(token_for, "for" => ScriptTokenType::For),
(token_if, "if" => ScriptTokenType::If),
(token_null, "null" => ScriptTokenType::Null),
(token_print, "print" => ScriptTokenType::Print),
(token_return, "return" => ScriptTokenType::Return),
(token_super, "super" => ScriptTokenType::Super),
(token_this, "this" => ScriptTokenType::This),
(token_let, "let" => ScriptTokenType::Let),
(token_while, "while" => ScriptTokenType::While),
(token_export, "export" => ScriptTokenType::Export),
(token_import, "import" => ScriptTokenType::Import),
(token_alias, "as" => ScriptTokenType::Alias),
(token_from, "from" => ScriptTokenType::From),
(token_typeof, "typeof" => ScriptTokenType::Typeof),
(token_finally, "finally" => ScriptTokenType::Finally)
);
#[cfg(test)]
mod keyword_checks {
    use super::*;
    use crate::lexer::Span;
    use test_case::test_case;

    /// Runs one keyword lexer over `tok` and reports the token type it
    /// produced, or the nom error it failed with.
    #[test_case(token_struct, "struct" => Ok(ScriptTokenType::Class) ; "Scan token_struct")]
    #[test_case(token_else, "else" => Ok(ScriptTokenType::Else) ; "Scan token_else")]
    #[test_case(token_function, "fn" => Ok(ScriptTokenType::Function) ; "Scan token_function")]
    #[test_case(token_for, "for" => Ok(ScriptTokenType::For) ; "Scan token_for")]
    #[test_case(token_if, "if" => Ok(ScriptTokenType::If) ; "Scan token_if")]
    #[test_case(token_null, "null" => Ok(ScriptTokenType::Null) ; "Scan token_null")]
    #[test_case(token_print, "print" => Ok(ScriptTokenType::Print) ; "Scan token_print")]
    #[test_case(token_return, "return" => Ok(ScriptTokenType::Return) ; "Scan token_return")]
    #[test_case(token_super, "super" => Ok(ScriptTokenType::Super) ; "Scan token_super")]
    #[test_case(token_this, "this" => Ok(ScriptTokenType::This) ; "Scan token_this")]
    #[test_case(token_let, "let" => Ok(ScriptTokenType::Let) ; "Scan token_let")]
    #[test_case(token_while, "while" => Ok(ScriptTokenType::While) ; "Scan token_while")]
    #[test_case(token_export, "export" => Ok(ScriptTokenType::Export) ; "Scan token_export")]
    #[test_case(token_import, "import" => Ok(ScriptTokenType::Import) ; "Scan token_import")]
    #[test_case(token_alias, "as" => Ok(ScriptTokenType::Alias) ; "Scan token_alias")]
    #[test_case(token_from, "from" => Ok(ScriptTokenType::From) ; "Scan token_from")]
    #[test_case(token_typeof, "typeof" => Ok(ScriptTokenType::Typeof) ; "Scan token_typeof")]
    #[test_case(token_finally, "finally" => Ok(ScriptTokenType::Finally) ; "Scan token_finally")]
    fn parse_token(
        func: impl Fn(Span) -> IResult<Span, ScriptToken>,
        tok: &str,
    ) -> Result<ScriptTokenType, nom::Err<nom::error::Error<Span>>> {
        let (_remaining, token) = func(Span::new(tok))?;
        Ok(token.token_type)
    }
}
......@@ -7,12 +7,12 @@ mod strings;
mod tokens;
use keywords::{
token_alias, token_else, token_export, token_for, token_from, token_function, token_if,
token_import, token_let, token_null, token_print, token_return, token_struct, token_super,
token_this, token_typeof, token_while,
token_alias, token_else, token_export, token_finally, token_for, token_from, token_function,
token_if, token_import, token_let, token_null, token_print, token_return, token_struct,
token_super, token_this, token_typeof, token_while,
};
use operators::{
token_asterisk, token_bang, token_bang_equal, token_caret, token_comma, token_dot,
token_asterisk, token_bang, token_bang_equal, token_caret, token_colon, token_comma, token_dot,
token_double_ampersand, token_double_pipe, token_equal, token_equal_equal, token_greater,
token_greater_equal, token_left_brace, token_left_paren, token_less, token_less_equal,
token_minus, token_modulo, token_plus, token_right_brace, token_right_paren, token_semicolon,
......@@ -30,7 +30,6 @@ mod _lex {
use nom::branch::alt;
use nom::IResult;
use crate::lexer::keywords::token_finally;
use nom::character::complete::multispace0;
use nom::multi::fold_many0;
use nom::sequence::delimited;
......@@ -57,6 +56,15 @@ mod _lex {
token_typeof,
token_finally,
)),
// Double character tokens
alt((
token_equal_equal,
token_bang_equal,
token_less_equal,
token_greater_equal,
token_double_ampersand,
token_double_pipe,
)),
alt((
token_plus,
token_minus,
......@@ -71,19 +79,10 @@ mod _lex {
token_left_paren,
token_right_brace,
token_right_paren,
token_double_ampersand,
token_double_pipe,
token_semicolon,
token_colon,
)),
alt((
token_equal_equal,
token_bang_equal,
token_less_equal,
token_greater_equal,
token_less,
token_greater,
token_equal,
)),
alt((token_less, token_greater, token_equal)),
alt((
token_float,
token_int,
......@@ -115,12 +114,56 @@ mod _lex {
use super::*;
use test_case::test_case;
#[test_case("123" => matches Ok(ScriptTokenType::Integer(123)))]
#[test_case("0.123" => matches Ok(ScriptTokenType::Float(_)))]
#[test_case("null" => matches Ok(ScriptTokenType::Null))]
#[test_case("%" => Ok(ScriptTokenType::Modulo) ; "Expects ScriptTokenType::Modulo")]
#[test_case("^" => Ok(ScriptTokenType::Caret) ; "Expects ScriptTokenType::Caret")]
#[test_case("!" => Ok(ScriptTokenType::Bang) ; "Expects ScriptTokenType::Bang")]
#[test_case("(" => Ok(ScriptTokenType::LeftParen) ; "Expects ScriptTokenType::LeftParen")]
#[test_case(")" => Ok(ScriptTokenType::RightParen) ; "Expects ScriptTokenType::RightParen")]
#[test_case("{" => Ok(ScriptTokenType::LeftBrace) ; "Expects ScriptTokenType::LeftBrace")]
#[test_case("}" => Ok(ScriptTokenType::RightBrace) ; "Expects ScriptTokenType::RightBrace")]
#[test_case("," => Ok(ScriptTokenType::Comma) ; "Expects ScriptTokenType::Comma")]
#[test_case("." => Ok(ScriptTokenType::Dot) ; "Expects ScriptTokenType::Dot")]
#[test_case("-" => Ok(ScriptTokenType::Minus) ; "Expects ScriptTokenType::Minus")]
#[test_case("+" => Ok(ScriptTokenType::Plus) ; "Expects ScriptTokenType::Plus")]
#[test_case(";" => Ok(ScriptTokenType::Semicolon) ; "Expects ScriptTokenType::Semicolon")]
#[test_case("/" => Ok(ScriptTokenType::Slash) ; "Expects ScriptTokenType::Slash")]
#[test_case("*" => Ok(ScriptTokenType::Asterisk) ; "Expects ScriptTokenType::Asterisk")]
#[test_case("=" => Ok(ScriptTokenType::Equal) ; "Expects ScriptTokenType::Equal")]
#[test_case("!=" => Ok(ScriptTokenType::BangEqual) ; "Expects ScriptTokenType::BangEqual")]
#[test_case("==" => Ok(ScriptTokenType::EqualEqual) ; "Expects ScriptTokenType::EqualEqual")]
#[test_case(">" => Ok(ScriptTokenType::Greater) ; "Expects ScriptTokenType::Greater")]
#[test_case(">=" => Ok(ScriptTokenType::GreaterEqual) ; "Expects ScriptTokenType::GreaterEqual")]
#[test_case("&&" => Ok(ScriptTokenType::DoubleAmpersand) ; "Expects ScriptTokenType::DoubleAmpersand")]
#[test_case("||" => Ok(ScriptTokenType::DoublePipe) ; "Expects ScriptTokenType::DoublePipe")]
#[test_case("<" => Ok(ScriptTokenType::Less) ; "Expects ScriptTokenType::Less")]
#[test_case("<=" => Ok(ScriptTokenType::LessEqual) ; "Expects ScriptTokenType::LessEqual")]
#[test_case("\"Foo\"" => Ok(ScriptTokenType::OwnedString(String::from("Foo"))) ; "Expects ScriptTokenType::OwnedString")]
#[test_case("123" => Ok(ScriptTokenType::Integer(123)) ; "Expects ScriptTokenType::Integer")]
#[test_case("1_2_3" => Ok(ScriptTokenType::Integer(123)) ; "Expects ScriptTokenType::Integer underscored")]
#[test_case("123.123" => Ok(ScriptTokenType::Float(123.123)) ; "Expects ScriptTokenType::Float")]
#[test_case("12_3.1_23" => Ok(ScriptTokenType::Float(123.123)) ; "Expects ScriptTokenType::Float underscored")]
#[test_case("struct" => Ok(ScriptTokenType::Class) ; "Expects ScriptTokenType::Class")]
#[test_case("else" => Ok(ScriptTokenType::Else) ; "Expects ScriptTokenType::Else")]
#[test_case("fn" => Ok(ScriptTokenType::Function) ; "Expects ScriptTokenType::Function")]
#[test_case("for" => Ok(ScriptTokenType::For) ; "Expects ScriptTokenType::For")]
#[test_case("if" => Ok(ScriptTokenType::If) ; "Expects ScriptTokenType::If")]
#[test_case("null" => Ok(ScriptTokenType::Null) ; "Expects ScriptTokenType::Null")]
#[test_case("print" => Ok(ScriptTokenType::Print) ; "Expects ScriptTokenType::Print")]
#[test_case("return" => Ok(ScriptTokenType::Return) ; "Expects ScriptTokenType::Return")]
#[test_case("super" => Ok(ScriptTokenType::Super) ; "Expects ScriptTokenType::Super")]
#[test_case("this" => Ok(ScriptTokenType::This) ; "Expects ScriptTokenType::This")]
#[test_case("let" => Ok(ScriptTokenType::Let) ; "Expects ScriptTokenType::Let")]
#[test_case("while" => Ok(ScriptTokenType::While) ; "Expects ScriptTokenType::While")]
#[test_case("export" => Ok(ScriptTokenType::Export) ; "Expects ScriptTokenType::Export")]
#[test_case("import" => Ok(ScriptTokenType::Import) ; "Expects ScriptTokenType::Import")]
#[test_case("as" => Ok(ScriptTokenType::Alias) ; "Expects ScriptTokenType::Alias")]
#[test_case("from" => Ok(ScriptTokenType::From) ; "Expects ScriptTokenType::From")]
#[test_case("typeof" => Ok(ScriptTokenType::Typeof) ; "Expects ScriptTokenType::Typeof")]
#[test_case("finally" => Ok(ScriptTokenType::Finally) ; "Expects ScriptTokenType::Finally")]
#[test_case("foo" => Ok(ScriptTokenType::Identifier(String::from("foo"))) ; "Expects ScriptTokenType::Identifier foo")]
#[test_case("foo_bar" => Ok(ScriptTokenType::Identifier(String::from("foo_bar"))) ; "Expects ScriptTokenType::Identifier foo_bar")]
#[test_case("fo2ob4r" => Ok(ScriptTokenType::Identifier(String::from("fo2ob4r"))) ; "Expects ScriptTokenType::Identifier fo2ob4r")]
#[test_case("foobar" => matches Ok(ScriptTokenType::Identifier(_)))]
#[test_case("true" => matches Ok(ScriptTokenType::Boolean(true)))]
#[test_case("false" => matches Ok(ScriptTokenType::Boolean(false)))]
fn correct_lexing(inp: &str) -> Result<ScriptTokenType, ()> {
any_token(Span::new(inp))
.map_err(|_| ())
......
......@@ -3,324 +3,74 @@ use crate::lexer::{ScriptToken, ScriptTokenType};
use nom::bytes::complete::tag;
use nom::IResult;
use nom_locate::position;
/// Lexes a literal `(` into a [`ScriptTokenType::LeftParen`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_left_paren(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("(")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::LeftParen,
    };
    Ok((rest, token))
}
/// Lexes a literal `)` into a [`ScriptTokenType::RightParen`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_right_paren(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag(")")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::RightParen,
    };
    Ok((rest, token))
}
/// Lexes a literal `{` into a [`ScriptTokenType::LeftBrace`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_left_brace(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("{")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::LeftBrace,
    };
    Ok((rest, token))
}
/// Lexes a literal `}` into a [`ScriptTokenType::RightBrace`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_right_brace(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("}")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::RightBrace,
    };
    Ok((rest, token))
}
/// Lexes a literal `,` into a [`ScriptTokenType::Comma`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_comma(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag(",")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Comma,
    };
    Ok((rest, token))
}
/// Lexes a literal `.` into a [`ScriptTokenType::Dot`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_dot(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag(".")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Dot,
    };
    Ok((rest, token))
}
/// Lexes a literal `-` into a [`ScriptTokenType::Minus`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_minus(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("-")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Minus,
    };
    Ok((rest, token))
}
/// Lexes a literal `+` into a [`ScriptTokenType::Plus`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_plus(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("+")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Plus,
    };
    Ok((rest, token))
}
/// Lexes a literal `;` into a [`ScriptTokenType::Semicolon`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_semicolon(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag(";")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Semicolon,
    };
    Ok((rest, token))
}
/// Lexes a literal `/` into a [`ScriptTokenType::Slash`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_slash(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("/")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Slash,
    };
    Ok((rest, token))
}
/// Lexes a literal `*` into a [`ScriptTokenType::Asterisk`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_asterisk(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("*")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Asterisk,
    };
    Ok((rest, token))
}
/// Lexes a literal `!` into a [`ScriptTokenType::Bang`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_bang(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("!")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Bang,
    };
    Ok((rest, token))
}
/// Lexes a literal `!=` into a [`ScriptTokenType::BangEqual`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_bang_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("!=")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::BangEqual,
    };
    Ok((rest, token))
}
/// Lexes a literal `=` into a [`ScriptTokenType::Equal`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("=")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Equal,
    };
    Ok((rest, token))
}
/// Lexes a literal `==` into a [`ScriptTokenType::EqualEqual`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_equal_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("==")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::EqualEqual,
    };
    Ok((rest, token))
}
/// Lexes a literal `>` into a [`ScriptTokenType::Greater`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_greater(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag(">")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Greater,
    };
    Ok((rest, token))
}
/// Lexes a literal `>=` into a [`ScriptTokenType::GreaterEqual`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_greater_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag(">=")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::GreaterEqual,
    };
    Ok((rest, token))
}
/// Lexes a literal `<` into a [`ScriptTokenType::Less`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_less(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("<")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Less,
    };
    Ok((rest, token))
}
/// Lexes a literal `<=` into a [`ScriptTokenType::LessEqual`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_less_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("<=")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::LessEqual,
    };
    Ok((rest, token))
}
/// Lexes a literal `||` into a [`ScriptTokenType::DoublePipe`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_double_pipe(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("||")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::DoublePipe,
    };
    Ok((rest, token))
}
/// Lexes a literal `&&` into a [`ScriptTokenType::DoubleAmpersand`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_double_ampersand(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("&&")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::DoubleAmpersand,
    };
    Ok((rest, token))
}
/// Lexes a literal `%` into a [`ScriptTokenType::Modulo`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_modulo(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("%")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Modulo,
    };
    Ok((rest, token))
}
/// Lexes a literal `^` into a [`ScriptTokenType::Caret`] token.
///
/// The token position is captured before the literal is matched; parser errors
/// are propagated to the caller.
pub fn token_caret(input: Span) -> IResult<Span, ScriptToken> {
    let (after_position, start) = position(input)?;
    let (rest, _symbol) = tag("^")(after_position)?;
    let token = ScriptToken {
        position: start,
        token_type: ScriptTokenType::Caret,
    };
    Ok((rest, token))
}
use crate::parse_ops;
// Operator lexer table. Each row names a lexer function, the literal text it
// matches, and the token type it produces; plain `tag` is passed as the
// matcher, so no word-boundary check applies to operators.
// NOTE(review): the `parse_ops!` definition is not visible here — presumably it
// expands each row into a `pub fn` of the same shape as the hand-written
// lexers it replaces; confirm against the macro.
parse_ops!(
tag ;
(token_colon, ":" => ScriptTokenType::Colon),
(token_left_paren, "(" => ScriptTokenType::LeftParen),
(token_right_paren, ")" => ScriptTokenType::RightParen),
(token_left_brace, "{" => ScriptTokenType::LeftBrace),
(token_right_brace, "}" => ScriptTokenType::RightBrace),
(token_comma, "," => ScriptTokenType::Comma),
(token_dot, "." => ScriptTokenType::Dot),
(token_minus, "-" => ScriptTokenType::Minus),
(token_plus, "+" => ScriptTokenType::Plus),
(token_semicolon, ";" => ScriptTokenType::Semicolon),
(token_slash, "/" => ScriptTokenType::Slash),
(token_asterisk, "*" => ScriptTokenType::Asterisk),
(token_bang, "!" => ScriptTokenType::Bang),
(token_bang_equal, "!=" => ScriptTokenType::BangEqual),
(token_equal, "=" => ScriptTokenType::Equal),
(token_equal_equal, "==" => ScriptTokenType::EqualEqual),
(token_greater, ">" => ScriptTokenType::Greater),
(token_greater_equal, ">=" => ScriptTokenType::GreaterEqual),
(token_less, "<" => ScriptTokenType::Less),
(token_less_equal, "<=" => ScriptTokenType::LessEqual),
(token_double_pipe, "||" => ScriptTokenType::DoublePipe),
(token_double_ampersand, "&&" => ScriptTokenType::DoubleAmpersand),
(token_modulo, "%" => ScriptTokenType::Modulo),
(token_caret, "^" => ScriptTokenType::Caret)
);
#[cfg(test)]
mod operator_checks {
    use super::*;
    use crate::lexer::Span;
    use test_case::test_case;

    /// Wraps a string slice in a `Span` so it can be fed to a lexer function.
    fn s(st: &str) -> Span {
        Span::new(st)
    }

    /// Runs one operator lexer over `tok` and reports the token type it
    /// produced, or the nom error it failed with.
    ///
    /// The brace and paren rows cover the same four assertions as the former
    /// hand-written `parse_brackets` test, so that test is not kept.
    #[test_case(token_colon, ":" => Ok(ScriptTokenType::Colon) ; "Scan token_colon")]
    #[test_case(token_left_paren, "(" => Ok(ScriptTokenType::LeftParen) ; "Scan token_left_paren")]
    #[test_case(token_right_paren, ")" => Ok(ScriptTokenType::RightParen) ; "Scan token_right_paren")]
    #[test_case(token_left_brace, "{" => Ok(ScriptTokenType::LeftBrace) ; "Scan token_left_brace")]
    #[test_case(token_right_brace, "}" => Ok(ScriptTokenType::RightBrace) ; "Scan token_right_brace")]
    #[test_case(token_comma, "," => Ok(ScriptTokenType::Comma) ; "Scan token_comma")]
    #[test_case(token_dot, "." => Ok(ScriptTokenType::Dot) ; "Scan token_dot")]
    #[test_case(token_minus, "-" => Ok(ScriptTokenType::Minus) ; "Scan token_minus")]
    #[test_case(token_plus, "+" => Ok(ScriptTokenType::Plus) ; "Scan token_plus")]
    #[test_case(token_semicolon, ";" => Ok(ScriptTokenType::Semicolon) ; "Scan token_semicolon")]
    #[test_case(token_slash, "/" => Ok(ScriptTokenType::Slash) ; "Scan token_slash")]
    #[test_case(token_asterisk, "*" => Ok(ScriptTokenType::Asterisk) ; "Scan token_asterisk")]
    #[test_case(token_bang, "!" => Ok(ScriptTokenType::Bang) ; "Scan token_bang")]
    #[test_case(token_bang_equal, "!=" => Ok(ScriptTokenType::BangEqual) ; "Scan token_bang_equal")]
    #[test_case(token_equal, "=" => Ok(ScriptTokenType::Equal) ; "Scan token_equal")]
    #[test_case(token_equal_equal, "==" => Ok(ScriptTokenType::EqualEqual) ; "Scan token_equal_equal")]
    #[test_case(token_greater, ">" => Ok(ScriptTokenType::Greater) ; "Scan token_greater")]
    #[test_case(token_greater_equal, ">=" => Ok(ScriptTokenType::GreaterEqual) ; "Scan token_greater_equal")]
    #[test_case(token_less, "<" => Ok(ScriptTokenType::Less) ; "Scan token_less")]
    #[test_case(token_less_equal, "<=" => Ok(ScriptTokenType::LessEqual) ; "Scan token_less_equal")]
    #[test_case(token_double_pipe, "||" => Ok(ScriptTokenType::DoublePipe) ; "Scan token_double_pipe")]
    #[test_case(token_double_ampersand, "&&" => Ok(ScriptTokenType::DoubleAmpersand) ; "Scan token_double_ampersand")]
    #[test_case(token_modulo, "%" => Ok(ScriptTokenType::Modulo) ; "Scan token_modulo")]
    #[test_case(token_caret, "^" => Ok(ScriptTokenType::Caret) ; "Scan token_caret")]
    fn parse_token(
        func: impl Fn(Span) -> IResult<Span, ScriptToken>,
        tok: &str,
    ) -> Result<ScriptTokenType, nom::Err<nom::error::Error<Span>>> {
        func(s(tok)).map(|t| t.1.token_type)
    }
}
......@@ -287,7 +287,7 @@ mod token_tests {
#[test_case("-" => Ok(ScriptTokenType::Minus); r#"Parse Minus"#)]
#[test_case("+" => Ok(ScriptTokenType::Plus); r#"Parse Plus"#)]
#[test_case(";" => Ok(ScriptTokenType::Semicolon); r#"Parse Semicolon"#)]
#[test_case(":" => Ok(ScriptTokenType::Semicolon); r#"Parse Colon"#)]
#[test_case(":" => Ok(ScriptTokenType::Colon); r#"Parse Colon"#)]
#[test_case("/" => Ok(ScriptTokenType::Slash); r#"Parse Slash"#)]
#[test_case("*" => Ok(ScriptTokenType::Asterisk); r#"Parse Asterisk"#)]
#[test_case("!" => Ok(ScriptTokenType::Bang); r#"Parse Bang"#)]
......
use crate::error::ForgeResult;
use crate::lexer::{script_to_tokens, ScriptTokenType};
use crate::parser::ast::{Expression, Program};
use crate::TokenError;
use crate::pratt::{Scanner, ScannerError};
use crate::{ForgeError, ForgeErrorKind, TokenError};
use peg::Parse;
/// A `(Loc, Tok, Loc)` triple, or a `String` error.
pub type InputSpan<'a, Loc, Tok> = Result<(Loc, Tok, Loc), String>;
/// `InputSpan` specialised to `usize` locations and `ScriptTokenType` tokens.
type ExprSpan<'a> = InputSpan<'a, usize, ScriptTokenType>;
/// A `(usize, ScriptTokenType, usize)` triple, or a `ScannerError`.
pub type SpanSpan<'a> = Result<(usize, ScriptTokenType, usize), ScannerError>;
/// Generates a public parse entry point named `$name` that returns `$output`.
///
/// The generated function lexes `source` with `script_to_tokens`, maps every
/// token to a `(start, token, end)` triple — where `end` is the start offset
/// plus the token type's `len()` — and feeds that iterator to
/// `super::forge_grammar::$part`. Lexer failures propagate through `?`; parser
/// failures are converted with `ForgeError::from`.
macro_rules! export_grammar_fn {
    ($name:ident = $output:ty => $part: tt) => {
        pub fn $name(source: &str) -> Result<$output, ForgeError> {
            let scanner = script_to_tokens(source)?;
            let scanner_iter = scanner.iter().map(|tok| {
                Ok((
                    tok.position.location_offset(),
                    tok.token_type.clone(),
                    tok.position.location_offset() + tok.token_type.len(),
                ))
            });
            super::forge_grammar::$part::new()
                .parse(scanner_iter)
                .map_err(|e| {
                    // NOTE(review): the error is printed to stderr and also
                    // returned to the caller — confirm the print is intended.
                    eprintln!("{}", &e);
                    ForgeError::from(e)
                })
        }
    };
}
......@@ -39,6 +39,7 @@ mod grammar_test {
use super::{parse_expression, parse_program};
use crate::parse::ast::Expression;
use crate::parser::ast::Program;
use crate::ForgeError;
use test_case::test_case;
#[test_case("123" => matches Ok(_) ; "Parse literal number")]
......@@ -60,7 +61,8 @@ mod grammar_test {
#[test_case("fn has_no_params() {}" => matches Ok(_) ; "Declare empty params fn")]
#[test_case("fn do_some_stuff(foo = 123, bar, baz = \"My Default String\") {}" => matches Ok(_) ; "Declare complex fn")]
#[test_case("let som_value = fn do_some_stuff(foo = 123, bar, baz = \"My Default String\") {}" => matches Ok(_) ; "First class fn")]
fn expression_parsing(prog: &str) -> Result<Expression, String> {
#[test_case("if if if inner_call() { true } else { false } { true } else { false } { body() } else { fallback_body() }" => matches Ok(_) ; "totally_whack_condition")]
fn expression_parsing(prog: &str) -> Result<Expression, ForgeError> {
parse_expression(prog).map(|expr| {
dbg!(&expr);
expr
......@@ -70,7 +72,7 @@ mod grammar_test {
#[test_case("10 - 2 * 4 + 3; false; 12 + 14" => matches Ok(_) ; "Parse value expr list")]
#[test_case("10 - 2 * 4 + 3; false; 12 + 14;" => matches Ok(_) ; "Parse void expr list")]
#[test_case("10 - 2 * 4 + 3;; false; 12 + 14;;;;;" => matches Ok(_) ; "Infinite semicolons")]
fn program_parsing(prog: &str) -> Result<Program, String> {
fn program_parsing(prog: &str) -> Result<Program, ForgeError> {
parse_program(prog).map(|expr| {
dbg!(&expr);
expr
......
use crate::parser::TokenSlice;
peg::parser! {
grammar forge_parser<'a>() for TokenSlice<'a> {
use crate::parser::ast::*;
use crate::runtime::numbers::Number;
use crate::lexer::{ScriptToken, ScriptTokenType};
// Entry rule: an expression list followed by end of input, wrapped in `Program`.
pub rule program() -> Program
= ex:expression_list() eof() { Program(ex) }
// Entry rule for a single expression. A value expression is tried first; a
// void expression is only attempted if that fails.
pub rule expression() -> Expression
= ex:value_expression() { Expression::Value(ex) }
/ ex:void_expression() { Expression::Void(ex) }
// Void expressions, tried in order: `print`, `import { .. } from "..."`,
// `export { .. }`, and the `while` loop.
rule void_expression() -> VoidExpression
= ex:print() { VoidExpression::Print(ex) }
/ "import" "{" items:identifier_list() "}" "from" source:string_value() { VoidExpression::Import(Import { source, items }) }
/ "export" "{" items:identifier_list() "}" { VoidExpression::Export(Export { items }) }
/ e:condition_loop() { VoidExpression::ConditionLoop(e) }
// Value-producing expressions. Alternatives are tried top to bottom and the
// first match wins. The binary alternative starts with `value_expression()`
// itself, which is why the rule carries `#[cache_left_rec]`.
// A function call (`name(...)`) is listed before the bare-identifier
// alternative, and the bare identifier additionally refuses to match when it
// is followed by `(`.
// NOTE(review): all binary operators share one alternative, so no operator
// precedence is visible in this rule — confirm where precedence is handled.
#[cache_left_rec]
rule value_expression() -> ValueExpression
=
co:conditional_statement() { ValueExpression::ConditionalBlock(co) }
/ t:type_of() { ValueExpression::Typeof(t) }
// / binary_expression()
/ decl:declare_variable() { ValueExpression::DeclareIdentifier(decl) }
/ decl:declare_function() { ValueExpression::DeclareFunction(decl) }
/ name:simple_identifier() "(" params:param_list()? ")"
{ ValueExpression::FunctionCall(FunctionCall { name, params: params.unwrap_or_default() }) }
/ op:unary_operator() operand:value_expression()
{ ValueExpression::Unary { operator: op, operand: Box::new(operand) } }
/ left:value_expression() op:binary_operator() right:value_expression()
{ ValueExpression::Binary { lhs: Box::new(left), rhs: Box::new(right), operator: op } }
/ grouped()
/ ident:simple_identifier() !"(" { ValueExpression::Identifier(ident) }
/ li:literal() { ValueExpression::Literal(li) }
// A parenthesised value expression, kept in the AST as `GroupedExpression`.
rule grouped() -> ValueExpression
= "(" ex:value_expression() ")"
{ ValueExpression::Grouped(GroupedExpression { inner: Box::new(ex) }) }
// `print <value>`; the value expression is converted into `Print` via `Into`.
rule print() -> Print
= "print" ex:value_expression() { ex.into() }
// `typeof <value>`.
rule type_of() -> TypeofValue
= "typeof" ex:value_expression() { TypeofValue(Box::new(ex)) }
// `fn name(params) { body }`. `**` accepts zero or more comma-separated params.
rule declare_function() -> DeclareFunction
= "fn" ident:simple_identifier() "(" params:(function_param() ** ",") ")" body:block()
{ DeclareFunction { ident, params, body } }
// A parameter with or without a default value. The assignment form is tried
// first so that `foo = 123` is not consumed as the bare identifier `foo`.
rule function_param() -> DeclareIdent
= assign:assignment() { DeclareIdent::WithValue(assign) }
/ ident:simple_identifier() { DeclareIdent::WithoutValue(ident) }
// `name = <value>`.
rule assignment() -> Assignment
= ident:simple_identifier() "=" ex:value_expression() { Assignment { ident, value: Box::new(ex) } }
// `let name = <value>` or `let name`. The initialised form is tried first.
rule declare_variable() -> DeclareIdent
= "let" assign:assignment() { DeclareIdent::WithValue(assign) }
/ "let" ident:simple_identifier() { DeclareIdent::WithoutValue(ident) }
// `while <guard> { ... }` with an optional `finally { ... }` fallback block.
//
// The `finally` clause is an optional suffix of a single alternative. With two
// ordered alternatives the plain `while` form would match first and the choice
// would commit to it, so the `finally` form could never be selected.
rule condition_loop() -> ConditionalLoop
= "while" guard:value_expression() block:block() fallback:("finally" fb:block() { fb })?
{ ConditionalLoop { block: GuardedBlock { guard: Box::new(guard), block }, fallback } }
// `if ... { } else if ... { } ... [else { }]`.
//
// One or more guarded blocks separated by `else`, followed by an optional
// trailing `else { ... }` fallback. The fallback is an optional suffix so the
// chain of guarded blocks is parsed once; a trailing `else` that is not followed
// by a block is left unconsumed and `fallback` is `None`.
rule conditional_statement() -> Conditional
= blocks:(conditional_block() ++ "else") fallback:("else" fb:block() { fb })?
{ Conditional { blocks, fallback } }
// One `if <guard> { ... }` arm. The body falls back to the default
// `ExpressionList` when the optional expression list yields nothing.
rule conditional_block() -> GuardedBlock
= "if" guard:value_expression() "{" block:expression_list()? "}"
{ GuardedBlock { block: block.unwrap_or_default(), guard: Box::new(guard) } }
// A brace-delimited expression list.
rule block() -> ExpressionList
= "{" ex:expression_list() "}" { ex }
// Zero or more expressions separated by semicolons, with optional trailing
// semicolons. `is_void` records whether the list ended with a terminator.
//
// Both the separator and the terminator accept a run of one or more `;`, so
// inputs such as `a;; b;;;` parse instead of failing on the repeated `;`.
rule expression_list() -> ExpressionList
= ex:(expression() ** (";"+)) term:(";"+)? { ExpressionList { expressions: ex, is_void: term.is_some() } }
// #[cache_left_rec]
// rule precedence_expression() -> ValueExpression
// = precedence! {
// val:value_expression() { val }
// "-" z:(@) { ValueExpression::Unary { operator: UnaryOp::Negate, operand: Box::new(z) } }
// --
// x:(@) "+" y:@ { ValueExpression::Binary { operator: BinaryOp::Add, lhs: Box::new(x), rhs: Box::new(y) } }
// x:(@) "-" y:@ { ValueExpression::Binary { operator: BinaryOp::Subtract, lhs: Box::new(x), rhs: Box::new(y) } }
// --
// x:(@) "*" y:@ { ValueExpression::Binary { operator: BinaryOp::Multiply, lhs: Box::new(x), rhs: Box::new(y) } }
// x:(@) "/" y:@ { ValueExpression::Binary { operator: BinaryOp::Divide, lhs: Box::new(x), rhs: Box::new(y) } }
// x:(@) "%" y:@ { ValueExpression::Binary { operator: BinaryOp::Modulo, lhs: Box::new(x), rhs: Box::new(y) } }
// --
// x:(@) "==" y:@ { ValueExpression::Binary { operator: BinaryOp::Equals, lhs: Box::new(x), rhs: Box::new(y) } }
// x:(@) "&&" y:@ { ValueExpression::Binary { operator: BinaryOp::BoolAnd, lhs: Box::new(x), rhs: Box::new(y) } }
// x:(@) "||" y:@ { ValueExpression::Binary { operator: BinaryOp::BoolOr, lhs: Box::new(x), rhs: Box::new(y) } }
// --
// "!" z:(@) { ValueExpression::Unary { operator: UnaryOp::Not, operand: Box::new(z) } }
// --
// "(" ex:precedence_expression() ")" { ValueExpression::Grouped(GroupedExpression { inner: Box::new(ex) }) }
// }
// Maps an infix operator token to its `BinaryOp` variant.
rule binary_operator() -> BinaryOp
= "+" { BinaryOp::Add }
/ "-" { BinaryOp::Subtract }
/ "*" { BinaryOp::Multiply }
/ "/" { BinaryOp::Divide }
/ "%" { BinaryOp::Modulo }
/ "==" { BinaryOp::Equals }
/ "&&" { BinaryOp::BoolAnd }
/ "||" { BinaryOp::BoolOr }
// Maps a prefix operator token to its `UnaryOp` variant. `-` also appears in
// `binary_operator`; which meaning applies depends on the calling alternative.
rule unary_operator() -> UnaryOp
= "!" { UnaryOp::Not }
/ "-" { UnaryOp::Negate }
// One or more comma-separated identifiers (plain or aliased).
rule identifier_list() -> IdentifierList
= identifier() ++ ","
// One or more comma-separated call arguments.
rule param_list() -> ParameterList
= value_expression() ++ ","
// An identifier with an optional alias. The alias form is tried first so that
// `a as b` is not cut short after `a`.
rule identifier() -> IdentifierNode
= id:alias_identifier() { IdentifierNode::Alias(id) }
/ id:simple_identifier() { IdentifierNode::Direct(id) }
// `base as alias`, stored as the two inner names.
rule alias_identifier() -> IdentifierAlias
= base:simple_identifier() "as" alias:simple_identifier() { IdentifierAlias(base.0, alias.0) }
// Matches a single identifier token and copies its name into an owned `String`.
rule simple_identifier() -> Identifier
= [ScriptToken { token_type: ScriptTokenType::Identifier(vl), .. }] { Identifier(String::from(vl)) }
// Literal values: the keywords `true`/`false`/`null`, plus string, integer and
// float tokens. Both string token variants produce `LiteralNode::String`.
rule literal() -> LiteralNode
= "true" { LiteralNode::Boolean(true) }
/ "false" { LiteralNode::Boolean(false) }
/ "null" { LiteralNode::Null }
/ [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { LiteralNode::String(String::from(vl)) }
/ [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { LiteralNode::String(vl.clone()) }
/ [ScriptToken { token_type: ScriptTokenType::Integer(vl), .. }] { LiteralNode::Number(Number::Integer(*vl)) }
/ [ScriptToken { token_type: ScriptTokenType::Float(vl), .. }] { LiteralNode::Number(Number::Float(*vl)) }
// A string token (either variant) returned as an owned `String`.
rule string_value() -> String
= [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { String::from(vl) }
/ [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { vl.clone() }
// Succeeds only when no further token is available; consumes nothing.
rule eof() = ![_]
}
}
pub use forge_parser::{expression, program};
pub mod ast;
mod atoms;
mod forge_script;
mod grammar;
#[cfg(test)]
mod test_suite;
pub(crate) mod forge_grammar;
use crate::error::{ForgeError, ForgeErrorKind, ForgeResult};
use crate::print_forge_error;
pub use atoms::TokenSlice;
pub fn slice<'a>(toks: &'a [crate::lexer::ScriptToken]) -> TokenSlice<'a> {
......@@ -17,45 +14,3 @@ pub fn slice<'a>(toks: &'a [crate::lexer::ScriptToken]) -> TokenSlice<'a> {
pub use forge_script::parse_expression;
pub use forge_script::parse_program;
// pub fn parse_expression(expr: &str) -> ForgeResult<ast::Expression> {
// let tokens = crate::lexer::script_to_tokens(expr)?;
// let result = match grammar::expression(&TokenSlice(tokens.as_slice())) {
// Ok(expr) => Ok(expr),
// Err(parse_error) => {
// let bad_token = &tokens[parse_error.location];
// Err(ForgeError {
// kind: ForgeErrorKind::UnexpectedToken {
// found: bad_token.clone(),
// expected: parse_error.expected,
// },
// })
// }
// };
//
// result.map_err(|e| {
// print_forge_error(expr, &e);
// e
// })
// }
// pub fn parse_program(prog: &str) -> ForgeResult<ast::Program> {
// let tokens = crate::lexer::script_to_tokens(prog)?;
// let result = match grammar::program(&TokenSlice(tokens.as_slice())) {
// Ok(prog) => Ok(prog.clone()),
// Err(parse_error) => {
// let bad_token = &tokens[parse_error.location];
// Err(ForgeError {
// kind: ForgeErrorKind::UnexpectedToken {
// found: bad_token.clone(),
// expected: parse_error.expected,
// },
// })
// }
// };
//
// result.map_err(|e| {
// print_forge_error(prog, &e);
// e
// })
// }
......@@ -402,8 +402,6 @@ impl<'a> Scanner<'a> {
};
let ident = || Ok(ScriptTokenType::Identifier(String::from(val)));
eprintln!("Found ident {}", val);
let mut val_chars = val.chars();
match val_chars.next() {
Some('a') => self.check_ident(1, val, "s", ScriptTokenType::Alias),
......
......@@ -303,7 +303,8 @@ mod interpreter_test {
#[test]
/// When the first term in a binary op has already decided the outcome, don't evaluate the following terms
fn short_circuit() {
let prog = parse_program("if true || let bar = 123 { 9001 }").expect("Failed to parse");
let prog =
parse_program("if true || some_undefined_call() { 9001 }").expect("Failed to parse");
let mut vm = SimpleExecutor::default();
assert_eq!(vm.evaluate_program(&prog), Ok(ForgeValue::from(9001)));
assert!(vm.get_variable("bar").is_none());
......
......@@ -19,7 +19,8 @@ impl ChunkBuilder {
/// Parses `code` as a program and compiles it into a [`Chunk`].
///
/// The source is parsed exactly once. A parse failure is rendered with
/// `format_forge_error` against the original source text and wrapped via
/// `VmError::chunk_parser`.
///
/// # Errors
///
/// Returns a `VmError` when parsing fails or when evaluating the parsed
/// program into the builder fails.
pub fn parse(code: &str) -> Result<Chunk, VmError> {
    let mut builder = ChunkBuilder::new();
    // A single parse: an earlier bare `parse_program(code)?` would return before
    // this mapping could run, bypassing the formatted error entirely.
    let program =
        parse_program(code).map_err(|e| VmError::chunk_parser(format_forge_error(code, &e)))?;
    builder.evaluate_program(&program)?;
    Ok(builder.take_chunk())
}
......
......@@ -36,6 +36,36 @@ macro_rules! deref_as {
};
}
/// Generates one `pub fn` per `(name, literal => token_type)` entry.
///
/// Each generated function has the signature
/// `fn(input: Span) -> IResult<Span, ScriptToken>`: it records `position(input)`,
/// matches the literal, and returns a `ScriptToken` carrying that position and
/// the supplied token type.
///
/// The text before the `;` selects how the literal is matched (see the two arms).
///
/// The expansion names `Span`, `IResult`, `ScriptToken`, `position` (and `tag_ws`
/// in the first arm) without a path, so they must be in scope where the macro is
/// invoked.
#[macro_export]
macro_rules! parse_ops {
// Arm for the literal matcher name `tag_ws`, which is called directly as
// `tag_ws($tok, input)`. This arm must stay first: the general `$fn:expr` arm
// below would also accept the token `tag_ws`, but would call it as
// `tag_ws($tok)(input)`.
(tag_ws ; $(($fn_name:ident, $tok:literal => $prod:expr)),+) => {
$(pub fn $fn_name(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag_ws($tok, input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: $prod,
},
))
})+
};
// General arm: `$fn($tok)` must evaluate to something callable with `input`.
($fn: expr ; $(($fn_name:ident, $tok:literal => $prod:expr)),+) => {
$(pub fn $fn_name(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = $fn($tok)(input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: $prod,
},
))
})+
};
}
/// "Clone-owned", clone a struct into an owned version of that struct, which will typically be
/// a different type to the implementor. If the struct is already an owned struct then the impl
/// _should_ perform a simple clone, though the impl may choose to perform otherwise
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment