Skip to content
Snippets Groups Projects
Verified Commit c16cc5e1 authored by Louis's avatar Louis :fire:
Browse files

Fix keyword precedence, support import/export & conditional loops

parent b5a9dad9
No related branches found
No related tags found
No related merge requests found
Pipeline #488 passed with stages
in 1 minute and 23 seconds
Showing
with 62 additions and 2012 deletions
use nom::bytes::complete::tag;
use nom::character::complete::{char, one_of};
use nom::character::complete::{char, multispace0, multispace1, one_of};
use nom::combinator::value;
use nom::error::ParseError;
use nom::multi::{many0, many1};
use nom::sequence::terminated;
use nom::IResult;
use nom::sequence::{delimited, terminated};
use nom::{Compare, IResult, InputLength, InputTake};
use nom_locate::LocatedSpan;
pub type Span<'a> = LocatedSpan<&'a str>;
......@@ -33,3 +34,6 @@ pub fn raw_decimal(input: Span) -> IResult<Span, OwnedSpan> {
))
}
}
pub fn tag_ws<'a, 'b: 'a>(tag_val: &'b str, sp: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
delimited(multispace0, tag(tag_val), multispace1)(sp)
}
use crate::lexer::atoms::tag_ws;
use crate::lexer::{ScriptToken, ScriptTokenType, Span};
use nom::bytes::complete::tag;
use nom::IResult;
......@@ -5,7 +6,7 @@ use nom_locate::position;
pub fn token_struct(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("struct")(input)?;
let (input, _) = tag_ws("struct", input)?;
Ok((
input,
ScriptToken {
......@@ -17,7 +18,7 @@ pub fn token_struct(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_else(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("else")(input)?;
let (input, _) = tag_ws("else", input)?;
Ok((
input,
ScriptToken {
......@@ -29,7 +30,7 @@ pub fn token_else(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_function(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("fn")(input)?;
let (input, _) = tag_ws("fn", input)?;
Ok((
input,
ScriptToken {
......@@ -41,7 +42,7 @@ pub fn token_function(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_for(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("for")(input)?;
let (input, _) = tag_ws("for", input)?;
Ok((
input,
ScriptToken {
......@@ -53,7 +54,7 @@ pub fn token_for(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_if(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("if")(input)?;
let (input, _) = tag_ws("if", input)?;
Ok((
input,
ScriptToken {
......@@ -65,7 +66,7 @@ pub fn token_if(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_null(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("null")(input)?;
let (input, _) = tag_ws("null", input)?;
Ok((
input,
ScriptToken {
......@@ -77,7 +78,7 @@ pub fn token_null(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_print(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("print")(input)?;
let (input, _) = tag_ws("print", input)?;
Ok((
input,
ScriptToken {
......@@ -89,7 +90,7 @@ pub fn token_print(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_return(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("return")(input)?;
let (input, _) = tag_ws("return", input)?;
Ok((
input,
ScriptToken {
......@@ -101,7 +102,7 @@ pub fn token_return(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_super(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("super")(input)?;
let (input, _) = tag_ws("super", input)?;
Ok((
input,
ScriptToken {
......@@ -113,7 +114,7 @@ pub fn token_super(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_this(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("this")(input)?;
let (input, _) = tag_ws("this", input)?;
Ok((
input,
ScriptToken {
......@@ -125,7 +126,7 @@ pub fn token_this(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_let(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("let")(input)?;
let (input, _) = tag_ws("let", input)?;
Ok((
input,
ScriptToken {
......@@ -137,7 +138,7 @@ pub fn token_let(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_while(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("while")(input)?;
let (input, _) = tag_ws("while", input)?;
Ok((
input,
ScriptToken {
......@@ -149,7 +150,7 @@ pub fn token_while(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_export(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("export")(input)?;
let (input, _) = tag_ws("export", input)?;
Ok((
input,
ScriptToken {
......@@ -161,7 +162,7 @@ pub fn token_export(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_import(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("import")(input)?;
let (input, _) = tag_ws("import", input)?;
Ok((
input,
ScriptToken {
......@@ -173,7 +174,7 @@ pub fn token_import(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_alias(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("as")(input)?;
let (input, _) = tag_ws("as", input)?;
Ok((
input,
ScriptToken {
......@@ -185,7 +186,7 @@ pub fn token_alias(input: Span) -> IResult<Span, ScriptToken> {
pub fn token_from(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("from")(input)?;
let (input, _) = tag_ws("from", input)?;
Ok((
input,
ScriptToken {
......
......@@ -6,9 +6,9 @@ mod strings;
mod tokens;
use keywords::{
token_alias, token_else, token_export, token_for, token_function, token_if, token_import,
token_let, token_null, token_print, token_return, token_struct, token_super, token_this,
token_while,
token_alias, token_else, token_export, token_for, token_from, token_function, token_if,
token_import, token_let, token_null, token_print, token_return, token_struct, token_super,
token_this, token_while,
};
use operators::{
token_asterisk, token_bang, token_bang_equal, token_caret, token_comma, token_dot,
......@@ -51,6 +51,7 @@ mod _lex {
token_import,
token_struct,
token_super,
token_from,
)),
alt((
token_plus,
......
......@@ -10,13 +10,13 @@ peg::parser! {
= ex:expression_list() eof() { Program(ex) }
rule expression_list() -> ExpressionList
= e:(statement() / expression())+ term:";"? { ExpressionList { expressions: e, is_void: term.is_some() } }
= e:(expression() ++ ";") term:";"? { ExpressionList { expressions: e, is_void: term.is_some() } }
rule statement() -> Expression
// Include conditional here separately from expression to allow "if" without semi
= e:conditional() { Expression::Value(ValueExpression::ConditionalBlock(e)) }
/ e:condition_loop() { Expression::Void(VoidExpression::ConditionLoop(e)) }
/ e:expression() ";" { e }
// rule statement() -> Expression
// Include conditional here separately from expression to allow "if" without semi
// = e:conditional() { Expression::Value(ValueExpression::ConditionalBlock(e)) }
// / e:condition_loop() { Expression::Void(VoidExpression::ConditionLoop(e)) }
// / e:expression() ";" { e }
pub rule expression() -> Expression
= ex:value_expression() { Expression::Value(ex) }
......@@ -24,6 +24,9 @@ peg::parser! {
rule void_expression() -> VoidExpression
= ex:print() { VoidExpression::Print(ex) }
/ "import" "{" items:identifier_list() "}" "from" source:raw_string() { VoidExpression::Import(Import { source, items }) }
/ "export" "{" items:identifier_list() "}" { VoidExpression::Export(Export { items }) }
/ e:condition_loop() { VoidExpression::ConditionLoop(e) }
#[cache_left_rec]
rule value_expression() -> ValueExpression
......@@ -104,6 +107,10 @@ peg::parser! {
/ [ScriptToken { token_type: ScriptTokenType::Integer(vl), .. }] { LiteralNode::Number(Number::Integer(*vl)) }
/ [ScriptToken { token_type: ScriptTokenType::Float(vl), .. }] { LiteralNode::Number(Number::Float(*vl)) }
rule raw_string() -> String
= [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { String::from(*vl) }
/ [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { vl.clone() }
rule eof() = ![_]
}
}
......
......@@ -28,3 +28,24 @@ fn conditional() {
parse_program("if 1 + 1 { }").expect("Failed conditional with expr");
parse_program("if (let ident = false) { }").expect("Failed conditional with decl");
}
#[test]
fn import_export() {
    // Each case pairs a script with the panic message reported if it fails to parse.
    let cases = [
        (
            r#"import { foo } from "core:runtime""#,
            "Failed import with one item",
        ),
        (
            r#"import { foo, bar } from "core:runtime""#,
            "Failed import with multiple items",
        ),
        (
            r#"import { foo as baz, bar } from "core:runtime""#,
            "Failed import with multiple items and alias",
        ),
        (
            r#"import { foo as qwert, bar as asdf } from "core:runtime""#,
            "Failed import with multiple alias",
        ),
        (
            r#"export { foo, bar }"#,
            "Failed export with multiple items",
        ),
        (
            r#"export { foo as qrweas, bar }"#,
            "Failed export with multiple items and alias",
        ),
    ];
    for (script, failure) in cases {
        parse_program(script).expect(failure);
    }
}
#[test]
fn expression_list() {
    // Scripts made of several `;`-separated expressions; all must parse.
    let scripts = [
        "foo; bar",
        "if flop { 123 + 2323 } else { let boo = false }; let glob = \"swarmp\"",
    ];
    for script in scripts {
        parse_program(script).expect("Failed simple expression list");
    }
}
pub mod moka_script;
mod parser;
mod runtime;
use micro_script::moka_script::{RunScriptError, ScriptExitCode};
/// Command-line entry point.
///
/// With one argument the named script file is run; with none the REPL is
/// started; with more than one a usage line is printed and the process exits
/// with `ScriptExitCode::BadCliArgs`.
fn main() {
    let mut args = std::env::args();
    if args.len() > 2 {
        eprintln!("Usage: mscr [script]");
        std::process::exit(ScriptExitCode::BadCliArgs as i32);
    }
    // Index 0 is the program name, so the script path (if any) is at index 1.
    match args.nth(1) {
        Some(script_path) => vm_shim::run_file(script_path),
        None => vm_shim::run_repl(),
    }
}
/// Thin front-end helpers that feed script text to `moka_script::ms_run_script`.
mod vm_shim {
    use micro_script::moka_script;
    use micro_script::moka_script::ScriptExitCode;
    use std::io::BufRead;
    use std::path::PathBuf;

    /// Reads the script at `file_path` and runs it.
    ///
    /// If the file cannot be read, or the script fails, the error is printed
    /// to stderr and the process exits with `ScriptExitCode::RunFileError`.
    /// (Previously an unreadable path was silently ignored and the process
    /// exited successfully.)
    pub fn run_file(file_path: impl Into<PathBuf>) {
        let file_path = file_path.into();
        let source = match std::fs::read_to_string(&file_path) {
            Ok(source) => source,
            Err(e) => {
                eprintln!("Failed to read {}: {}", file_path.display(), e);
                std::process::exit(ScriptExitCode::RunFileError as i32);
            }
        };
        if let Err(e) = moka_script::ms_run_script(source) {
            eprintln!("{}", e);
            std::process::exit(ScriptExitCode::RunFileError as i32);
        }
    }

    /// Runs each line read from stdin as its own script.
    ///
    /// The loop ends on the literal line `.exit`, on end of input, or on a
    /// stdin read error. A script error is printed and terminates the process
    /// with `ScriptExitCode::BadReplError`.
    pub fn run_repl() {
        let stdin = std::io::stdin().lock();
        for line in stdin.lines() {
            let contents = match line {
                Ok(contents) => contents,
                Err(e) => {
                    eprintln!("Err! {}", e);
                    break;
                }
            };
            if contents == ".exit" {
                break;
            }
            if let Err(e) = moka_script::ms_run_script(contents) {
                eprintln!("{}", e);
                std::process::exit(ScriptExitCode::BadReplError as i32);
            }
        }
    }
}
use crate::parser::{lex_script, parse_tokens, ScriptToken, TokenSlice};
use std::error::Error;
use std::fmt::{Debug, Display, Formatter};
/// Process exit codes used by the command-line front end.
///
/// Variants are cast with `as i32` and passed to `std::process::exit`.
/// NOTE(review): the values look like they follow the BSD `sysexits.h`
/// range (64 and up) — confirm before relying on that.
#[derive(Debug)]
#[repr(C)]
pub enum ScriptExitCode {
/// Too many command-line arguments were supplied.
BadCliArgs = 64,
/// Running a script file failed.
RunFileError = 65,
/// Running a line entered in the REPL failed.
BadReplError = 66,
}
/// Error returned when a script cannot be run.
///
/// Rendered by `Display` as `[Line <line>] Error <location>: <message>`.
#[derive(Debug, Clone)]
pub struct RunScriptError {
    // Line number the error is attributed to.
    line: usize,
    // Free-form location text; both constructors leave it empty.
    location: String,
    // Human-readable description.
    message: String,
}

impl RunScriptError {
    /// Builds an error for `line` carrying `message` and an empty location.
    pub fn message(line: usize, message: impl ToString) -> RunScriptError {
        let message = message.to_string();
        Self {
            line,
            location: String::new(),
            message,
        }
    }

    /// Builds an error for `line` whose message is the placeholder `Unknown`.
    pub fn line(line: usize) -> RunScriptError {
        Self::message(line, "Unknown")
    }
}

impl Display for RunScriptError {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            line,
            location,
            message,
        } = self;
        write!(f, "[Line {}] Error {}: {}", line, location, message)
    }
}

impl Error for RunScriptError {}
/// Lexes and parses `source`, reporting any failure.
///
/// On a lex failure the error is printed to stderr and returned as a
/// `RunScriptError` (previously this panicked). On a parse failure a
/// diagnostic showing the offending line, its neighbours and a caret marker
/// is printed to stderr before the error is returned. A source that yields
/// no tokens is treated as a no-op.
///
/// With the `debug-ast` feature enabled, the token list and each parsed
/// top-level expression are printed to stdout.
///
/// # Errors
/// Returns a `RunScriptError` carrying the line of the lex or parse failure.
pub fn ms_run_script(source: String) -> Result<(), RunScriptError> {
    let tokens = match lex_script(source.as_str()) {
        Ok(tokens) => tokens,
        Err(e) => {
            let (line, _) = e.get_error_location();
            eprintln!("| Script error on line {}\n|\n| Failed To Lex: {}", line, e);
            return Err(RunScriptError::message(line, "Failed to lex"));
        }
    };
    // Nothing to parse (e.g. a blank REPL line). `parse_tokens` reports errors
    // by pointing at a token, so it must not be handed an empty list.
    if tokens.is_empty() {
        return Ok(());
    }
    let slice = TokenSlice(tokens.as_slice());
    let program = match parse_tokens(&slice) {
        Ok(val) => val,
        Err(e) => {
            let (line, column) = e.get_error_location();
            // `line` is 1-based; the neighbours are shown only when they exist.
            let previous_line = line
                .checked_sub(2)
                .and_then(|index| source.lines().nth(index));
            let source_line = source
                .lines()
                .nth(line.saturating_sub(1))
                .unwrap_or_default();
            let next_line = source.lines().nth(line);
            // Pad every line number to the width of the largest one printed.
            let number_length = line.saturating_add(1).to_string().len();
            eprintln!("| Script error on line {} at \"{}\"\n|", line, e.token);
            if let Some(prev) = previous_line {
                eprintln!("| [{:>width$}] {}", line - 1, prev, width = number_length);
            }
            eprintln!(
                "| [{:>width$}] {}",
                line,
                source_line,
                width = number_length
            );
            // Caret line: skip the "[n]" gutter, then the columns before the token.
            eprintln!(
                "| {} {}{}",
                " ".repeat(number_length + 2),
                " ".repeat(column.saturating_sub(1)),
                "^".repeat(e.token.token_type.len()),
            );
            if let Some(next) = next_line {
                eprintln!("| [{:>width$}] {}", line + 1, next, width = number_length);
            }
            eprintln!("|\n| Failed To Parse: {}", e.kind);
            return Err(RunScriptError::message(line, "Failed to parse"));
        }
    };
    #[cfg(feature = "debug-ast")]
    {
        println!("\n{:?}\n", tokens);
        for expr in program.0.iter() {
            println!("{:?}", expr);
        }
    }
    Ok(())
}
program ::= expression_list
block ::= "{" expression_list? "}"
expression_list ::= expression (";" expression)* ";"?
expression ::= value_expression
| void_expression
void_expression ::= condition_loop | import | export | block | print
value_expression ::= unary_operator value_expression
| value_expression binary_operator value_expression
| literal
| conditional
| declare_ident
| assignment
print ::= "print" value_expression ";"
condition_loop ::= "while" value_expression block
conditional ::= "if" value_expression block "else" conditional
| "if" value_expression block "else" block
| "if" value_expression block
declare_func ::= "fn" identifier "(" ")" block
declare_ident ::= "let" assignment
| "let" identifier
assignment ::= identifier "=" value_expression
export ::= "export" identifier_list
import ::= "import" identifier_list "from" string
identifier_list ::= "{" identifier ("as" identifier)? ("," identifier ("as" identifier)?)* "}"
binary_operator ::= "*" | "/" | "+" | "-" | "%" | "^"
| "&&" | "||"
| "==" | "!=" | "<" | "<=" | ">" | ">="
unary_operator ::= "-" | "!"
identifier ::= ALPHA ALPHANUM*
| "_" ALPHA ALPHANUM*
literal ::= integer | float | string | boolean | null
boolean ::= "true" | "false"
null ::= "null"
string ::= '"' ANY_NON_UNESCAPED_QUOTE_TOKEN* '"'
integer ::= "-"? DIGIT ("_"? DIGIT)*
float ::= "-"? integer? "." integer
| "-"? integer "." integer?
\ No newline at end of file
use crate::runtime::Number;
use std::ops::Deref;
/// Marker trait for AST node types; it declares no methods.
pub trait AstNode {}
/// Root of a parsed script: a single top-level expression list
/// (`program ::= expression_list` in the grammar).
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct Program<'a>(pub ExpressionList<'a>);
impl<'a> AstNode for Program<'a> {}
/// An ordered sequence of expressions plus a flag recording whether the
/// list is "void".
///
/// Dereferences to the underlying `Vec<Expression>`.
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct ExpressionList<'a> {
    /// The expressions, in source order.
    pub expressions: Vec<Expression<'a>>,
    /// `true` for lists built with [`ExpressionList::voided`].
    pub is_void: bool,
}

impl<'a> ExpressionList<'a> {
    /// Builds a list whose `is_void` flag is `false`.
    pub fn production(expressions: Vec<Expression<'a>>) -> Self {
        Self::with_flag(expressions, false)
    }

    /// Builds a list whose `is_void` flag is `true`.
    pub fn voided(expressions: Vec<Expression<'a>>) -> Self {
        Self::with_flag(expressions, true)
    }

    // Shared constructor behind `production` and `voided`.
    fn with_flag(expressions: Vec<Expression<'a>>, is_void: bool) -> Self {
        Self {
            expressions,
            is_void,
        }
    }
}

impl<'a> Deref for ExpressionList<'a> {
    type Target = Vec<Expression<'a>>;

    fn deref(&self) -> &Self::Target {
        let Self { expressions, .. } = self;
        expressions
    }
}
/// Any expression: either a value expression or a void expression
/// (`expression ::= value_expression | void_expression` in the grammar).
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum Expression<'a> {
/// Wraps a [`ValueExpression`].
Value(ValueExpression<'a>),
/// Wraps a [`VoidExpression`].
Void(VoidExpression<'a>),
}
/// Operator of a [`ValueExpression::Unary`] node.
///
/// NOTE(review): presumably `Not` is `!` and `Negate` is `-`, matching the
/// grammar's `unary_operator` — confirm against the parser.
#[derive(Copy, Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum UnaryOp {
Not,
Negate,
}
/// Operator of a [`ValueExpression::Binary`] node.
///
/// NOTE(review): the grammar's `binary_operator` lists more operators
/// (`^`, `&&`, `||`, `!=`, `<`, `<=`, `>`, `>=`) than there are variants
/// here — confirm whether those are still to be added.
#[derive(Copy, Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum BinaryOp {
Add,
Subtract,
Multiply,
Divide,
Modulo,
Equals,
}
/// The void-expression alternatives: loops, imports, exports and prints.
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum VoidExpression<'a> {
/// A `while`-style loop; see [`ConditionalLoop`].
ConditionLoop(ConditionalLoop<'a>),
/// An import statement; see [`Import`].
Import(Import<'a>),
/// An export statement; see [`Export`].
Export(Export<'a>),
/// A print statement; see [`Print`].
Print(Print<'a>),
}
/// The value-expression alternatives. Recursive operands are boxed so the
/// enum has a finite size.
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum ValueExpression<'a> {
/// A unary operator applied to a single operand.
Unary {
operator: UnaryOp,
operand: Box<ValueExpression<'a>>,
},
/// A binary operator applied to a left- and right-hand operand.
Binary {
lhs: Box<ValueExpression<'a>>,
rhs: Box<ValueExpression<'a>>,
operator: BinaryOp,
},
/// A nested expression list.
Block(ExpressionList<'a>),
/// A literal value.
Literal(LiteralNode<'a>),
/// A declaration; see [`DeclareIdent`].
DeclareIdentifier(DeclareIdent<'a>),
/// An assignment; see [`Assignment`].
Assignment(Assignment<'a>),
/// A conditional; see [`Conditional`].
ConditionalBlock(Conditional<'a>),
}
/// A loop made of a guarded body and an optional fallback list.
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct ConditionalLoop<'a> {
    /// The guard expression together with the loop body.
    pub block: GuardedBlock<'a>,
    /// Optional fallback expressions; `None` for a plain loop.
    pub fallback: Option<ExpressionList<'a>>,
}

impl<'a> ConditionalLoop<'a> {
    /// Builds a loop with no fallback.
    pub fn expr_while(block: GuardedBlock<'a>) -> Self {
        let fallback = None;
        ConditionalLoop { block, fallback }
    }

    /// Builds a loop that carries `fallback`.
    pub fn expr_while_else(block: GuardedBlock<'a>, fallback: ExpressionList<'a>) -> Self {
        let fallback = Some(fallback);
        ConditionalLoop { block, fallback }
    }
}
/// A conditional expression: a list of guarded blocks plus an optional
/// fallback list.
///
/// NOTE(review): presumably one guarded block per `if` / `else if` branch and
/// `fallback` for a trailing `else` — confirm against the parser.
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct Conditional<'a> {
pub blocks: Vec<GuardedBlock<'a>>,
pub fallback: Option<ExpressionList<'a>>,
}
/// An expression list paired with the guard expression that controls it.
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct GuardedBlock<'a> {
/// The guard expression (boxed because `ValueExpression` is recursive).
pub guard: Box<ValueExpression<'a>>,
/// The expressions guarded by `guard`.
pub block: ExpressionList<'a>,
}
/// An import statement (`import ::= "import" identifier_list "from" string`).
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct Import<'a> {
/// The source string following `from`.
pub source: &'a str,
/// The imported identifiers, possibly aliased.
pub items: IdentifierList<'a>,
}
/// An export statement (`export ::= "export" identifier_list`).
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct Export<'a> {
/// The exported identifiers, possibly aliased.
pub items: IdentifierList<'a>,
}
/// A print statement wrapping the value expression to print.
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct Print<'a> {
    /// The expression whose value is printed.
    pub expr: Box<ValueExpression<'a>>,
}

impl<'a> From<ValueExpression<'a>> for Print<'a> {
    /// Boxes `value` and wraps it in a `Print`.
    fn from(value: ValueExpression<'a>) -> Self {
        let expr = Box::new(value);
        Self { expr }
    }
}
pub type IdentifierList<'a> = Vec<IdentifierNode<'a>>;
pub type ParameterList<'a> = Vec<IdentifierNode<'a>>;
/// A plain identifier, holding its name as a borrowed string slice.
#[derive(Copy, Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct Identifier<'a>(pub &'a str);

/// Alias an identifier, to create a new way of referring to it
/// IdentifierAlias(original, alias) => identifier "as" alias
#[derive(Copy, Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct IdentifierAlias<'a>(pub &'a str, pub &'a str);

/// One entry of an identifier list: either a plain identifier or an
/// `original as alias` pair.
#[derive(Copy, Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum IdentifierNode<'a> {
    Direct(Identifier<'a>),
    Alias(IdentifierAlias<'a>),
}

impl<'a> IdentifierNode<'a> {
    /// Returns the name this entry is referred to by: the alias when one is
    /// present, otherwise the identifier itself.
    ///
    /// The returned slice carries the node's `'a` lifetime rather than the
    /// lifetime of the `&self` borrow, so it may outlive the node.
    pub fn get_name(&self) -> &'a str {
        match *self {
            Self::Direct(Identifier(name)) => name,
            Self::Alias(IdentifierAlias(_, alias)) => alias,
        }
    }

    /// Returns the original (un-aliased) identifier name.
    ///
    /// As with [`IdentifierNode::get_name`], the result is not tied to the
    /// `&self` borrow.
    pub fn get_base(&self) -> &'a str {
        match *self {
            Self::Direct(Identifier(base)) | Self::Alias(IdentifierAlias(base, _)) => base,
        }
    }
}
/// A literal value appearing in the source
/// (`literal ::= integer | float | string | boolean | null`).
#[derive(Copy, Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum LiteralNode<'a> {
/// A numeric literal, stored as the runtime's `Number`.
Number(Number),
/// A string literal, as a borrowed slice.
String(&'a str),
/// A boolean literal.
Boolean(bool),
/// The `null` literal.
Null,
}
/// An assignment (`assignment ::= identifier "=" value_expression`).
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub struct Assignment<'a> {
/// The identifier being assigned to.
pub ident: Identifier<'a>,
/// The assigned value expression.
pub value: Box<ValueExpression<'a>>,
}
/// A declaration (`declare_ident ::= "let" assignment | "let" identifier`).
#[derive(Clone)]
#[cfg_attr(feature = "debug-ast", derive(Debug))]
pub enum DeclareIdent<'a> {
/// Declaration with an initial value: `let` followed by an assignment.
WithValue(Assignment<'a>),
/// Declaration without a value: `let` followed by an identifier only.
WithoutValue(Identifier<'a>),
}
use nom::bytes::complete::tag;
use nom::character::complete::{char, one_of};
use nom::combinator::value;
use nom::multi::{many0, many1};
use nom::sequence::terminated;
use nom::IResult;
use nom_locate::LocatedSpan;
pub type Span<'a> = LocatedSpan<&'a str>;
pub type OwnedSpan<'a> = LocatedSpan<String>;
/// Matches the literal text `true` and yields the boolean `true`.
pub fn raw_true(input: Span) -> IResult<Span, bool> {
    let (rest, _) = tag("true")(input)?;
    Ok((rest, true))
}
/// Matches the literal text `false` and yields the boolean `false`.
pub fn raw_false(input: Span) -> IResult<Span, bool> {
    let (rest, _) = tag("false")(input)?;
    Ok((rest, false))
}
/// Matches one or more ASCII digits, each optionally followed by any number
/// of `_` separators, and returns the digits (separators removed) as an
/// owned span.
///
/// The returned span carries the offset and line of the start of `input`;
/// its fragment is a newly allocated `String`, not a slice of the source.
pub fn raw_decimal(input: Span) -> IResult<Span, OwnedSpan> {
    let input_offset = input.location_offset();
    let input_line = input.location_line();
    let (input, digits) = many1(terminated(one_of("0123456789"), many0(char('_'))))(input)?;
    // Collect the digit characters directly instead of re-formatting the
    // accumulated string once per digit.
    let string: String = digits.into_iter().collect();
    // SAFETY: NOTE(review) — `new_from_raw_offset` trusts the caller to supply a
    // consistent offset/line. They are copied from the source span while the
    // fragment is a fresh `String`, so presumably only the stored offset and
    // line are meaningful on the result and accessors that look backwards from
    // the fragment (such as column lookups) must not be used on it — confirm
    // against the nom_locate documentation.
    let span = unsafe { OwnedSpan::new_from_raw_offset(input_offset, input_line, string, ()) };
    Ok((input, span))
}
use crate::parser::ast::Program;
use crate::parser::{ScriptToken, ScriptTokenType};
use peg::error::ExpectedSet;
use peg::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult};
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::ops::{Deref, DerefMut};
/// The reason a parse failed.
#[derive(Clone, Debug)]
pub enum ParseErrorKind {
    /// The parser hit something other than one of the expected items.
    Unexpected(ExpectedSet),
}

impl Display for ParseErrorKind {
    /// Writes `expected <set>` using the expected-set's own formatting.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self::Unexpected(expected) = self;
        write!(f, "expected {}", expected)
    }
}
/// A parse failure: the token the failure is attributed to, plus its kind.
#[derive(Clone, Debug)]
pub struct ParseError<'a> {
    /// The token at which parsing failed.
    pub token: &'a ScriptToken<'a>,
    /// Why parsing failed.
    pub kind: ParseErrorKind,
}

impl<'a> ParseError<'a> {
    /// Returns `(line, column)` of the offending token, as reported by its
    /// position span.
    pub fn get_error_location(&self) -> (usize, usize) {
        let at = &self.token.position;
        let line = at.location_line() as usize;
        let column = at.get_column();
        (line, column)
    }
}

impl<'a> Display for ParseError<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let at = &self.token.position;
        write!(
            f,
            "Failed to parse at [{}, {}]: {}. {}",
            at.location_line(),
            at.get_column(),
            at.fragment(),
            self.kind,
        )
    }
}

impl<'a> Error for ParseError<'a> {}
/// Parses a token slice into a [`Program`].
///
/// # Errors
/// Returns a [`ParseError`] pointing at the token where parsing failed. When
/// the failure is at the end of the input (the reported location is one past
/// the last token) the error points at the last token instead; indexing the
/// list directly at that location would be out of bounds.
///
/// # Panics
/// Panics if parsing fails on an empty token list, because `ParseError` must
/// borrow a token and there is none to point at.
pub fn parse_tokens<'a>(list: &'a TokenSlice) -> Result<Program<'a>, ParseError<'a>> {
    match script_parser::program(list) {
        Ok(prog) => Ok(prog),
        Err(e) => {
            // `TokenSlice` wraps the token slice as its first field.
            let tokens = list.0;
            let bad_token = tokens
                .get(e.location)
                .or_else(|| tokens.last())
                .expect("parse error reported for an empty token stream");
            Err(ParseError {
                token: bad_token,
                kind: ParseErrorKind::Unexpected(e.expected),
            })
        }
    }
}
use crate::parser::atoms::Span;
use crate::parser::{ScriptToken, ScriptTokenType};
use nom::bytes::complete::tag;
use nom::character::complete::satisfy;
use nom::combinator::not;
use nom::sequence::terminated;
use nom::IResult;
use nom_locate::position;
/// Matches the keyword `word` at the start of `input` and returns the
/// position span captured just before it.
///
/// The keyword must end at a word boundary: it may not be directly followed
/// by an alphanumeric character or `_`. Without that check a bare
/// `tag("as")` also matches the first two characters of an identifier such
/// as `asdf`. The boundary check consumes no input.
fn lex_keyword<'a>(word: &'static str, input: Span<'a>) -> IResult<Span<'a>, Span<'a>> {
    let (input, start) = position(input)?;
    terminated(
        tag(word),
        not(satisfy(|c: char| c.is_alphanumeric() || c == '_')),
    )(input)
    .map(|(rest, _)| (rest, start))
}

/// Lexes the `struct` keyword into a `Class` token.
pub fn token_struct(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("struct", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Class,
        },
    ))
}

/// Lexes the `else` keyword into an `Else` token.
pub fn token_else(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("else", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Else,
        },
    ))
}

/// Lexes the `fn` keyword into a `Function` token.
pub fn token_function(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("fn", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Function,
        },
    ))
}

/// Lexes the `for` keyword into a `For` token.
pub fn token_for(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("for", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::For,
        },
    ))
}

/// Lexes the `if` keyword into an `If` token.
pub fn token_if(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("if", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::If,
        },
    ))
}

/// Lexes the `null` keyword into a `Null` token.
pub fn token_null(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("null", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Null,
        },
    ))
}

/// Lexes the `print` keyword into a `Print` token.
pub fn token_print(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("print", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Print,
        },
    ))
}

/// Lexes the `return` keyword into a `Return` token.
pub fn token_return(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("return", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Return,
        },
    ))
}

/// Lexes the `super` keyword into a `Super` token.
pub fn token_super(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("super", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Super,
        },
    ))
}

/// Lexes the `this` keyword into a `This` token.
pub fn token_this(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("this", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::This,
        },
    ))
}

/// Lexes the `let` keyword into a `Let` token.
pub fn token_let(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("let", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Let,
        },
    ))
}

/// Lexes the `while` keyword into a `While` token.
pub fn token_while(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("while", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::While,
        },
    ))
}

/// Lexes the `export` keyword into an `Export` token.
pub fn token_export(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("export", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Export,
        },
    ))
}

/// Lexes the `import` keyword into an `Import` token.
pub fn token_import(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("import", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Import,
        },
    ))
}

/// Lexes the `as` keyword into an `Alias` token.
pub fn token_alias(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("as", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::Alias,
        },
    ))
}

/// Lexes the `from` keyword into a `From` token.
pub fn token_from(input: Span) -> IResult<Span, ScriptToken> {
    let (rest, start) = lex_keyword("from", input)?;
    Ok((
        rest,
        ScriptToken {
            position: start,
            token_type: ScriptTokenType::From,
        },
    ))
}
use crate::parser::atoms::Span;
use crate::parser::ScriptToken;
use nom::branch::alt;
use nom::character::complete::multispace0;
use nom::error::ErrorKind;
use nom::multi::fold_many0;
use nom::sequence::delimited;
use nom::IResult;
use nom_locate::LocatedSpan;
use std::error::Error;
use std::fmt::{Display, Formatter};
use crate::parser::token_parser::{
token_alias, token_asterisk, token_bang, token_bang_equal, token_boolean, token_caret,
token_comma, token_dot, token_double_ampersand, token_double_pipe, token_else, token_equal,
token_equal_equal, token_export, token_float, token_for, token_function, token_greater,
token_greater_equal, token_ident, token_if, token_import, token_int, token_left_brace,
token_left_paren, token_less, token_less_equal, token_let, token_minus, token_modulo,
token_null, token_plus, token_print, token_return, token_right_brace, token_right_paren,
token_semicolon, token_slash, token_string, token_struct, token_super, token_this, token_while,
};
/// Lexes a single token of any kind from the start of `input`.
///
/// Alternatives are tried in order and the first match wins, so the order is
/// significant:
/// 1. keywords,
/// 2. two-character operators,
/// 3. single-character operators and punctuation,
/// 4. literals and identifiers.
///
/// Two-character operators must come before the single-character operators
/// that are their prefixes. `token_bang` is a plain `tag("!")`, so when it is
/// tried first `!=` lexes as `!` followed by `=` and `token_bang_equal` can
/// never match; `==`, `<=` and `>=` are ordered the same way for the same
/// reason. The groups are nested because `alt` only accepts a limited number
/// of alternatives per tuple.
pub fn any_token(input: Span) -> IResult<Span, ScriptToken> {
    alt((
        alt((
            token_if,
            token_function,
            token_alias,
            token_for,
            token_let,
            token_else,
            token_this,
            token_null,
            token_while,
            token_return,
            token_print,
            token_export,
            token_import,
            token_struct,
            token_super,
        )),
        alt((
            token_bang_equal,
            token_equal_equal,
            token_less_equal,
            token_greater_equal,
            token_double_ampersand,
            token_double_pipe,
        )),
        alt((
            token_plus,
            token_minus,
            token_asterisk,
            token_slash,
            token_bang,
            token_comma,
            token_dot,
            token_caret,
            token_modulo,
            token_left_brace,
            token_left_paren,
            token_right_brace,
            token_right_paren,
            token_semicolon,
            token_less,
            token_greater,
            token_equal,
        )),
        alt((
            token_float,
            token_int,
            token_boolean,
            token_string,
            token_ident,
        )),
    ))(input)
}
/// Lexes as many tokens as possible from `input`, skipping whitespace around
/// each one, and returns them together with the remaining unlexed input.
pub fn token_list(input: Span) -> IResult<Span, Vec<ScriptToken>> {
    let padded_token = delimited(multispace0, any_token, multispace0);
    fold_many0(padded_token, Vec::new, |mut collected, token| {
        collected.push(token);
        collected
    })(input)
}
#[derive(Debug)]
pub struct LexError<'a> {
pub inner: nom::error::Error<Span<'a>>,
}
impl<'a> LexError<'a> {
pub fn get_error_location(&self) -> (usize, usize) {
(
self.inner.input.location_line() as usize,
self.inner.input.get_column(),
)
}
}
impl<'a> Display for LexError<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.inner)
}
}
impl<'a> Error for LexError<'a> {}
impl<'a> From<nom::Err<nom::error::Error<Span<'a>>>> for LexError<'a> {
fn from(value: nom::Err<nom::error::Error<Span<'a>>>) -> Self {
match value {
nom::Err::Error(err) => Self { inner: err },
nom::Err::Failure(err) => Self { inner: err },
nom::Err::Incomplete(_) => Self {
inner: nom::error::Error::new(
LocatedSpan::new("<incomplete program>"),
ErrorKind::Alt,
),
},
}
}
}
pub fn lex_script<'a, 'b: 'a>(script: &'b str) -> Result<Vec<ScriptToken<'a>>, LexError> {
let script_span = Span::new(script);
let tokens = token_list(script_span).map_err(LexError::from)?;
Ok(tokens.1)
}
#[cfg(test)]
mod lexing_test {
    use crate::parser::lexer::lex_script;
    use crate::parser::{ScriptToken, ScriptTokenType};

    /// A one-line arithmetic expression lexes into integer, plus, integer.
    #[test]
    fn simple_maths() {
        let tokens = lex_script("12 + 21").expect("Failed to lex");
        let expected = [
            ScriptTokenType::Integer(12),
            ScriptTokenType::Plus,
            ScriptTokenType::Integer(21),
        ];
        assert_eq!(tokens.len(), expected.len());
        for (token, want) in tokens.iter().zip(expected.iter()) {
            assert_eq!(&token.token_type, want);
        }
    }

    /// Tokens spread over several lines are lexed in source order.
    #[test]
    fn multiline() {
        let script = r#"
"Foo"; "Bar";
12 + 21
"#;
        let tokens = lex_script(script).expect("Failed to lex");
        let expected = [
            ScriptTokenType::OwnedString(String::from("Foo")),
            ScriptTokenType::Semicolon,
            ScriptTokenType::OwnedString(String::from("Bar")),
            ScriptTokenType::Semicolon,
            ScriptTokenType::Integer(12),
            ScriptTokenType::Plus,
            ScriptTokenType::Integer(21),
        ];
        assert_eq!(tokens.len(), expected.len());
        for (token, want) in tokens.iter().zip(expected.iter()) {
            assert_eq!(&token.token_type, want);
        }
    }
}
mod ast;
mod atoms;
mod grammar;
mod keywords;
mod lexer;
mod operators;
mod primitives;
mod strings;
mod tokens;
/// Re-exports the individual token parsers from the `keywords`, `operators`,
/// `primitives` and `strings` modules under a single path.
///
/// NOTE(review): `token_from` is defined alongside the other keyword parsers
/// but is not re-exported here — confirm whether that is intentional.
pub mod token_parser {
pub use super::keywords::{
token_alias, token_else, token_export, token_for, token_function, token_if, token_import,
token_let, token_null, token_print, token_return, token_struct, token_super, token_this,
token_while,
};
pub use super::operators::{
token_asterisk, token_bang, token_bang_equal, token_caret, token_comma, token_dot,
token_double_ampersand, token_double_pipe, token_equal, token_equal_equal, token_greater,
token_greater_equal, token_left_brace, token_left_paren, token_less, token_less_equal,
token_minus, token_modulo, token_plus, token_right_brace, token_right_paren,
token_semicolon, token_slash,
};
pub use super::primitives::{token_boolean, token_float, token_ident, token_int};
pub use super::strings::token_string;
}
pub use grammar::{parse_tokens, TokenSlice};
pub use lexer::lex_script;
pub use tokens::{ScriptToken, ScriptTokenType};
/// Test helper that unwraps a nom parse result.
///
/// On `Ok((remainder, token))` it calls `$with(remainder, token)` and
/// evaluates to that call's result. On an error it panics with the line,
/// column and error-kind description taken from the nom error.
///
/// Two forms:
/// - `map_result!(val, with)` — the panic message shows the error's
///   remaining input between `||` markers.
/// - `map_result!(val, with, printable)` — the panic message shows
///   `printable` instead of the remaining input.
///
/// An `Incomplete` error always panics with "Incorrect error type".
#[cfg(test)]
#[macro_export]
macro_rules! map_result {
// Two-argument form: report the remaining input of the failed parse.
($val: expr, $with: expr) => {
match $val {
Ok((remainder, token)) => $with(remainder, token),
Err(nom::Err::Incomplete(_)) => panic!("Incorrect error type"),
Err(nom::Err::Failure(err)) | Err(nom::Err::Error(err)) => {
panic!(
"At [{}:{}]: {}; Value ||{}||",
&err.input.location_line(),
&err.input.get_column(),
&err.code.description(),
&err.input
);
}
}
};
// Three-argument form: report the caller-supplied `$printable` value.
($val: expr, $with: expr, $printable: expr) => {
match $val {
Ok((remainder, token)) => $with(remainder, token),
Err(nom::Err::Incomplete(_)) => panic!("Incorrect error type"),
Err(nom::Err::Failure(err)) | Err(nom::Err::Error(err)) => {
panic!(
"At [{}:{}]: {}; Value {}",
&err.input.location_line(),
&err.input.get_column(),
&err.code.description(),
$printable
);
}
}
};
}
use crate::parser::atoms::Span;
use crate::parser::{ScriptToken, ScriptTokenType};
use nom::bytes::complete::tag;
use nom::IResult;
use nom_locate::position;
pub fn token_left_paren(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("(")(input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: ScriptTokenType::LeftParen,
},
))
}
pub fn token_right_paren(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag(")")(input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: ScriptTokenType::RightParen,
},
))
}
pub fn token_left_brace(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("{")(input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: ScriptTokenType::LeftBrace,
},
))
}
pub fn token_right_brace(input: Span) -> IResult<Span, ScriptToken> {
let (input, pos) = position(input)?;
let (input, _) = tag("}")(input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: ScriptTokenType::RightBrace,
},
))
}
/// Lexes a single `,` into a [`ScriptTokenType::Comma`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_comma(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag(",")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Comma,
        };
        (rest, token)
    })
}
/// Lexes a single `.` into a [`ScriptTokenType::Dot`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_dot(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag(".")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Dot,
        };
        (rest, token)
    })
}
/// Lexes a single `-` into a [`ScriptTokenType::Minus`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_minus(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("-")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Minus,
        };
        (rest, token)
    })
}
/// Lexes a single `+` into a [`ScriptTokenType::Plus`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_plus(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("+")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Plus,
        };
        (rest, token)
    })
}
/// Lexes a single `;` into a [`ScriptTokenType::Semicolon`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_semicolon(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag(";")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Semicolon,
        };
        (rest, token)
    })
}
/// Lexes a single `/` into a [`ScriptTokenType::Slash`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_slash(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("/")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Slash,
        };
        (rest, token)
    })
}
/// Lexes a single `*` into a [`ScriptTokenType::Asterisk`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_asterisk(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("*")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Asterisk,
        };
        (rest, token)
    })
}
/// Lexes a single `!` into a [`ScriptTokenType::Bang`] token.
///
/// Only the one character is inspected, so input that starts with `!=` also
/// matches here and leaves the `=` unconsumed.
pub fn token_bang(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("!")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Bang,
        };
        (rest, token)
    })
}
/// Lexes the two-character operator `!=` into a [`ScriptTokenType::BangEqual`] token.
///
/// The token's `position` is the span captured before the operator is consumed.
pub fn token_bang_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("!=")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::BangEqual,
        };
        (rest, token)
    })
}
/// Lexes a single `=` into a [`ScriptTokenType::Equal`] token.
///
/// Only the one character is inspected, so input that starts with `==` also
/// matches here and leaves the second `=` unconsumed.
pub fn token_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("=")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Equal,
        };
        (rest, token)
    })
}
/// Lexes the two-character operator `==` into a [`ScriptTokenType::EqualEqual`] token.
///
/// The token's `position` is the span captured before the operator is consumed.
pub fn token_equal_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("==")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::EqualEqual,
        };
        (rest, token)
    })
}
/// Lexes a single `>` into a [`ScriptTokenType::Greater`] token.
///
/// Only the one character is inspected, so input that starts with `>=` also
/// matches here and leaves the `=` unconsumed.
pub fn token_greater(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag(">")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Greater,
        };
        (rest, token)
    })
}
/// Lexes the two-character operator `>=` into a [`ScriptTokenType::GreaterEqual`] token.
///
/// The token's `position` is the span captured before the operator is consumed.
pub fn token_greater_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag(">=")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::GreaterEqual,
        };
        (rest, token)
    })
}
/// Lexes a single `<` into a [`ScriptTokenType::Less`] token.
///
/// Only the one character is inspected, so input that starts with `<=` also
/// matches here and leaves the `=` unconsumed.
pub fn token_less(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("<")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Less,
        };
        (rest, token)
    })
}
/// Lexes the two-character operator `<=` into a [`ScriptTokenType::LessEqual`] token.
///
/// The token's `position` is the span captured before the operator is consumed.
pub fn token_less_equal(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("<=")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::LessEqual,
        };
        (rest, token)
    })
}
/// Lexes the two-character operator `||` into a [`ScriptTokenType::DoublePipe`] token.
///
/// The token's `position` is the span captured before the operator is consumed.
pub fn token_double_pipe(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("||")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::DoublePipe,
        };
        (rest, token)
    })
}
/// Lexes the two-character operator `&&` into a
/// [`ScriptTokenType::DoubleAmpersand`] token.
///
/// The token's `position` is the span captured before the operator is consumed.
pub fn token_double_ampersand(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("&&")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::DoubleAmpersand,
        };
        (rest, token)
    })
}
/// Lexes a single `%` into a [`ScriptTokenType::Modulo`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_modulo(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("%")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Modulo,
        };
        (rest, token)
    })
}
/// Lexes a single `^` into a [`ScriptTokenType::Caret`] token.
///
/// The token's `position` is the span captured before the character is consumed.
pub fn token_caret(input: Span) -> IResult<Span, ScriptToken> {
    let (input, start) = position(input)?;
    tag("^")(input).map(|(rest, _)| {
        let token = ScriptToken {
            position: start,
            token_type: ScriptTokenType::Caret,
        };
        (rest, token)
    })
}
#[cfg(test)]
mod operator_checks {
    use super::*;
    use crate::parser::atoms::Span;

    /// Wraps a string slice in a located span for the lexer functions.
    fn s(st: &str) -> Span {
        Span::new(st)
    }

    /// Unwraps a lexer result and returns only the kind of the produced token.
    fn kind_of<'a>(result: IResult<Span<'a>, ScriptToken<'a>>) -> ScriptTokenType<'a> {
        let (_, token) = result.expect("Failed to parse");
        token.token_type
    }

    /// Each bracket character must lex to its matching token type.
    #[test]
    fn parse_brackets() {
        assert_eq!(kind_of(token_left_brace(s("{"))), ScriptTokenType::LeftBrace);
        assert_eq!(
            kind_of(token_right_brace(s("}"))),
            ScriptTokenType::RightBrace
        );
        assert_eq!(kind_of(token_left_paren(s("("))), ScriptTokenType::LeftParen);
        assert_eq!(
            kind_of(token_right_paren(s(")"))),
            ScriptTokenType::RightParen
        );
    }
}
use crate::parser::atoms::{raw_decimal, raw_false, raw_true, Span};
use crate::parser::tokens::{ScriptToken, ScriptTokenType};
use nom::branch::alt;
use nom::bytes::complete::{escaped, is_not, tag};
use nom::character::complete::{alpha1, alphanumeric1, char, one_of};
use nom::combinator::{opt, recognize};
use nom::multi::many0_count;
use nom::sequence::{delimited, pair, separated_pair};
use nom::{error_position, IResult};
use nom_locate::position;
/// Lexes an identifier into a [`ScriptTokenType::Identifier`] token.
///
/// An identifier starts with a run of letters (`alpha1`) or an underscore and
/// continues with any number of alphanumeric runs or underscores. The token
/// borrows the matched text from the input.
pub fn token_ident(input: Span) -> IResult<Span, ScriptToken> {
    let (input, pos) = position(input)?;
    let leading = alt((alpha1, tag("_")));
    let trailing = many0_count(alt((alphanumeric1, tag("_"))));
    // `recognize` discards the sub-parser outputs and yields the whole matched span.
    recognize(pair(leading, trailing))(input).map(|(rest, name)| {
        (
            rest,
            ScriptToken {
                position: pos,
                token_type: ScriptTokenType::Identifier(name.fragment()),
            },
        )
    })
}
/// Lexes an optionally signed integer into a [`ScriptTokenType::Integer`] token.
///
/// The digits come from `raw_decimal`; the optional leading `+`/`-` is glued
/// back on before the text is parsed as an `i64`.
///
/// # Errors
///
/// Returns a recoverable error when no digits are present, and a
/// `nom::Err::Failure` with `ErrorKind::Digit` (located at the token start)
/// when the digits do not fit an `i64`.
pub fn token_int(input: Span) -> IResult<Span, ScriptToken> {
    let (input, pos) = position(input)?;
    let (input, sign) = opt(one_of("+-"))(input)?;
    let (input, digits) = raw_decimal(input)?;
    let literal = format!("{}{}", sign.map(String::from).unwrap_or_default(), digits);
    match literal.parse::<i64>() {
        Ok(number) => Ok((
            input,
            ScriptToken {
                token_type: ScriptTokenType::Integer(number),
                position: pos,
            },
        )),
        Err(_) => Err(nom::Err::Failure(error_position!(
            pos,
            nom::error::ErrorKind::Digit
        ))),
    }
}
/// Lexes an optionally signed decimal number into a [`ScriptTokenType::Float`] token.
///
/// The shape is `[sign][digits].[digits]`. The `.` is mandatory, but either
/// digit group may be missing and is then treated as `0`, so `0.`, `.0` and
/// even a lone `.` are accepted (the last one yields `0.0`).
///
/// # Errors
///
/// Returns a recoverable error when no `.` follows the optional sign/digits,
/// and a `nom::Err::Failure` with `ErrorKind::Digit` (located at the token
/// start) when the reassembled text cannot be parsed as an `f64`.
pub fn token_float(input: Span) -> IResult<Span, ScriptToken> {
    let (input, pos) = position(input)?;
    let (input, sign) = opt(one_of("+-"))(input)?;
    let (input, (before, after)) =
        separated_pair(opt(raw_decimal), tag("."), opt(raw_decimal))(input)?;

    let sign_text = sign.map(String::from).unwrap_or_default();
    let whole = before
        .map(|s| s.fragment().to_owned())
        .unwrap_or_else(|| String::from("0"));
    let fraction = after
        .map(|s| s.fragment().to_owned())
        .unwrap_or_else(|| String::from("0"));

    match format!("{}{}.{}", sign_text, whole, fraction).parse::<f64>() {
        Ok(number) => Ok((
            input,
            ScriptToken {
                token_type: ScriptTokenType::Float(number),
                position: pos,
            },
        )),
        Err(_) => Err(nom::Err::Failure(error_position!(
            pos,
            nom::error::ErrorKind::Digit
        ))),
    }
}
/// Lexes a boolean literal into a [`ScriptTokenType::Boolean`] token.
///
/// `raw_true` is tried first, then `raw_false`; whichever matches supplies the value.
pub fn token_boolean(input: Span) -> IResult<Span, ScriptToken> {
    let (input, pos) = position(input)?;
    alt((raw_true, raw_false))(input).map(|(rest, flag)| {
        (
            rest,
            ScriptToken {
                position: pos,
                token_type: ScriptTokenType::Boolean(flag),
            },
        )
    })
}
#[cfg(test)]
mod parsing_tests {
    use super::*;
    use crate::map_result;

    /// Signed values and `_` digit separators must produce the plain integer.
    #[test]
    fn parse_integer() {
        let cases = [
            ("1234", 1234),
            ("-1234", -1234),
            ("0", 0),
            ("1_000_000", 1000000),
            ("-12_34", -1234),
        ];
        for (source, want) in cases {
            map_result!(token_int(Span::new(source)), |_, token: ScriptToken| {
                assert_eq!(token.token_type, ScriptTokenType::Integer(want))
            });
        }
    }

    /// Floats may omit either digit group; anything after the first complete
    /// number is left unconsumed.
    #[test]
    fn parse_floats() {
        let cases = [
            ("12.34", 12.34),
            ("-12.34", -12.34),
            ("0.", 0.),
            (".0", 0.),
            (".0.", 0.),
            (".0.1.2", 0.),
            ("1_000_000.1_23", 1000000.123),
            ("-12_34.0_0_0", -1234.0),
        ];
        for (source, want) in cases {
            map_result!(token_float(Span::new(source)), |_, token: ScriptToken| {
                assert_eq!(token.token_type, ScriptTokenType::Float(want))
            });
        }
    }

    /// Both boolean spellings map to their value.
    #[test]
    fn parse_bools() {
        let cases = [("true", true), ("false", false)];
        for (source, want) in cases {
            map_result!(
                token_boolean(Span::new(source)),
                |_, token: ScriptToken| {
                    assert_eq!(token.token_type, ScriptTokenType::Boolean(want))
                }
            );
        }
    }

    /// Identifiers round-trip unchanged: the token borrows exactly the input text.
    #[test]
    fn parse_identifier() {
        let names = ["BarBaz", "Foo", "foo", "foasd123", "_adad"];
        for name in names {
            map_result!(token_ident(Span::new(name)), |_, token: ScriptToken| {
                assert_eq!(token.token_type, ScriptTokenType::Identifier(name))
            });
        }
    }
}
// parser combinators are constructed from the bottom up:
// first we write parsers for the smallest elements (escaped characters),
// then combine them into larger parsers.
use crate::parser::atoms::Span;
use crate::parser::{ScriptToken, ScriptTokenType};
use nom::branch::alt;
use nom::bytes::complete::{is_not, take_while_m_n};
use nom::character::complete::{char as p_char, multispace1, one_of};
use nom::combinator::{map, map_opt, map_res, value, verify};
use nom::multi::fold_many1;
use nom::sequence::{delimited, preceded};
use nom::IResult;
use nom_locate::position;
/// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6
/// hexadecimal numerals. We will combine this later with parse_escaped_char
/// to parse sequences like \u{00AC}.
fn parse_unicode(input: Span) -> IResult<Span, char> {
// `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match
// a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals.
let parse_hex = take_while_m_n(1, 6, |ch: char| ch.is_ascii_hexdigit());
// `preceded` takes a prefix parser, and if it succeeds, returns the result
// of the body parser. In this case, it parses u{XXXX}.
let parse_delimited_hex = preceded(
p_char::<Span, nom::error::Error<Span>>('u'),
// `delimited` is like `preceded`, but it parses both a prefix and a suffix.
// It returns the result of the middle parser. In this case, it parses
// {XXXX}, where XXXX is 1 to 6 hex numerals, and returns XXXX
delimited(p_char('{'), parse_hex, p_char('}')),
);
// `map_res` takes the result of a parser and applies a function that returns
// a Result. In this case we take the hex bytes from parse_hex and attempt to
// convert them to a u32.
let parse_u32 = map_res(parse_delimited_hex, move |hex| {
u32::from_str_radix(hex.fragment(), 16)
});
// map_opt is like map_res, but it takes an Option instead of a Result. If
// the function returns None, map_opt returns an error. In this case, because
// not all u32 values are valid unicode code points, we have to fallibly
// convert to p_char with from_u32.
let (span, char) = map_opt(parse_u32, move |val| char::from_u32(val))(input)?;
Ok((span, char))
}
/// Parses one backslash escape and returns the character it stands for.
///
/// Supported forms are `\u{XXXX}`, `\n`, `\r`, `\t`, `\b`, `\f`, `\\`, `\/` and `\"`.
fn parse_escaped_char(input: Span) -> IResult<Span, char> {
    // Every escape starts with a backslash; what follows selects the character.
    let (after_backslash, _) = p_char('\\')(input)?;
    alt((
        parse_unicode,
        // Single-letter markers map to a fixed replacement character.
        value('\n', p_char('n')),
        value('\r', p_char('r')),
        value('\t', p_char('t')),
        value('\u{08}', p_char('b')),
        value('\u{0C}', p_char('f')),
        // These three escape to themselves.
        value('\\', p_char('\\')),
        value('/', p_char('/')),
        value('"', p_char('"')),
    ))(after_backslash)
}
/// Parse a backslash, followed by any amount of whitespace. This is used later
/// to discard any escaped whitespace.
fn parse_escaped_whitespace(input: Span) -> IResult<Span, Span> {
preceded(p_char('\\'), multispace1)(input)
}
/// Parses a non-empty run of plain text, stopping before the next `"` or `\`.
fn parse_literal(input: Span) -> IResult<Span, Span> {
    // `is_not` takes the longest prefix free of the listed characters;
    // `verify` additionally rejects a match whose fragment is empty.
    verify(is_not("\"\\"), |text: &Span| !text.fragment().is_empty())(input)
}
/// One piece of a string literal as produced by `parse_fragment`: either a
/// non-empty run of unescaped text, a single decoded escape, or a block of
/// escaped whitespace.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
// Unescaped text borrowed directly from the input.
Literal(&'a str),
// The character a backslash escape decodes to.
EscapedChar(char),
// A backslash followed by whitespace; carries no data.
EscapedWS,
}
/// Parses the next piece of a string body as a [`StringFragment`].
///
/// Alternatives are tried in order: plain text, a character escape, then
/// escaped whitespace.
fn parse_fragment(input: Span) -> IResult<Span, StringFragment> {
    let literal = map(parse_literal, |text: Span| {
        StringFragment::Literal(text.fragment())
    });
    let escaped_char = map(parse_escaped_char, StringFragment::EscapedChar);
    let escaped_ws = value(StringFragment::EscapedWS, parse_escaped_whitespace);
    alt((literal, escaped_char, escaped_ws))(input)
}
/// Parse a string. Use a loop of parse_fragment and push all of the fragments
/// into an output string.
pub fn token_string(input: Span) -> IResult<Span, ScriptToken> {
// fold is the equivalent of iterator::fold. It runs a parser in a loop,
// and for each output value, calls a folding function on each output value.
let build_string = fold_many1(
// Our parser function– parses a single string fragment
parse_fragment,
// Our init value, an empty string
String::new,
// Our folding function. For each fragment, append the fragment to the
// string.
|mut string, fragment| {
match fragment {
StringFragment::Literal(s) => string.push_str(s),
StringFragment::EscapedChar(c) => string.push(c),
StringFragment::EscapedWS => {}
}
string
},
);
let (input, pos) = position(input)?;
let (input, value) = delimited(p_char('"'), build_string, p_char('"'))(input)?;
Ok((
input,
ScriptToken {
position: pos,
token_type: ScriptTokenType::OwnedString(value),
},
))
}
// Tests for `token_string`: each case pairs source text (including the
// surrounding quotes) with the decoded string value the lexer should produce.
#[cfg(test)]
mod string_test {
use super::token_string;
use crate::map_result;
use crate::parser::atoms::Span;
use crate::parser::{ScriptToken, ScriptTokenType};
#[test]
fn parse_escaped_string() {
let positive_cases = [
// Plain text without any escapes.
(
r#""This is an escaped String""#,
String::from("This is an escaped String"),
),
// `\"` decodes to a literal quote inside the string.
(
r#""This is an \"escaped\" String""#,
String::from("This is an \"escaped\" String"),
),
// A backslash followed by whitespace (here a line break) is dropped entirely.
(
r#""Many whitespaces can be collapsed with a slash \
and they won't matter""#,
String::from(
"Many whitespaces can be collapsed with a slash and they won't matter",
),
),
// An unescaped line break is kept verbatim in the value.
(
r#""Big whitespace preserved can be collapsed with a slash
and they won't matter""#,
String::from(
r#"Big whitespace preserved can be collapsed with a slash
and they won't matter"#,
),
),
];
for (program, expected) in positive_cases {
// The three-argument form prints the whole source text if lexing fails.
map_result!(
token_string(Span::new(program)),
|_, token: ScriptToken| {
assert_eq!(token.token_type, ScriptTokenType::OwnedString(expected))
},
program
);
}
}
}
use crate::parser::atoms::Span;
use std::error::Error;
use std::fmt::{format, Debug, Display, Formatter};
/// The kind of a lexed token, together with its payload for literals.
#[derive(PartialEq, Clone, Debug)]
pub enum ScriptTokenType<'a> {
    // Structural Tokens
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    Comma,
    Dot,
    Semicolon,
    // Unary Operators
    Bang,
    Minus,
    // Binary Operators
    Asterisk,
    Slash,
    Plus,
    BangEqual,
    Equal,
    EqualEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,
    DoublePipe,
    DoubleAmpersand,
    Modulo,
    Caret,
    // Literals
    /// Identifier text borrowed from the source.
    Identifier(&'a str),
    /// String contents borrowed from the source.
    String(&'a str),
    /// String contents held in an owned buffer.
    OwnedString(String),
    Integer(i64),
    Float(f64),
    Boolean(bool),
    // Keywords
    /// The `struct` keyword.
    Class,
    Else,
    /// The `fn` keyword.
    Function,
    For,
    If,
    Null,
    Print,
    Return,
    Super,
    This,
    Let,
    While,
    Export,
    Import,
    /// The `as` keyword.
    Alias,
    From,
    // Misc
    Eof,
}

impl<'a> ScriptTokenType<'a> {
    /// Returns the length in bytes of this token's textual form.
    ///
    /// Punctuation, operators and keywords report the length of their fixed
    /// spelling. Identifiers report the length of their text, string variants
    /// add 2 for the surrounding quotes, and numbers report the length of
    /// their `Display` rendering. `Eof` has length 0.
    pub fn len(&self) -> usize {
        match self {
            // Single-character punctuation and operators. The braces belong
            // here: `{` and `}` are one character each (only the format-string
            // escapes `{{` / `}}` are two).
            ScriptTokenType::LeftParen
            | ScriptTokenType::RightParen
            | ScriptTokenType::LeftBrace
            | ScriptTokenType::RightBrace
            | ScriptTokenType::Comma
            | ScriptTokenType::Dot
            | ScriptTokenType::Minus
            | ScriptTokenType::Plus
            | ScriptTokenType::Semicolon
            | ScriptTokenType::Slash
            | ScriptTokenType::Asterisk
            | ScriptTokenType::Bang
            | ScriptTokenType::Equal
            | ScriptTokenType::Greater
            | ScriptTokenType::Less
            | ScriptTokenType::Modulo
            | ScriptTokenType::Caret => 1,
            // Two-character operators.
            ScriptTokenType::BangEqual
            | ScriptTokenType::EqualEqual
            | ScriptTokenType::GreaterEqual
            | ScriptTokenType::LessEqual
            | ScriptTokenType::DoublePipe
            | ScriptTokenType::DoubleAmpersand => 2,
            // Literals.
            ScriptTokenType::Identifier(value) => value.len(),
            ScriptTokenType::String(value) => value.len() + 2,
            ScriptTokenType::OwnedString(value) => value.len() + 2,
            ScriptTokenType::Integer(value) => value.to_string().len(),
            ScriptTokenType::Float(value) => value.to_string().len(),
            ScriptTokenType::Boolean(true) => 4,
            ScriptTokenType::Boolean(false) => 5,
            // Keywords, grouped by the length of their spelling.
            // `fn`, `if`, `as`
            ScriptTokenType::Function | ScriptTokenType::If | ScriptTokenType::Alias => 2,
            // `for`, `let`
            ScriptTokenType::For | ScriptTokenType::Let => 3,
            // `else`, `null`, `this`, `from`
            ScriptTokenType::Else
            | ScriptTokenType::Null
            | ScriptTokenType::This
            | ScriptTokenType::From => 4,
            // `print`, `super`, `while`
            ScriptTokenType::Print | ScriptTokenType::Super | ScriptTokenType::While => 5,
            // `struct`, `return`, `export`, `import`
            ScriptTokenType::Class
            | ScriptTokenType::Return
            | ScriptTokenType::Export
            | ScriptTokenType::Import => 6,
            ScriptTokenType::Eof => 0,
        }
    }
}
impl<'a> Display for ScriptTokenType<'a> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
ScriptTokenType::LeftParen => write!(f, "("),
ScriptTokenType::RightParen => write!(f, ")"),
ScriptTokenType::LeftBrace => write!(f, "{{"),
ScriptTokenType::RightBrace => write!(f, "}}"),
ScriptTokenType::Comma => write!(f, ","),
ScriptTokenType::Dot => write!(f, "."),
ScriptTokenType::Minus => write!(f, "-"),
ScriptTokenType::Plus => write!(f, "+"),
ScriptTokenType::Semicolon => write!(f, ";"),
ScriptTokenType::Slash => write!(f, "/"),
ScriptTokenType::Asterisk => write!(f, "*"),
ScriptTokenType::Bang => write!(f, "!"),
ScriptTokenType::BangEqual => write!(f, "!="),
ScriptTokenType::Equal => write!(f, "="),
ScriptTokenType::EqualEqual => write!(f, "=="),
ScriptTokenType::Greater => write!(f, ">"),
ScriptTokenType::GreaterEqual => write!(f, ">="),
ScriptTokenType::Less => write!(f, "<"),
ScriptTokenType::LessEqual => write!(f, "<="),
ScriptTokenType::DoublePipe => write!(f, "||"),
ScriptTokenType::DoubleAmpersand => write!(f, "&&"),
ScriptTokenType::Modulo => write!(f, "%"),
ScriptTokenType::Caret => write!(f, "^"),
ScriptTokenType::Identifier(value) => write!(f, "{}", value),
ScriptTokenType::String(value) => write!(f, "{}", value),
ScriptTokenType::OwnedString(value) => write!(f, "{}", value),
ScriptTokenType::Integer(value) => write!(f, "{}", value),
ScriptTokenType::Float(value) => write!(f, "{}", value),
ScriptTokenType::Boolean(value) => write!(f, "{}", value),
ScriptTokenType::Class => write!(f, "struct"),
ScriptTokenType::Else => write!(f, "else"),
ScriptTokenType::Function => write!(f, "fn"),
ScriptTokenType::For => write!(f, "for"),
ScriptTokenType::If => write!(f, "if"),
ScriptTokenType::Null => write!(f, "null"),
ScriptTokenType::Print => write!(f, "print"),
ScriptTokenType::Return => write!(f, "return"),
ScriptTokenType::Super => write!(f, "super"),
ScriptTokenType::This => write!(f, "this"),
ScriptTokenType::Let => write!(f, "let"),
ScriptTokenType::While => write!(f, "while"),
ScriptTokenType::Export => write!(f, "export"),
ScriptTokenType::Import => write!(f, "import"),
ScriptTokenType::Alias => write!(f, "as"),
ScriptTokenType::From => write!(f, "from"),
ScriptTokenType::Eof => write!(f, ""),
}
}
}
/// Error returned when a string does not spell any fixed token.
///
/// Keeps a borrow of the rejected text so it can be shown in the message.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct TokenFromStringError<'a> {
    source: &'a str,
}

impl<'a> Display for TokenFromStringError<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { source } = self;
        write!(f, "Failed to parse into token; value {}", source)
    }
}

impl<'a> Error for TokenFromStringError<'a> {}
impl<'a> TryFrom<&'a str> for ScriptTokenType<'a> {
type Error = TokenFromStringError<'a>;
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
match value {
"(" => Ok(ScriptTokenType::LeftParen),
")" => Ok(ScriptTokenType::RightParen),
"{" => Ok(ScriptTokenType::LeftBrace),
"}" => Ok(ScriptTokenType::RightBrace),
"," => Ok(ScriptTokenType::Comma),
"." => Ok(ScriptTokenType::Dot),
"-" => Ok(ScriptTokenType::Minus),
"+" => Ok(ScriptTokenType::Plus),
";" => Ok(ScriptTokenType::Semicolon),
"/" => Ok(ScriptTokenType::Slash),
"*" => Ok(ScriptTokenType::Asterisk),
"!" => Ok(ScriptTokenType::Bang),
"!=" => Ok(ScriptTokenType::BangEqual),
"=" => Ok(ScriptTokenType::Equal),
"==" => Ok(ScriptTokenType::EqualEqual),
">" => Ok(ScriptTokenType::Greater),
">=" => Ok(ScriptTokenType::GreaterEqual),
"<" => Ok(ScriptTokenType::Less),
"<=" => Ok(ScriptTokenType::LessEqual),
"||" => Ok(ScriptTokenType::DoublePipe),
"&&" => Ok(ScriptTokenType::DoubleAmpersand),
"%" => Ok(ScriptTokenType::Modulo),
"^" => Ok(ScriptTokenType::Caret),
"struct" => Ok(ScriptTokenType::Class),
"else" => Ok(ScriptTokenType::Else),
"fn" => Ok(ScriptTokenType::Function),
"for" => Ok(ScriptTokenType::For),
"if" => Ok(ScriptTokenType::If),
"null" => Ok(ScriptTokenType::Null),
"print" => Ok(ScriptTokenType::Print),
"return" => Ok(ScriptTokenType::Return),
"super" => Ok(ScriptTokenType::Super),
"this" => Ok(ScriptTokenType::This),
"let" => Ok(ScriptTokenType::Let),
"while" => Ok(ScriptTokenType::While),
"export" => Ok(ScriptTokenType::Export),
"import" => Ok(ScriptTokenType::Import),
"as" => Ok(ScriptTokenType::Alias),
"from" => Ok(ScriptTokenType::From),
_ => Err(TokenFromStringError { source: value }),
}
}
}
/// A lexed token: its kind plus the source span at which it starts.
#[derive(Clone)]
pub struct ScriptToken<'a> {
    pub position: Span<'a>,
    pub token_type: ScriptTokenType<'a>,
}

/// Displays only the token kind; the position is omitted.
impl<'a> Display for ScriptToken<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let ScriptToken { token_type, .. } = self;
        write!(f, "{}", token_type)
    }
}

/// Debug output is `[line:column]` followed by the token kind's `Debug` form.
impl<'a> Debug for ScriptToken<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let line = self.position.location_line();
        let column = self.position.get_column();
        write!(f, "[{}:{}] {:?}", line, column, self.token_type)
    }
}
#[cfg(test)]
mod token_tests {
    use crate::parser::ScriptTokenType;

    /// Every fixed token spelling must convert to its token type via `try_from`.
    #[test]
    fn match_type_from_string() {
        let cases = [
            ("(", ScriptTokenType::LeftParen),
            (")", ScriptTokenType::RightParen),
            ("{", ScriptTokenType::LeftBrace),
            ("}", ScriptTokenType::RightBrace),
            (",", ScriptTokenType::Comma),
            (".", ScriptTokenType::Dot),
            ("-", ScriptTokenType::Minus),
            ("+", ScriptTokenType::Plus),
            (";", ScriptTokenType::Semicolon),
            ("/", ScriptTokenType::Slash),
            ("*", ScriptTokenType::Asterisk),
            ("!", ScriptTokenType::Bang),
            ("!=", ScriptTokenType::BangEqual),
            ("=", ScriptTokenType::Equal),
            ("==", ScriptTokenType::EqualEqual),
            (">", ScriptTokenType::Greater),
            (">=", ScriptTokenType::GreaterEqual),
            ("<", ScriptTokenType::Less),
            ("<=", ScriptTokenType::LessEqual),
            ("||", ScriptTokenType::DoublePipe),
            ("&&", ScriptTokenType::DoubleAmpersand),
            ("%", ScriptTokenType::Modulo),
            ("^", ScriptTokenType::Caret),
            ("struct", ScriptTokenType::Class),
            ("else", ScriptTokenType::Else),
            ("fn", ScriptTokenType::Function),
            ("for", ScriptTokenType::For),
            ("if", ScriptTokenType::If),
            ("null", ScriptTokenType::Null),
            ("print", ScriptTokenType::Print),
            ("return", ScriptTokenType::Return),
            ("super", ScriptTokenType::Super),
            ("this", ScriptTokenType::This),
            ("let", ScriptTokenType::Let),
            ("while", ScriptTokenType::While),
            ("export", ScriptTokenType::Export),
            ("import", ScriptTokenType::Import),
            ("as", ScriptTokenType::Alias),
            ("from", ScriptTokenType::From),
        ];
        for (spelling, expected) in cases {
            assert_eq!(ScriptTokenType::try_from(spelling), Ok(expected));
        }
    }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment