From 4ffd33fc04f23140022d05f8940013d4ceca72ca Mon Sep 17 00:00:00 2001 From: Louis Capitanchik <contact@louiscap.co> Date: Mon, 15 May 2023 02:44:27 +0100 Subject: [PATCH] Split into multiple crates --- .idea/micro_script.iml | 3 + Cargo.lock | 165 +++++++- Cargo.toml | 27 +- Makefile | 12 + forge-script-lang/Cargo.toml | 22 ++ forge-script-lang/src/error/mod.rs | 144 +++++++ forge-script-lang/src/lexer/atoms.rs | 35 ++ forge-script-lang/src/lexer/keywords.rs | 196 ++++++++++ forge-script-lang/src/lexer/mod.rs | 110 ++++++ forge-script-lang/src/lexer/operators.rs | 326 +++++++++++++++ forge-script-lang/src/lexer/primitives.rs | 154 ++++++++ forge-script-lang/src/lexer/strings.rs | 188 +++++++++ forge-script-lang/src/lexer/tokens.rs | 370 ++++++++++++++++++ forge-script-lang/src/lib.rs | 46 +++ forge-script-lang/src/parser/ast.rs | 332 ++++++++++++++++ forge-script-lang/src/parser/atoms.rs | 58 +++ forge-script-lang/src/parser/grammar.rs | 100 +++++ forge-script-lang/src/parser/mod.rs | 53 +++ forge-script-lang/src/runtime/executor/mod.rs | 17 + .../src/runtime/executor/printer.rs | 191 +++++++++ forge-script-lang/src/runtime/mod.rs | 3 + forge-script-lang/src/runtime/numbers.rs | 293 ++++++++++++++ forge-script-lang/src/runtime/value.rs | 142 +++++++ forge-script-web/Cargo.toml | 19 + forge-script-web/src/lib.rs | 21 + forge-script/Cargo.toml | 12 + forge-script/src/main.rs | 3 + src/main-web.rs | 0 src/parser/grammar.rs | 144 ------- 29 files changed, 3020 insertions(+), 166 deletions(-) create mode 100644 Makefile create mode 100644 forge-script-lang/Cargo.toml create mode 100644 forge-script-lang/src/error/mod.rs create mode 100644 forge-script-lang/src/lexer/atoms.rs create mode 100644 forge-script-lang/src/lexer/keywords.rs create mode 100644 forge-script-lang/src/lexer/mod.rs create mode 100644 forge-script-lang/src/lexer/operators.rs create mode 100644 forge-script-lang/src/lexer/primitives.rs create mode 100644 forge-script-lang/src/lexer/strings.rs create 
mode 100644 forge-script-lang/src/lexer/tokens.rs create mode 100644 forge-script-lang/src/lib.rs create mode 100644 forge-script-lang/src/parser/ast.rs create mode 100644 forge-script-lang/src/parser/atoms.rs create mode 100644 forge-script-lang/src/parser/grammar.rs create mode 100644 forge-script-lang/src/parser/mod.rs create mode 100644 forge-script-lang/src/runtime/executor/mod.rs create mode 100644 forge-script-lang/src/runtime/executor/printer.rs create mode 100644 forge-script-lang/src/runtime/mod.rs create mode 100644 forge-script-lang/src/runtime/numbers.rs create mode 100644 forge-script-lang/src/runtime/value.rs create mode 100644 forge-script-web/Cargo.toml create mode 100644 forge-script-web/src/lib.rs create mode 100644 forge-script/Cargo.toml create mode 100644 forge-script/src/main.rs create mode 100644 src/main-web.rs diff --git a/.idea/micro_script.iml b/.idea/micro_script.iml index 9b4cf84..5fd23c3 100644 --- a/.idea/micro_script.iml +++ b/.idea/micro_script.iml @@ -3,6 +3,9 @@ <component name="NewModuleRootManager" inherit-compiler-output="true"> <exclude-output /> <content url="file://$MODULE_DIR$"> + <sourceFolder url="file://$MODULE_DIR$/forge-script-lang/src" isTestSource="false" /> + <sourceFolder url="file://$MODULE_DIR$/forge-script-web/src" isTestSource="false" /> + <sourceFolder url="file://$MODULE_DIR$/forge-script/src" isTestSource="false" /> <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" /> <excludeFolder url="file://$MODULE_DIR$/target" /> </content> diff --git a/Cargo.lock b/Cargo.lock index 9a6a97d..7203c79 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "bumpalo" +version = "3.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c6ed94e98ecff0c12dd1b04c15ec0d7d9458ca8fe806cea6f12954efe74c63b" + [[package]] name = "bytecount" version = "0.6.3" @@ -9,20 +15,69 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] -name = "memchr" -version = "2.5.0" +name = "cfg-if" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] -name = "micro_script" +name = "console_error_panic_hook" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06aeb73f470f66dcdbf7223caeebb85984942f22f1adb2a088cf9668146bbbc" +dependencies = [ + "cfg-if", + "wasm-bindgen", +] + +[[package]] +name = "forge-script" +version = "0.1.0" + +[[package]] +name = "forge-script-lang" version = "0.1.0" dependencies = [ "nom", "nom_locate", "peg", + "serde", +] + +[[package]] +name = "forge-script-web" +version = "0.1.0" +dependencies = [ + "console_error_panic_hook", + "forge-script-lang", + "serde-wasm-bindgen", + "wasm-bindgen", +] + +[[package]] +name = "js-sys" +version = "0.3.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68c16e1bfd491478ab155fd8b4896b86f9ede344949b641e61501e07c2b8b4d5" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", ] +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -50,6 +105,12 @@ dependencies = [ "nom", ] +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" + [[package]] name = "peg" version = "0.8.1" @@ -95,8 +156,104 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "serde" +version = "1.0.163" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde-wasm-bindgen" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e" +dependencies = [ + "js-sys", + "serde", + "wasm-bindgen", +] + +[[package]] +name = "serde_derive" +version = "1.0.163" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "unicode-ident" version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" + +[[package]] +name = "wasm-bindgen" +version = "0.2.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b6cb788c4e39112fbe1822277ef6fb3c55cd86b95cb3d3c4c1c9597e4ac74b4" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" 
+version = "0.2.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e522ed4105a9d626d885b35d62501b30d9666283a5c8be12c14a8bdafe7822" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "358a79a0cb89d21db8120cbfb91392335913e4890665b1a7981d9e956903b434" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4783ce29f09b9d93134d41297aded3a712b7b979e9c6f28c32cb88c973a94869" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a901d592cafaa4d711bc324edfaff879ac700b19c3dfd60058d2b445be2691eb" diff --git a/Cargo.toml b/Cargo.toml index 5ba3840..24c1d48 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,23 +1,14 @@ -[package] -name = "micro_script" -version = "0.1.0" -edition = "2021" +[workspace] +members = [ + "forge-script-lang", + "forge-script-web", + "forge-script" +] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[profile.release] +lto = true -[features] -default = [] -debug-ast = [] - -[[bin]] -name = "micro_script" -path = "src/main.rs" - -[lib] -name = "micro_script" -path = "src/lib.rs" - -[dependencies] +[workspace.dependencies] nom = "7.1.3" nom_locate = "4.1.0" peg = "0.8.1" diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..8d2ab78 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +build: + cargo build --release + +test: + cargo test --features debug-ast + +web: + rm -rf forge-script-web/out + mkdir -p 
forge-script-web/out + wasm-pack build forge-script-web --release --target web --out-dir out/pkg-web + wasm-pack build forge-script-web --release --target nodejs --out-dir out/pkg-npm + wasm-pack build forge-script-web --release --target bundler --out-dir out/pkg-bundle \ No newline at end of file diff --git a/forge-script-lang/Cargo.toml b/forge-script-lang/Cargo.toml new file mode 100644 index 0000000..13235fc --- /dev/null +++ b/forge-script-lang/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "forge-script-lang" +version = "0.1.0" +edition = "2021" + +license = "Apache-2.0" +authors = [ + "Louis Capitanchik <louis@microhacks.co.uk>" +] +repository = "https://lab.lcr.gr/microhacks/forge-script.git" + +[features] +default = [] +debug-ast = [] +serde = ["dep:serde"] +verbose-serde = [] + +[dependencies] +nom.workspace = true +nom_locate.workspace = true +peg.workspace = true +serde = { version = "1.0", optional = true, features = ["derive"] } \ No newline at end of file diff --git a/forge-script-lang/src/error/mod.rs b/forge-script-lang/src/error/mod.rs new file mode 100644 index 0000000..46b8e80 --- /dev/null +++ b/forge-script-lang/src/error/mod.rs @@ -0,0 +1,144 @@ +use crate::lexer::Span; +use crate::parse::ScriptToken; +use std::error::Error; +use std::fmt::{Display, Formatter}; + +#[derive(Debug)] +pub enum TokenErrorKind<'a> { + Incomplete, + NomError(nom::error::Error<Span<'a>>), +} + +#[derive(Debug)] +pub struct TokenError<'a> { + pub kind: TokenErrorKind<'a>, +} + +impl<'a> From<TokenErrorKind<'a>> for TokenError<'a> { + fn from(value: TokenErrorKind<'a>) -> Self { + TokenError { kind: value } + } +} + +impl<'a> Display for TokenError<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.kind { + TokenErrorKind::Incomplete => write!(f, "Incomplete Program"), + TokenErrorKind::NomError(err) => write!(f, "{}", err), + } + } +} +impl<'a> Error for TokenError<'a> {} +impl<'a> From<nom::Err<nom::error::Error<Span<'a>>>> for 
TokenError<'a> { + fn from(value: nom::Err<nom::error::Error<Span<'a>>>) -> Self { + match value { + nom::Err::Error(err) => TokenErrorKind::NomError(err).into(), + nom::Err::Failure(err) => TokenErrorKind::NomError(err).into(), + nom::Err::Incomplete(_) => TokenErrorKind::Incomplete.into(), + } + } +} + +#[derive(Clone, Debug)] +pub enum ParseErrorKind<'a> { + Unexpected { + found: ScriptToken<'a>, + expected: peg::error::ExpectedSet, + }, +} + +#[derive(Clone, Debug)] +pub struct ParseError<'a> { + pub kind: ParseErrorKind<'a>, +} + +#[derive(Debug)] +pub enum ForgeErrorKind<'a> { + IncompleteInput, + LexerError(nom::error::Error<Span<'a>>), + UnexpectedToken { + found: ScriptToken<'a>, + expected: peg::error::ExpectedSet, + }, +} + +#[derive(Debug)] +pub struct ForgeError<'a> { + pub kind: ForgeErrorKind<'a>, +} + +impl<'a> From<ParseError<'a>> for ForgeError<'a> { + fn from(value: ParseError<'a>) -> Self { + match value.kind { + ParseErrorKind::Unexpected { found, expected } => ForgeError { + kind: ForgeErrorKind::UnexpectedToken { found, expected }, + }, + } + } +} + +impl<'a> From<TokenError<'a>> for ForgeError<'a> { + fn from(value: TokenError<'a>) -> Self { + match value.kind { + TokenErrorKind::Incomplete => ForgeError { + kind: ForgeErrorKind::IncompleteInput, + }, + TokenErrorKind::NomError(span) => ForgeError { + kind: ForgeErrorKind::LexerError(span), + }, + } + } +} + +pub type ForgeResult<'a, T> = Result<T, ForgeError<'a>>; + +pub fn print_unexpected_token<'a>( + source: &'a str, + token: &'a ScriptToken<'a>, + expected: &'a peg::error::ExpectedSet, +) { + let line = token.position.location_line() as usize; + let column = token.position.get_column(); + + let previous_line = if line > 1 { + source.lines().nth(line - 2) + } else { + None + }; + let source_line = source.lines().nth(line - 1).expect("Missing line"); + let next_line = source.lines().nth(line); + + let largest_line_num = line.max(line.saturating_sub(1)).max(line.saturating_add(1)); + let 
number_length = format!("{}", largest_line_num).len(); + + eprintln!("| Script error on line {} at \"{}\"\n|", line, token); + if let Some(prev) = previous_line { + eprintln!("| [{:>width$}] {}", line - 1, prev, width = number_length); + } + eprintln!( + "| [{:>width$}] {}", + line, + source_line, + width = number_length + ); + eprintln!( + "| {} {}{}", + vec![" "; number_length + 2].join(""), + vec![" "; column - 1].join(""), + vec!["^"; token.token_type.len()].join(""), + ); + if let Some(next) = next_line { + eprintln!("| [{:>width$}] {}", line + 1, next, width = number_length); + } + eprintln!("|\n| Failed To Parse: expected {}", expected); +} + +pub fn print_forge_error<'a>(source: &'a str, fe: &'a ForgeError) { + match &fe.kind { + ForgeErrorKind::IncompleteInput => eprintln!("| Unexpected end of file"), + ForgeErrorKind::LexerError(err) => eprintln!("| {}", err), + ForgeErrorKind::UnexpectedToken { found, expected } => { + print_unexpected_token(source, found, expected) + } + } +} diff --git a/forge-script-lang/src/lexer/atoms.rs b/forge-script-lang/src/lexer/atoms.rs new file mode 100644 index 0000000..99c907b --- /dev/null +++ b/forge-script-lang/src/lexer/atoms.rs @@ -0,0 +1,35 @@ +use nom::bytes::complete::tag; +use nom::character::complete::{char, one_of}; +use nom::combinator::value; +use nom::multi::{many0, many1}; +use nom::sequence::terminated; +use nom::IResult; +use nom_locate::LocatedSpan; + +pub type Span<'a> = LocatedSpan<&'a str>; +pub type OwnedSpan<'a> = LocatedSpan<String>; + +pub fn raw_true(input: Span) -> IResult<Span, bool> { + value(true, tag("true"))(input) +} + +pub fn raw_false(input: Span) -> IResult<Span, bool> { + value(false, tag("false"))(input) +} + +pub fn raw_decimal(input: Span) -> IResult<Span, OwnedSpan> { + let input_offset = input.location_offset(); + let input_line = input.location_line(); + + let (input, list) = many1(terminated(one_of("0123456789"), many0(char('_'))))(input)?; + let string = 
list.iter().fold(String::with_capacity(list.len()), |a, b| { + format!("{}{}", a, b) + }); + + unsafe { + Ok(( + input, + OwnedSpan::new_from_raw_offset(input_offset, input_line, string, ()), + )) + } +} diff --git a/forge-script-lang/src/lexer/keywords.rs b/forge-script-lang/src/lexer/keywords.rs new file mode 100644 index 0000000..2da25a9 --- /dev/null +++ b/forge-script-lang/src/lexer/keywords.rs @@ -0,0 +1,196 @@ +use crate::lexer::{ScriptToken, ScriptTokenType, Span}; +use nom::bytes::complete::tag; +use nom::IResult; +use nom_locate::position; + +pub fn token_struct(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("struct")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Class, + }, + )) +} + +pub fn token_else(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("else")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Else, + }, + )) +} + +pub fn token_function(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("fn")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Function, + }, + )) +} + +pub fn token_for(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("for")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::For, + }, + )) +} + +pub fn token_if(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("if")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::If, + }, + )) +} + +pub fn token_null(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("null")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: 
ScriptTokenType::Null, + }, + )) +} + +pub fn token_print(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("print")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Print, + }, + )) +} + +pub fn token_return(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("return")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Return, + }, + )) +} + +pub fn token_super(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("super")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Super, + }, + )) +} + +pub fn token_this(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("this")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::This, + }, + )) +} + +pub fn token_let(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("let")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Let, + }, + )) +} + +pub fn token_while(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("while")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::While, + }, + )) +} + +pub fn token_export(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("export")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Export, + }, + )) +} + +pub fn token_import(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("import")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Import, 
+ }, + )) +} + +pub fn token_alias(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("as")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Alias, + }, + )) +} + +pub fn token_from(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("from")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::From, + }, + )) +} diff --git a/forge-script-lang/src/lexer/mod.rs b/forge-script-lang/src/lexer/mod.rs new file mode 100644 index 0000000..1ba8950 --- /dev/null +++ b/forge-script-lang/src/lexer/mod.rs @@ -0,0 +1,110 @@ +mod atoms; +mod keywords; +mod operators; +mod primitives; +mod strings; +mod tokens; + +use keywords::{ + token_alias, token_else, token_export, token_for, token_function, token_if, token_import, + token_let, token_null, token_print, token_return, token_struct, token_super, token_this, + token_while, +}; +use operators::{ + token_asterisk, token_bang, token_bang_equal, token_caret, token_comma, token_dot, + token_double_ampersand, token_double_pipe, token_equal, token_equal_equal, token_greater, + token_greater_equal, token_left_brace, token_left_paren, token_less, token_less_equal, + token_minus, token_modulo, token_plus, token_right_brace, token_right_paren, token_semicolon, + token_slash, +}; +use primitives::{token_boolean, token_float, token_ident, token_int}; +use strings::token_string; +pub use tokens::{ScriptToken, ScriptTokenType}; + +use crate::error::TokenError; +pub(crate) use atoms::{raw_decimal, raw_false, raw_true, OwnedSpan, Span}; + +mod _lex { + use super::*; + use nom::branch::alt; + use nom::IResult; + + use nom::character::complete::multispace0; + use nom::multi::fold_many0; + use nom::sequence::delimited; + + fn any_token(input: Span) -> IResult<Span, ScriptToken> { + alt(( + alt(( + token_if, + token_function, + token_alias, + token_for, + token_let, 
+ token_else, + token_this, + token_null, + token_while, + token_return, + token_print, + token_export, + token_import, + token_struct, + token_super, + )), + alt(( + token_plus, + token_minus, + token_asterisk, + token_slash, + token_bang, + token_comma, + token_dot, + token_caret, + token_modulo, + token_left_brace, + token_left_paren, + token_right_brace, + token_right_paren, + token_double_ampersand, + token_double_pipe, + token_semicolon, + )), + alt(( + token_less, + token_greater, + token_equal, + token_equal_equal, + token_bang_equal, + token_less_equal, + token_greater_equal, + )), + alt(( + token_float, + token_int, + token_boolean, + token_string, + token_ident, + )), + ))(input) + } + + pub fn script_to_tokens<'a, 'b: 'a>( + script: &'b str, + ) -> Result<Vec<ScriptToken<'a>>, TokenError<'a>> { + let script_span = Span::new(script); + let (_, tokens) = fold_many0( + delimited(multispace0, any_token, multispace0), + Vec::new, + |mut list, tok| { + list.push(tok); + list + }, + )(script_span)?; + + Ok(tokens) + } +} + +use crate::parser::TokenSlice; +pub use _lex::script_to_tokens; diff --git a/forge-script-lang/src/lexer/operators.rs b/forge-script-lang/src/lexer/operators.rs new file mode 100644 index 0000000..edcc907 --- /dev/null +++ b/forge-script-lang/src/lexer/operators.rs @@ -0,0 +1,326 @@ +use crate::lexer::Span; +use crate::lexer::{ScriptToken, ScriptTokenType}; +use nom::bytes::complete::tag; +use nom::IResult; +use nom_locate::position; + +pub fn token_left_paren(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("(")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::LeftParen, + }, + )) +} + +pub fn token_right_paren(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(")")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::RightParen, + }, + )) +} + +pub fn 
token_left_brace(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("{")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::LeftBrace, + }, + )) +} + +pub fn token_right_brace(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("}")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::RightBrace, + }, + )) +} + +pub fn token_comma(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(",")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Comma, + }, + )) +} + +pub fn token_dot(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(".")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Dot, + }, + )) +} + +pub fn token_minus(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("-")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Minus, + }, + )) +} + +pub fn token_plus(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("+")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Plus, + }, + )) +} + +pub fn token_semicolon(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(";")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Semicolon, + }, + )) +} + +pub fn token_slash(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("/")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Slash, + }, + )) +} + +pub fn token_asterisk(input: Span) -> 
IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("*")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Asterisk, + }, + )) +} + +pub fn token_bang(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("!")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Bang, + }, + )) +} + +pub fn token_bang_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("!=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::BangEqual, + }, + )) +} + +pub fn token_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Equal, + }, + )) +} + +pub fn token_equal_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("==")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::EqualEqual, + }, + )) +} + +pub fn token_greater(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(">")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Greater, + }, + )) +} + +pub fn token_greater_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(">=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::GreaterEqual, + }, + )) +} + +pub fn token_less(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("<")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Less, + }, + )) +} + +pub fn token_less_equal(input: Span) -> 
IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("<=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::LessEqual, + }, + )) +} + +pub fn token_double_pipe(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("||")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::DoublePipe, + }, + )) +} + +pub fn token_double_ampersand(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("&&")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::DoubleAmpersand, + }, + )) +} + +pub fn token_modulo(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("%")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Modulo, + }, + )) +} + +pub fn token_caret(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("^")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Caret, + }, + )) +} + +#[cfg(test)] +mod operator_checks { + use super::*; + use crate::lexer::Span; + + fn s(st: &str) -> Span { + Span::new(st) + } + + #[test] + fn parse_brackets() { + assert_eq!( + token_left_brace(s("{")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::LeftBrace + ); + + assert_eq!( + token_right_brace(s("}")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::RightBrace + ); + + assert_eq!( + token_left_paren(s("(")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::LeftParen + ); + + assert_eq!( + token_right_paren(s(")")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::RightParen + ); + } +} diff --git a/forge-script-lang/src/lexer/primitives.rs b/forge-script-lang/src/lexer/primitives.rs new 
file mode 100644 index 0000000..fda23f5 --- /dev/null +++ b/forge-script-lang/src/lexer/primitives.rs @@ -0,0 +1,154 @@ +use crate::lexer::{raw_decimal, raw_false, raw_true, ScriptToken, ScriptTokenType, Span}; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::complete::{alpha1, alphanumeric1, one_of}; +use nom::combinator::{opt, recognize}; +use nom::multi::many0_count; +use nom::sequence::{pair, separated_pair}; +use nom::{error_position, IResult}; +use nom_locate::position; + +pub fn token_ident(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, value) = recognize(pair( + alt((alpha1, tag("_"))), + many0_count(alt((alphanumeric1, tag("_")))), + ))(input)?; + + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Identifier(value.fragment()), + }, + )) +} + +pub fn token_int(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, sign) = opt(one_of("+-"))(input)?; + let (input, value) = raw_decimal(input)?; + + format!("{}{}", sign.map(String::from).unwrap_or_default(), value) + .parse::<i64>() + .map(|value| { + ( + input, + ScriptToken { + token_type: ScriptTokenType::Integer(value), + position: pos, + }, + ) + }) + .map_err(|_| nom::Err::Failure(error_position!(pos, nom::error::ErrorKind::Digit))) +} + +pub fn token_float(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, sign) = opt(one_of("+-"))(input)?; + let (input, (before, after)) = + separated_pair(opt(raw_decimal), tag("."), opt(raw_decimal))(input)?; + + let formatted_number = format!( + "{}{}.{}", + sign.map(String::from).unwrap_or_default(), + before + .map(|s| s.fragment().to_owned()) + .unwrap_or_else(|| String::from("0")), + after + .map(|s| s.fragment().to_owned()) + .unwrap_or_else(|| String::from("0")) + ); + + formatted_number + .parse::<f64>() + .map(|value| { + ( + input, + ScriptToken { + token_type: 
ScriptTokenType::Float(value), + position: pos, + }, + ) + }) + .map_err(|_| nom::Err::Failure(error_position!(pos, nom::error::ErrorKind::Digit))) +} + +pub fn token_boolean(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, value) = alt((raw_true, raw_false))(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Boolean(value), + }, + )) +} + +#[cfg(test)] +mod parsing_tests { + use super::*; + use crate::map_result; + + #[test] + fn parse_integer() { + let positive_cases = [ + ("1234", 1234), + ("-1234", -1234), + ("0", 0), + ("1_000_000", 1000000), + ("-12_34", -1234), + ]; + + for (program, expected) in positive_cases { + map_result!(token_int(Span::new(program)), |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Integer(expected)) + }); + } + } + #[test] + fn parse_floats() { + let positive_cases = [ + ("12.34", 12.34), + ("-12.34", -12.34), + ("0.", 0.), + (".0", 0.), + (".0.", 0.), + (".0.1.2", 0.), + ("1_000_000.1_23", 1000000.123), + ("-12_34.0_0_0", -1234.0), + ]; + + for (program, expected) in positive_cases { + map_result!(token_float(Span::new(program)), |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Float(expected)) + }); + } + } + + #[test] + fn parse_bools() { + let positive_cases = [("true", true), ("false", false)]; + + for (program, expected) in positive_cases { + map_result!( + token_boolean(Span::new(program)), + |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Boolean(expected)) + } + ); + } + } + + #[test] + fn parse_identifier() { + let positive_cases = ["BarBaz", "Foo", "foo", "foasd123", "_adad"]; + + for expected in positive_cases { + map_result!(token_ident(Span::new(expected)), |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Identifier(expected)) + }); + } + } +} diff --git a/forge-script-lang/src/lexer/strings.rs b/forge-script-lang/src/lexer/strings.rs 
new file mode 100644 index 0000000..0b04579 --- /dev/null +++ b/forge-script-lang/src/lexer/strings.rs @@ -0,0 +1,188 @@ +use crate::lexer::{ScriptToken, ScriptTokenType, Span}; +use nom::branch::alt; +use nom::bytes::complete::{is_not, take_while_m_n}; +use nom::character::complete::{char as p_char, multispace1}; +use nom::combinator::{map, map_opt, map_res, value, verify}; +use nom::multi::fold_many1; +use nom::sequence::{delimited, preceded}; +use nom::IResult; +use nom_locate::position; + +/// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6 +/// hexadecimal numerals. We will combine this later with parse_escaped_char +/// to parse sequences like \u{00AC}. +fn parse_unicode(input: Span) -> IResult<Span, char> { + // `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match + // a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals. + let parse_hex = take_while_m_n(1, 6, |ch: char| ch.is_ascii_hexdigit()); + + // `preceded` takes a prefix parser, and if it succeeds, returns the result + // of the body parser. In this case, it parses u{XXXX}. + let parse_delimited_hex = preceded( + p_char::<Span, nom::error::Error<Span>>('u'), + // `delimited` is like `preceded`, but it parses both a prefix and a suffix. + // It returns the result of the middle parser. In this case, it parses + // {XXXX}, where XXXX is 1 to 6 hex numerals, and returns XXXX + delimited(p_char('{'), parse_hex, p_char('}')), + ); + + // `map_res` takes the result of a parser and applies a function that returns + // a Result. In this case we take the hex bytes from parse_hex and attempt to + // convert them to a u32. + let parse_u32 = map_res(parse_delimited_hex, move |hex| { + u32::from_str_radix(hex.fragment(), 16) + }); + + // map_opt is like map_res, but it takes an Option instead of a Result. If + // the function returns None, map_opt returns an error. 
In this case, because + // not all u32 values are valid unicode code points, we have to fallibly + // convert to char with from_u32. + let (span, char) = map_opt(parse_u32, move |val| char::from_u32(val))(input)?; + Ok((span, char)) +} + +/// Parse an escaped character: \n, \r, \t, \b, \f, \\, \/, \" or \u{XXXX}. +fn parse_escaped_char(input: Span) -> IResult<Span, char> { + preceded( + p_char('\\'), + // `alt` tries each parser in sequence, returning the result of + // the first successful match + alt(( + parse_unicode, + // The `value` parser returns a fixed value (the first argument) if its + // parser (the second argument) succeeds. In these cases, it looks for + // the marker characters (n, r, t, etc) and returns the matching + // character (\n, \r, \t, etc). + value('\n', p_char('n')), + value('\r', p_char('r')), + value('\t', p_char('t')), + value('\u{08}', p_char('b')), + value('\u{0C}', p_char('f')), + value('\\', p_char('\\')), + value('/', p_char('/')), + value('"', p_char('"')), + )), + )(input) +} + +/// Parse a backslash, followed by one or more whitespace characters. This is used later +/// to discard any escaped whitespace. +fn parse_escaped_whitespace(input: Span) -> IResult<Span, Span> { + preceded(p_char('\\'), multispace1)(input) +} + +/// Parse a non-empty block of text that doesn't include \ or " +fn parse_literal(input: Span) -> IResult<Span, Span> { + // `is_not` parses a string of 0 or more characters that aren't one of the + // given characters. + let not_quote_slash = is_not("\"\\"); + + // `verify` runs a parser, then runs a verification function on the output of + // the parser. The verification function accepts the output only if it + // returns true. In this case, we want to ensure that the output of is_not + // is non-empty. 
+ verify(not_quote_slash, |s: &Span| !s.fragment().is_empty())(input) +} + +/// A string fragment contains a fragment of a string being parsed: either +/// a non-empty Literal (a series of non-escaped characters), a single +/// parsed escaped character, or a block of escaped whitespace. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum StringFragment<'a> { + Literal(&'a str), + EscapedChar(char), + EscapedWS, +} + +/// Combine parse_literal, parse_escaped_whitespace, and parse_escaped_char +/// into a StringFragment. +fn parse_fragment(input: Span) -> IResult<Span, StringFragment> { + alt(( + map(parse_literal, |sp: Span| { + StringFragment::Literal(sp.fragment()) + }), + map(parse_escaped_char, StringFragment::EscapedChar), + value(StringFragment::EscapedWS, parse_escaped_whitespace), + ))(input) +} + +/// Parse a double-quoted string literal, which may be empty. Every fragment produced +/// by parse_fragment is appended to a single owned output string. +pub fn token_string(input: Span) -> IResult<Span, ScriptToken> { + // fold is the equivalent of iterator::fold. It runs a parser in a loop, + // and for each output value, calls a folding function on each output value. + let build_string = fold_many1( + // Our parser function– parses a single string fragment + parse_fragment, + // Our init value, an empty string + String::new, + // Our folding function. For each fragment, append the fragment to the + // string. 
+ |mut string, fragment| { + match fragment { + StringFragment::Literal(s) => string.push_str(s), + StringFragment::EscapedChar(c) => string.push(c), + StringFragment::EscapedWS => {} + } + string + }, + ); + + let (input, pos) = position(input)?; + let (input, value) = delimited(p_char('"'), nom::combinator::opt(build_string), p_char('"'))(input)?; // fold_many1 fails on zero fragments; `opt` lets the empty literal through + + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::OwnedString(value.unwrap_or_default()), + }, + )) +} + +#[cfg(test)] +mod string_test { + use super::token_string; + use crate::lexer::Span; + use crate::lexer::{ScriptToken, ScriptTokenType}; + use crate::map_result; + + #[test] + fn parse_escaped_string() { + let positive_cases = [ + ( + r#""This is an escaped String""#, + String::from("This is an escaped String"), + ), + ( + r#""This is an \"escaped\" String""#, + String::from("This is an \"escaped\" String"), + ), + ( + r#""Many whitespaces can be collapsed with a slash \ + and they won't matter""#, + String::from( + "Many whitespaces can be collapsed with a slash and they won't matter", + ), + ), + ( + r#""Big whitespace preserved can be collapsed with a slash + and they won't matter""#, + String::from( + r#"Big whitespace preserved can be collapsed with a slash + and they won't matter"#, + ), + ), + ]; + + for (program, expected) in positive_cases { + map_result!( + token_string(Span::new(program)), + |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::OwnedString(expected)) + }, + program + ); + } + } +} diff --git a/forge-script-lang/src/lexer/tokens.rs b/forge-script-lang/src/lexer/tokens.rs new file mode 100644 index 0000000..33031ae --- /dev/null +++ b/forge-script-lang/src/lexer/tokens.rs @@ -0,0 +1,370 @@ +use crate::lexer::Span; +use std::error::Error; +use std::fmt::{format, Debug, Display, Formatter}; + +#[derive(PartialEq, Clone, Debug)] +pub enum ScriptTokenType<'a> { + // Structural Tokens + LeftParen, + RightParen, + LeftBrace, + RightBrace, + Comma, + Dot, + Semicolon, + + // Unary 
Operators + Bang, + Minus, + + // Binary Operators + Asterisk, + Slash, + Plus, + BangEqual, + Equal, + EqualEqual, + Greater, + GreaterEqual, + Less, + LessEqual, + DoublePipe, + DoubleAmpersand, + Modulo, + Caret, + + // Literals + Identifier(&'a str), + String(&'a str), + OwnedString(String), + Integer(i64), + Float(f64), + Boolean(bool), + + // Keywords + Class, + Else, + Function, + For, + If, + Null, + Print, + Return, + Super, + This, + Let, + While, + Export, + Import, + Alias, + From, + + // Misc + Eof, +} + +impl<'a> ScriptTokenType<'a> { + pub fn len(&self) -> usize { // length in bytes of the token's spelling; fixed spellings match the Display and TryFrom impls + match self { + ScriptTokenType::LeftParen => 1, + ScriptTokenType::RightParen => 1, + ScriptTokenType::LeftBrace => 1, // "{" is a single character; Display only doubles it to escape the format string + ScriptTokenType::RightBrace => 1, // likewise "}" + ScriptTokenType::Comma => 1, + ScriptTokenType::Dot => 1, + ScriptTokenType::Minus => 1, + ScriptTokenType::Plus => 1, + ScriptTokenType::Semicolon => 1, + ScriptTokenType::Slash => 1, + ScriptTokenType::Asterisk => 1, + ScriptTokenType::Bang => 1, + ScriptTokenType::BangEqual => 2, + ScriptTokenType::Equal => 1, + ScriptTokenType::EqualEqual => 2, + ScriptTokenType::Greater => 1, + ScriptTokenType::GreaterEqual => 2, + ScriptTokenType::Less => 1, + ScriptTokenType::LessEqual => 2, + ScriptTokenType::DoublePipe => 2, + ScriptTokenType::DoubleAmpersand => 2, + ScriptTokenType::Modulo => 1, + ScriptTokenType::Caret => 1, + ScriptTokenType::Identifier(value) => value.len(), + ScriptTokenType::String(value) => value.len() + 2, + ScriptTokenType::OwnedString(value) => value.len() + 2, + ScriptTokenType::Integer(value) => value.to_string().len(), + ScriptTokenType::Float(value) => value.to_string().len(), + ScriptTokenType::Boolean(value) => { + if *value { + 4 + } else { + 5 + } + } + ScriptTokenType::Class => 6, + ScriptTokenType::Else => 4, + ScriptTokenType::Function => 2, + ScriptTokenType::For => 3, + ScriptTokenType::If => 2, + ScriptTokenType::Null => 4, + ScriptTokenType::Print => 5, + ScriptTokenType::Return => 6, + 
ScriptTokenType::Super => 5, + ScriptTokenType::This => 4, + ScriptTokenType::Let => 3, + ScriptTokenType::While => 5, + ScriptTokenType::Export => 6, + ScriptTokenType::Import => 6, + ScriptTokenType::Alias => 2, + ScriptTokenType::From => 4, + ScriptTokenType::Eof => 0, + } + } +} + +impl<'a> Display for ScriptTokenType<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + ScriptTokenType::LeftParen => write!(f, "("), + ScriptTokenType::RightParen => write!(f, ")"), + ScriptTokenType::LeftBrace => write!(f, "{{"), + ScriptTokenType::RightBrace => write!(f, "}}"), + ScriptTokenType::Comma => write!(f, ","), + ScriptTokenType::Dot => write!(f, "."), + ScriptTokenType::Minus => write!(f, "-"), + ScriptTokenType::Plus => write!(f, "+"), + ScriptTokenType::Semicolon => write!(f, ";"), + ScriptTokenType::Slash => write!(f, "/"), + ScriptTokenType::Asterisk => write!(f, "*"), + ScriptTokenType::Bang => write!(f, "!"), + ScriptTokenType::BangEqual => write!(f, "!="), + ScriptTokenType::Equal => write!(f, "="), + ScriptTokenType::EqualEqual => write!(f, "=="), + ScriptTokenType::Greater => write!(f, ">"), + ScriptTokenType::GreaterEqual => write!(f, ">="), + ScriptTokenType::Less => write!(f, "<"), + ScriptTokenType::LessEqual => write!(f, "<="), + ScriptTokenType::DoublePipe => write!(f, "||"), + ScriptTokenType::DoubleAmpersand => write!(f, "&&"), + ScriptTokenType::Modulo => write!(f, "%"), + ScriptTokenType::Caret => write!(f, "^"), + ScriptTokenType::Identifier(value) => write!(f, "{}", value), + ScriptTokenType::String(value) => write!(f, "{}", value), + ScriptTokenType::OwnedString(value) => write!(f, "{}", value), + ScriptTokenType::Integer(value) => write!(f, "{}", value), + ScriptTokenType::Float(value) => write!(f, "{}", value), + ScriptTokenType::Boolean(value) => write!(f, "{}", value), + ScriptTokenType::Class => write!(f, "struct"), + ScriptTokenType::Else => write!(f, "else"), + ScriptTokenType::Function => write!(f, "fn"), + 
ScriptTokenType::For => write!(f, "for"), + ScriptTokenType::If => write!(f, "if"), + ScriptTokenType::Null => write!(f, "null"), + ScriptTokenType::Print => write!(f, "print"), + ScriptTokenType::Return => write!(f, "return"), + ScriptTokenType::Super => write!(f, "super"), + ScriptTokenType::This => write!(f, "this"), + ScriptTokenType::Let => write!(f, "let"), + ScriptTokenType::While => write!(f, "while"), + ScriptTokenType::Export => write!(f, "export"), + ScriptTokenType::Import => write!(f, "import"), + ScriptTokenType::Alias => write!(f, "as"), + ScriptTokenType::From => write!(f, "from"), + ScriptTokenType::Eof => write!(f, ""), + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct TokenFromStringError<'a> { + source: &'a str, +} +impl<'a> Display for TokenFromStringError<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "Failed to parse into token; value {}", self.source) + } +} +impl<'a> Error for TokenFromStringError<'a> {} + +impl<'a> TryFrom<&'a str> for ScriptTokenType<'a> { + type Error = TokenFromStringError<'a>; + + fn try_from(value: &'a str) -> Result<Self, Self::Error> { + match value { + "(" => Ok(ScriptTokenType::LeftParen), + ")" => Ok(ScriptTokenType::RightParen), + "{" => Ok(ScriptTokenType::LeftBrace), + "}" => Ok(ScriptTokenType::RightBrace), + "," => Ok(ScriptTokenType::Comma), + "." => Ok(ScriptTokenType::Dot), + "-" => Ok(ScriptTokenType::Minus), + "+" => Ok(ScriptTokenType::Plus), + ";" => Ok(ScriptTokenType::Semicolon), + "/" => Ok(ScriptTokenType::Slash), + "*" => Ok(ScriptTokenType::Asterisk), + "!" 
=> Ok(ScriptTokenType::Bang), + "!=" => Ok(ScriptTokenType::BangEqual), + "=" => Ok(ScriptTokenType::Equal), + "==" => Ok(ScriptTokenType::EqualEqual), + ">" => Ok(ScriptTokenType::Greater), + ">=" => Ok(ScriptTokenType::GreaterEqual), + "<" => Ok(ScriptTokenType::Less), + "<=" => Ok(ScriptTokenType::LessEqual), + "||" => Ok(ScriptTokenType::DoublePipe), + "&&" => Ok(ScriptTokenType::DoubleAmpersand), + "%" => Ok(ScriptTokenType::Modulo), + "^" => Ok(ScriptTokenType::Caret), + "struct" => Ok(ScriptTokenType::Class), + "else" => Ok(ScriptTokenType::Else), + "fn" => Ok(ScriptTokenType::Function), + "for" => Ok(ScriptTokenType::For), + "if" => Ok(ScriptTokenType::If), + "null" => Ok(ScriptTokenType::Null), + "print" => Ok(ScriptTokenType::Print), + "return" => Ok(ScriptTokenType::Return), + "super" => Ok(ScriptTokenType::Super), + "this" => Ok(ScriptTokenType::This), + "let" => Ok(ScriptTokenType::Let), + "while" => Ok(ScriptTokenType::While), + "export" => Ok(ScriptTokenType::Export), + "import" => Ok(ScriptTokenType::Import), + "as" => Ok(ScriptTokenType::Alias), + "from" => Ok(ScriptTokenType::From), + "false" => Ok(ScriptTokenType::Boolean(false)), + "true" => Ok(ScriptTokenType::Boolean(true)), + _ => Err(TokenFromStringError { source: value }), + } + } +} + +#[derive(Clone)] +pub struct ScriptToken<'a> { + pub position: Span<'a>, + pub token_type: ScriptTokenType<'a>, +} + +impl<'a> Display for ScriptToken<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.token_type) + } +} + +impl<'a> Debug for ScriptToken<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "[{}:{}] {:?}", + self.position.location_line(), + self.position.get_column(), + self.token_type + ) + } +} + +#[cfg(test)] +mod token_tests { + use super::ScriptTokenType; + + #[test] + fn match_type_from_string() { + assert_eq!( + ScriptTokenType::try_from("("), + Ok(ScriptTokenType::LeftParen) + ); + assert_eq!( + 
ScriptTokenType::try_from(")"), + Ok(ScriptTokenType::RightParen) + ); + assert_eq!( + ScriptTokenType::try_from("{"), + Ok(ScriptTokenType::LeftBrace) + ); + assert_eq!( + ScriptTokenType::try_from("}"), + Ok(ScriptTokenType::RightBrace) + ); + assert_eq!(ScriptTokenType::try_from(","), Ok(ScriptTokenType::Comma)); + assert_eq!(ScriptTokenType::try_from("."), Ok(ScriptTokenType::Dot)); + assert_eq!(ScriptTokenType::try_from("-"), Ok(ScriptTokenType::Minus)); + assert_eq!(ScriptTokenType::try_from("+"), Ok(ScriptTokenType::Plus)); + assert_eq!( + ScriptTokenType::try_from(";"), + Ok(ScriptTokenType::Semicolon) + ); + assert_eq!(ScriptTokenType::try_from("/"), Ok(ScriptTokenType::Slash)); + assert_eq!( + ScriptTokenType::try_from("*"), + Ok(ScriptTokenType::Asterisk) + ); + assert_eq!(ScriptTokenType::try_from("!"), Ok(ScriptTokenType::Bang)); + assert_eq!( + ScriptTokenType::try_from("!="), + Ok(ScriptTokenType::BangEqual) + ); + assert_eq!(ScriptTokenType::try_from("="), Ok(ScriptTokenType::Equal)); + assert_eq!( + ScriptTokenType::try_from("=="), + Ok(ScriptTokenType::EqualEqual) + ); + assert_eq!(ScriptTokenType::try_from(">"), Ok(ScriptTokenType::Greater)); + assert_eq!( + ScriptTokenType::try_from(">="), + Ok(ScriptTokenType::GreaterEqual) + ); + assert_eq!(ScriptTokenType::try_from("<"), Ok(ScriptTokenType::Less)); + assert_eq!( + ScriptTokenType::try_from("<="), + Ok(ScriptTokenType::LessEqual) + ); + assert_eq!( + ScriptTokenType::try_from("||"), + Ok(ScriptTokenType::DoublePipe) + ); + assert_eq!( + ScriptTokenType::try_from("&&"), + Ok(ScriptTokenType::DoubleAmpersand) + ); + assert_eq!(ScriptTokenType::try_from("%"), Ok(ScriptTokenType::Modulo)); + assert_eq!(ScriptTokenType::try_from("^"), Ok(ScriptTokenType::Caret)); + assert_eq!( + ScriptTokenType::try_from("struct"), + Ok(ScriptTokenType::Class) + ); + assert_eq!(ScriptTokenType::try_from("else"), Ok(ScriptTokenType::Else)); + assert_eq!( + ScriptTokenType::try_from("fn"), + 
Ok(ScriptTokenType::Function) + ); + assert_eq!(ScriptTokenType::try_from("for"), Ok(ScriptTokenType::For)); + assert_eq!(ScriptTokenType::try_from("if"), Ok(ScriptTokenType::If)); + assert_eq!(ScriptTokenType::try_from("null"), Ok(ScriptTokenType::Null)); + assert_eq!( + ScriptTokenType::try_from("print"), + Ok(ScriptTokenType::Print) + ); + assert_eq!( + ScriptTokenType::try_from("return"), + Ok(ScriptTokenType::Return) + ); + assert_eq!( + ScriptTokenType::try_from("super"), + Ok(ScriptTokenType::Super) + ); + assert_eq!(ScriptTokenType::try_from("this"), Ok(ScriptTokenType::This)); + assert_eq!(ScriptTokenType::try_from("let"), Ok(ScriptTokenType::Let)); + assert_eq!( + ScriptTokenType::try_from("while"), + Ok(ScriptTokenType::While) + ); + assert_eq!( + ScriptTokenType::try_from("export"), + Ok(ScriptTokenType::Export) + ); + assert_eq!( + ScriptTokenType::try_from("import"), + Ok(ScriptTokenType::Import) + ); + assert_eq!(ScriptTokenType::try_from("as"), Ok(ScriptTokenType::Alias)); + assert_eq!(ScriptTokenType::try_from("from"), Ok(ScriptTokenType::From)); + } +} diff --git a/forge-script-lang/src/lib.rs b/forge-script-lang/src/lib.rs new file mode 100644 index 0000000..91ddc9b --- /dev/null +++ b/forge-script-lang/src/lib.rs @@ -0,0 +1,46 @@ +mod error; +mod lexer; +mod parser; +pub mod runtime; + +pub use error::{print_forge_error, ParseError, ParseErrorKind, TokenError, TokenErrorKind}; +pub mod parse { + pub use super::lexer::{script_to_tokens, ScriptToken, ScriptTokenType}; + pub use super::parser::ast; + pub use super::parser::{parse_expression, parse_program}; +} + +#[cfg(test)] +#[macro_export] +macro_rules! 
map_result { + ($val: expr, $with: expr) => { + match $val { + Ok((remainder, token)) => $with(remainder, token), + Err(nom::Err::Incomplete(_)) => panic!("Incorrect error type"), + Err(nom::Err::Failure(err)) | Err(nom::Err::Error(err)) => { + panic!( + "At [{}:{}]: {}; Value ||{}||", + &err.input.location_line(), + &err.input.get_column(), + &err.code.description(), + &err.input + ); + } + } + }; + ($val: expr, $with: expr, $printable: expr) => { + match $val { + Ok((remainder, token)) => $with(remainder, token), + Err(nom::Err::Incomplete(_)) => panic!("Incorrect error type"), + Err(nom::Err::Failure(err)) | Err(nom::Err::Error(err)) => { + panic!( + "At [{}:{}]: {}; Value {}", + &err.input.location_line(), + &err.input.get_column(), + &err.code.description(), + $printable + ); + } + } + }; +} diff --git a/forge-script-lang/src/parser/ast.rs b/forge-script-lang/src/parser/ast.rs new file mode 100644 index 0000000..f6b8f55 --- /dev/null +++ b/forge-script-lang/src/parser/ast.rs @@ -0,0 +1,332 @@ +use crate::runtime::numbers::Number; +use crate::runtime::value::ForgeValue; +use std::fmt::{Display, Formatter}; +use std::ops::Deref; + +pub trait AstNode {} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Program(pub ExpressionList); +impl AstNode for Program {} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ExpressionList { + pub expressions: Vec<Expression>, + pub is_void: bool, +} +impl ExpressionList { + pub fn production(expressions: Vec<Expression>) -> Self { + ExpressionList { + expressions, + is_void: false, + } + } + pub fn voided(expressions: Vec<Expression>) -> Self { + ExpressionList { + expressions, + is_void: true, + } + } +} +impl Deref for ExpressionList { + type Target = Vec<Expression>; + fn deref(&self) -> &Self::Target { + 
&self.expressions + } +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(tag = "node_type", content = "node_value", rename_all = "snake_case") +)] +pub enum Expression { + #[cfg_attr(feature = "serde", serde(rename = "value_expression"))] + Value(ValueExpression), + #[cfg_attr(feature = "serde", serde(rename = "void_expression"))] + Void(VoidExpression), +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(rename_all = "snake_case") +)] +pub enum UnaryOp { + Not, + Negate, +} + +impl Display for UnaryOp { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + Self::Not => "!", + Self::Negate => "-", + } + ) + } +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(rename_all = "snake_case") +)] +pub enum BinaryOp { + Add, + Subtract, + Multiply, + Divide, + Modulo, + Equals, +} + +impl Display for BinaryOp { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + match self { + BinaryOp::Add => "+", + BinaryOp::Subtract => "-", + BinaryOp::Multiply => "*", + BinaryOp::Divide => "/", + BinaryOp::Modulo => "%", + BinaryOp::Equals => "==", + } + ) + } +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(tag = "node_type", content = "node_value", rename_all = "snake_case") +)] +pub enum VoidExpression { + ConditionLoop(ConditionalLoop), + Import(Import), + Export(Export), + Print(Print), +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct 
GroupedExpression { + pub inner: Box<ValueExpression>, +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(tag = "node_type", content = "node_value", rename_all = "snake_case") +)] +pub enum ValueExpression { + Unary { + operator: UnaryOp, + operand: Box<ValueExpression>, + }, + Binary { + lhs: Box<ValueExpression>, + rhs: Box<ValueExpression>, + operator: BinaryOp, + }, + Grouped(GroupedExpression), + Block(ExpressionList), + Literal(LiteralNode), + DeclareIdentifier(DeclareIdent), + Assignment(Assignment), + ConditionalBlock(Conditional), +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct ConditionalLoop { + pub block: GuardedBlock, + pub fallback: Option<ExpressionList>, +} + +impl ConditionalLoop { + pub fn expr_while(block: GuardedBlock) -> Self { + Self { + block, + fallback: None, + } + } + pub fn expr_while_else(block: GuardedBlock, fallback: ExpressionList) -> Self { + Self { + block, + fallback: Some(fallback), + } + } +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Conditional { + pub blocks: Vec<GuardedBlock>, + pub fallback: Option<ExpressionList>, +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct GuardedBlock { + pub guard: Box<ValueExpression>, + pub block: ExpressionList, +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Import { + pub source: String, + pub items: IdentifierList, +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", 
derive(serde::Serialize, serde::Deserialize))] +pub struct Export { + pub items: IdentifierList, +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Print { + pub expr: Box<ValueExpression>, +} +impl From<ValueExpression> for Print { + fn from(value: ValueExpression) -> Self { + Print { + expr: Box::new(value), + } + } +} + +pub type IdentifierList = Vec<IdentifierNode>; +pub type ParameterList = Vec<IdentifierNode>; + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Identifier(pub String); +impl Display for Identifier { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +/// Alias an identifier, to create a new way of referring to it +/// IdentifierAlias(original, alias) => identifier "as" alias +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct IdentifierAlias(pub String, pub String); +impl Display for IdentifierAlias { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{} as {}", self.0, self.1) + } +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(tag = "node_type", content = "node_value", rename_all = "snake_case") +)] +pub enum IdentifierNode { + Direct(Identifier), + Alias(IdentifierAlias), +} +impl Display for IdentifierNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Direct(val) => val.fmt(f), + Self::Alias(val) => val.fmt(f), + } + } +} + +impl IdentifierNode { + pub fn get_name(&self) -> &str { + match self { + Self::Direct(value) => &value.0, + Self::Alias(value) => &value.1, + } + } + + pub fn get_base(&self) -> &str { 
+ match self { + Self::Direct(value) => &value.0, + Self::Alias(value) => &value.0, + } + } +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(tag = "node_type", content = "node_value", rename_all = "snake_case") +)] +pub enum LiteralNode { + Number(Number), + String(String), + Boolean(bool), + Null, +} + +impl Display for LiteralNode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Boolean(val) => write!(f, "{}", val), + Self::Number(val) => write!(f, "{}", val), + Self::String(val) => write!(f, r#""{}""#, val), + Self::Null => write!(f, "null"), + } + } +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct Assignment { + pub ident: Identifier, + pub value: Box<ValueExpression>, +} + +#[derive(Clone)] +#[cfg_attr(feature = "debug-ast", derive(Debug))] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(tag = "node_type", content = "node_value", rename_all = "snake_case") +)] +pub enum DeclareIdent { + WithValue(Assignment), + WithoutValue(Identifier), +} diff --git a/forge-script-lang/src/parser/atoms.rs b/forge-script-lang/src/parser/atoms.rs new file mode 100644 index 0000000..ecc1950 --- /dev/null +++ b/forge-script-lang/src/parser/atoms.rs @@ -0,0 +1,58 @@ +use crate::lexer::{ScriptToken, ScriptTokenType}; +use peg::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult}; +use std::ops::Deref; + +#[repr(transparent)] +pub struct TokenSlice<'a>(pub &'a [ScriptToken<'a>]); +impl<'a> Deref for TokenSlice<'a> { + type Target = [ScriptToken<'a>]; + fn deref(&self) -> &'a Self::Target { + self.0 + } +} + +impl<'a> Parse for TokenSlice<'a> { + type PositionRepr = usize; + + fn start(&self) -> usize { + 0 + } + + fn is_eof(&self, position: usize) -> bool { + position >= 
self.len() + } + + fn position_repr(&self, position: usize) -> Self::PositionRepr { + position + } +} +impl<'a> ParseElem<'a> for TokenSlice<'a> { + type Element = &'a ScriptToken<'a>; + + fn parse_elem(&'a self, pos: usize) -> RuleResult<Self::Element> { + match self.get(pos) { // checked lookup, as in parse_string_literal: an out-of-range `pos` fails the match instead of panicking + Some(elemt) => RuleResult::Matched(pos + 1, elemt), + None => RuleResult::Failed, + } + } +} +impl<'a> ParseLiteral for TokenSlice<'a> { + fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> { + let matches = self + .get(pos) + .map(|token| ScriptTokenType::try_from(literal).as_ref() == Ok(&token.token_type)) + .unwrap_or(false); + if matches { + RuleResult::Matched(pos + 1, ()) + } else { + RuleResult::Failed + } + } +} +impl<'a> ParseSlice<'a> for TokenSlice<'a> { + type Slice = &'a [ScriptToken<'a>]; + + fn parse_slice(&'a self, p1: usize, p2: usize) -> Self::Slice { + &self[p1..p2] + } +} diff --git a/forge-script-lang/src/parser/grammar.rs b/forge-script-lang/src/parser/grammar.rs new file mode 100644 index 0000000..e292608 --- /dev/null +++ b/forge-script-lang/src/parser/grammar.rs @@ -0,0 +1,100 @@ +use crate::parser::TokenSlice; + +peg::parser! { + grammar script_parser<'a>() for TokenSlice<'a> { + use crate::parser::ast::*; + use crate::runtime::numbers::Number; + use crate::lexer::{ScriptToken, ScriptTokenType}; + + pub rule program() -> Program + = ex:expression_list() eof() { Program(ex) } + + rule expression_list() -> ExpressionList + = e:(statement() / expression())+ term:";"? 
{ ExpressionList { expressions: e, is_void: term.is_some() } } + + rule statement() -> Expression + // Include conditional here separately from expression to allow "if" without semi + = e:conditional() { Expression::Value(ValueExpression::ConditionalBlock(e)) } + / e:condition_loop() { Expression::Void(VoidExpression::ConditionLoop(e)) } + / e:expression() ";" { e } + + pub rule expression() -> Expression + = ex:value_expression() { Expression::Value(ex) } + / ex:void_expression() { Expression::Void(ex) } + + rule void_expression() -> VoidExpression + = ex:print() { VoidExpression::Print(ex) } + + #[cache_left_rec] + rule value_expression() -> ValueExpression + = "(" ex:value_expression() ")" { ValueExpression::Grouped(GroupedExpression { inner: Box::new(ex) }) } + / co:conditional() { ValueExpression::ConditionalBlock(co) } + / left:value_expression() op:binary_operator() right:value_expression() + { ValueExpression::Binary { lhs: Box::new(left), rhs: Box::new(right), operator: op } } + / op:unary_operator() operand:value_expression() + { ValueExpression::Unary { operator: op, operand: Box::new(operand) } } + / li:literal() { ValueExpression::Literal(li) } + + rule print() -> Print + = "print" ex:value_expression() { ex.into() } + + rule condition_loop() -> ConditionalLoop + // The "else" form is tried first, as in `conditional`: PEG choice is ordered, so the plain form would otherwise always match and leave "else" unparsed + = "while" guard:value_expression() "{" block:expression_list() "}" "else" "{" fallback:expression_list() "}" { ConditionalLoop { block: GuardedBlock { guard: Box::new(guard), block}, fallback: Some(fallback) } } + / "while" guard:value_expression() "{" block:expression_list() "}" { ConditionalLoop { block: GuardedBlock { guard: Box::new(guard), block}, fallback: None } } + + rule conditional() -> Conditional + // = bl:guarded_block() { Conditional { fallback: None, blocks: vec![bl] } } + = blocks:(guarded_block() ++ "else") "else" "{" fallback:expression_list() "}" { + Conditional { + blocks, + fallback: Some(fallback) + } + } + / blocks:(guarded_block() ++ "else") { Conditional { fallback: 
None, blocks, } } + + rule guarded_block() -> GuardedBlock + = "if" guard:value_expression() "{" block:expression_list() "}" + { GuardedBlock { block, guard: Box::new(guard) } } + + rule binary_operator() -> BinaryOp + = "+" { BinaryOp::Add } + / "-" { BinaryOp::Subtract } + / "*" { BinaryOp::Multiply } + / "/" { BinaryOp::Divide } + + rule unary_operator() -> UnaryOp + = "!" { UnaryOp::Not } + / "-" { UnaryOp::Negate } + + rule identifier_list() -> IdentifierList + = identifier() ++ "," + + rule param_list() -> ParameterList + = ids:bare_identifier() ++ "," + { ids.iter().cloned().map(IdentifierNode::Direct).collect() } + + rule identifier() -> IdentifierNode + = id:alias_identifier() { IdentifierNode::Alias(id) } + / id:bare_identifier() { IdentifierNode::Direct(id) } + + rule alias_identifier() -> IdentifierAlias + = base:bare_identifier() "as" alias:bare_identifier() { IdentifierAlias(base.0, alias.0) } + + rule bare_identifier() -> Identifier + = [ScriptToken { token_type: ScriptTokenType::Identifier(vl), .. }] { Identifier(String::from(*vl)) } + + rule literal() -> LiteralNode + = "true" { LiteralNode::Boolean(true) } + / "false" { LiteralNode::Boolean(false) } + / "null" { LiteralNode::Null } + / [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { LiteralNode::String(String::from(*vl)) } + / [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { LiteralNode::String(vl.clone()) } + / [ScriptToken { token_type: ScriptTokenType::Integer(vl), .. }] { LiteralNode::Number(Number::Integer(*vl)) } + / [ScriptToken { token_type: ScriptTokenType::Float(vl), .. 
}] { LiteralNode::Number(Number::Float(*vl)) } + + rule eof() = ![_] + } +} + +pub use script_parser::{expression, program}; diff --git a/forge-script-lang/src/parser/mod.rs b/forge-script-lang/src/parser/mod.rs new file mode 100644 index 0000000..454b676 --- /dev/null +++ b/forge-script-lang/src/parser/mod.rs @@ -0,0 +1,53 @@ +pub mod ast; +mod atoms; +mod grammar; + +use crate::error::{ForgeError, ForgeErrorKind, ForgeResult}; +use crate::print_forge_error; +pub use atoms::TokenSlice; + +pub fn slice<'a>(toks: &'a [crate::lexer::ScriptToken]) -> TokenSlice<'a> { + TokenSlice(toks) +} + +pub fn parse_expression(expr: &str) -> ForgeResult<ast::Expression> { + let tokens = crate::lexer::script_to_tokens(expr)?; + let result = match grammar::expression(&TokenSlice(tokens.as_slice())) { + Ok(expr) => Ok(expr), + Err(parse_error) => { + let bad_token = &tokens[parse_error.location]; + Err(ForgeError { + kind: ForgeErrorKind::UnexpectedToken { + found: bad_token.clone(), + expected: parse_error.expected, + }, + }) + } + }; + + result.map_err(|e| { + print_forge_error(expr, &e); + e + }) +} + +pub fn parse_program(prog: &str) -> ForgeResult<ast::Program> { + let tokens = crate::lexer::script_to_tokens(prog)?; + let result = match grammar::program(&TokenSlice(tokens.as_slice())) { + Ok(prog) => Ok(prog.clone()), + Err(parse_error) => { + let bad_token = &tokens[parse_error.location]; + Err(ForgeError { + kind: ForgeErrorKind::UnexpectedToken { + found: bad_token.clone(), + expected: parse_error.expected, + }, + }) + } + }; + + result.map_err(|e| { + print_forge_error(prog, &e); + e + }) +} diff --git a/forge-script-lang/src/runtime/executor/mod.rs b/forge-script-lang/src/runtime/executor/mod.rs new file mode 100644 index 0000000..ab3ef43 --- /dev/null +++ b/forge-script-lang/src/runtime/executor/mod.rs @@ -0,0 +1,17 @@ +mod printer; + +use crate::parser::ast::{Expression, Program, ValueExpression, VoidExpression}; + +pub trait Visitor { + fn evaluate_value_expression(&mut 
self, expression: &ValueExpression); + fn evaluate_void_expression(&mut self, expression: &VoidExpression); + fn evaluate_expression(&mut self, expression: &Expression) { + match expression { + Expression::Value(expr) => self.evaluate_value_expression(expr), + Expression::Void(expr) => self.evaluate_void_expression(expr), + } + } + fn evaluate_program(&mut self, program: &Program); +} + +pub use printer::TreePrinter; diff --git a/forge-script-lang/src/runtime/executor/printer.rs b/forge-script-lang/src/runtime/executor/printer.rs new file mode 100644 index 0000000..7431492 --- /dev/null +++ b/forge-script-lang/src/runtime/executor/printer.rs @@ -0,0 +1,191 @@ +use crate::parser::ast::*; +use crate::runtime::executor::Visitor; + +pub struct TreePrinter { + indent: usize, + buffer: String, +} + +impl TreePrinter { + pub fn new() -> Self { + Self { + indent: 0, + buffer: String::new(), + } + } + + pub fn increment(&mut self) { + self.indent = self.indent.saturating_add(1); + } + + pub fn decrement(&mut self) { + self.indent = self.indent.saturating_sub(1); + } + + pub fn get_indent(&self) -> String { + vec!["\t"; self.indent].join("") + } + + fn write(&mut self, value: impl ToString) { + self.buffer.push_str(value.to_string().as_str()); + } + fn writeln(&mut self, value: impl ToString) { + self.buffer.push_str(value.to_string().as_str()); + self.buffer.push('\n'); + } + fn write_indent(&mut self) { + self.write(self.get_indent()); + } + fn new_line(&mut self) { + self.buffer.push('\n'); + } + + fn format_expression_list(&self, list: &ExpressionList) -> String { + let mut inner_printer = TreePrinter::new(); + inner_printer.indent = self.indent; + + let len = list.len(); + let mut iter = list.expressions.iter(); + let mut counter = 1; + + while let Some(value) = iter.next() { + inner_printer.evaluate_expression(value); + if counter < len { + inner_printer.writeln(";"); + } + counter += 1; + } + + if list.is_void { + inner_printer.writeln(";"); + } + + 
inner_printer.buffer + } + + pub fn value(&self) -> &str { + self.buffer.as_str() + } + + pub fn take_value(&mut self) -> String { + std::mem::take(&mut self.buffer) + } +} + +impl Visitor for TreePrinter { + fn evaluate_value_expression(&mut self, expression: &ValueExpression) { + match expression { + ValueExpression::Unary { operand, operator } => { + self.write(operator); + self.evaluate_value_expression(operand.as_ref()); + } + ValueExpression::Binary { operator, rhs, lhs } => { + self.evaluate_value_expression(lhs.as_ref()); + self.write(operator); + self.evaluate_value_expression(rhs.as_ref()); + } + ValueExpression::Block(list) => { + self.writeln("{"); + self.increment(); + self.write(self.format_expression_list(list)); + self.decrement(); + self.writeln("}"); + } + ValueExpression::Literal(value) => self.write(value), + ValueExpression::DeclareIdentifier(dec_ident) => match dec_ident { + DeclareIdent::WithoutValue(ident) => { + self.write("let "); + self.write(ident); + } + DeclareIdent::WithValue(assign) => { + self.write("let "); + self.write(&assign.ident); + self.write(" = "); + self.evaluate_value_expression(assign.value.as_ref()); + } + }, + ValueExpression::Assignment(assign) => { + self.write(&assign.ident); + self.write(" = "); + self.evaluate_value_expression(assign.value.as_ref()); + } + ValueExpression::ConditionalBlock(condition) => { + let mut iter = condition.blocks.iter(); + let mut curr = 1; + let count = iter.len(); + while let Some(item) = iter.next() { + self.write("if "); + self.evaluate_value_expression(item.guard.as_ref()); + self.writeln(" {"); + self.increment(); + self.write(self.format_expression_list(&item.block)); + self.decrement(); + if curr < count { + self.write("} else "); + } else { + self.write("}"); + } + } + + if let Some(fall) = &condition.fallback { + self.writeln(" else {"); + self.increment(); + self.write(self.format_expression_list(fall)); + self.decrement(); + self.writeln("}"); + } + } + 
ValueExpression::Grouped(GroupedExpression { inner }) => { + self.write("("); + self.evaluate_value_expression(inner.as_ref()); + self.write(")"); + } + } + } + + fn evaluate_void_expression(&mut self, expression: &VoidExpression) { + match expression { + VoidExpression::ConditionLoop(cond) => { + self.write("while "); + self.evaluate_value_expression(cond.block.guard.as_ref()); + self.writeln(" {"); + self.increment(); + self.writeln(self.format_expression_list(&cond.block.block)); + self.decrement(); + self.writeln(" {"); + } + VoidExpression::Import(import) => { + self.write("import { "); + let ident_list = import + .items + .iter() + .map(|idn| format!("{}", idn)) + .collect::<Vec<String>>(); + + self.write(ident_list.as_slice().join(", ")); + self.write(" } from \""); + self.write(&import.source); + self.write("\""); + } + VoidExpression::Export(export) => { + self.write("export {"); + let ident_list = export + .items + .iter() + .map(|idn| format!("{}", idn)) + .collect::<Vec<String>>(); + + self.write(ident_list.as_slice().join(", ")); + self.write("}"); + } + VoidExpression::Print(print) => { + self.write("print "); + self.evaluate_value_expression(print.expr.as_ref()); + } + } + } + + fn evaluate_program(&mut self, program: &Program) { + self.writeln(self.format_expression_list(&program.0)); + } +} diff --git a/forge-script-lang/src/runtime/mod.rs b/forge-script-lang/src/runtime/mod.rs new file mode 100644 index 0000000..afe3ea9 --- /dev/null +++ b/forge-script-lang/src/runtime/mod.rs @@ -0,0 +1,3 @@ +pub mod executor; +pub mod numbers; +pub mod value; diff --git a/forge-script-lang/src/runtime/numbers.rs b/forge-script-lang/src/runtime/numbers.rs new file mode 100644 index 0000000..3519f16 --- /dev/null +++ b/forge-script-lang/src/runtime/numbers.rs @@ -0,0 +1,293 @@ +use std::cmp::Ordering; +use std::fmt::{Display, Formatter}; +use std::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Rem, RemAssign, Sub, SubAssign}; + +/// Represents a numerical 
/// Represents a numerical value in a script
#[derive(Copy, Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "serde",
    serde(tag = "f", content = "v", rename_all = "snake_case")
)]
pub enum Number {
    Integer(i64),
    Float(f64),
}

impl Number {
    /// Create a new Number representing the provided integer
    #[inline]
    pub const fn integer(value: i64) -> Self {
        Self::Integer(value)
    }
    /// Create a new Number representing the provided float
    #[inline]
    pub const fn float(value: f64) -> Self {
        Self::Float(value)
    }
    /// Check if the internal representation of this number is a fractional value
    #[inline]
    pub const fn is_float(&self) -> bool {
        matches!(self, Number::Float(..))
    }
    /// Check if the internal representation of this number is an integer value
    #[inline]
    pub const fn is_integer(&self) -> bool {
        matches!(self, Number::Integer(..))
    }

    /// Create a copy of the value represented as an Integer internally.
    /// A float is converted with an `as` cast: the fractional part is
    /// discarded, out-of-range values saturate at the `i64` bounds and NaN
    /// becomes 0.
    pub fn as_integer(&self) -> Self {
        Self::Integer(self.as_i64())
    }
    /// Create a copy of the value represented as a Float internally.
    /// Integers that `f64` cannot represent exactly (magnitude above 2^53)
    /// are rounded by the conversion.
    pub fn as_float(&self) -> Self {
        Self::Float(self.as_f64())
    }

    /// Unwrap this number into a native rust value, represented as
    /// an integer.
    /// A float is converted with an `as` cast: the fractional part is
    /// discarded, out-of-range values saturate at the `i64` bounds and NaN
    /// becomes 0.
    pub fn as_i64(&self) -> i64 {
        match *self {
            Self::Integer(val) => val,
            Self::Float(val) => val as i64,
        }
    }
    /// Unwrap this number into a native rust value, represented as
    /// a float.
    /// Integers that `f64` cannot represent exactly (magnitude above 2^53)
    /// are rounded by the conversion.
    pub fn as_f64(&self) -> f64 {
        match *self {
            Self::Integer(val) => val as f64,
            Self::Float(val) => val,
        }
    }

    /// Check to see if both value _and_ type matches between this and another
    /// Number.
    /// An escape hatch for situations where it is critical that values are (or
    /// aren't) the same internal type.
    pub fn matches(&self, other: Number) -> bool {
        use Number::*;
        match (*self, other) {
            (Integer(first), Integer(second)) => first == second,
            (Float(first), Float(second)) => first == second,
            _ => false,
        }
    }
}

/// Equality by numeric value: an integer and a float compare equal when the
/// integer, converted to `f64`, equals the float.
impl PartialEq for Number {
    fn eq(&self, other: &Self) -> bool {
        use Number::*;
        match (self, other) {
            (Integer(first), Integer(second)) => first == second,
            (Float(first), Float(second)) => first == second,
            (Float(fl), Integer(int)) | (Integer(int), Float(fl)) => (*int as f64) == *fl,
        }
    }
}

/// Ordering by numeric value; mixed comparisons convert the integer to `f64`.
/// Yields `None` when a NaN float is involved.
impl PartialOrd for Number {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        use Number::*;
        match (self, other) {
            (Integer(first), Integer(second)) => first.partial_cmp(second),
            (Float(first), Float(second)) => first.partial_cmp(second),
            (Float(first), Integer(second)) => first.partial_cmp(&(*second as f64)),
            (Integer(first), Float(second)) => (*first as f64).partial_cmp(second),
        }
    }
}

/// Integer + integer saturates at the `i64` bounds; any float operand makes
/// the result a float.
impl Add for Number {
    type Output = Number;
    fn add(self, rhs: Self) -> Self::Output {
        use Number::*;
        match (self, rhs) {
            (Integer(first), Integer(second)) => Integer(first.saturating_add(second)),
            (Float(first), Float(second)) => Float(first + second),
            (Float(fl), Integer(int)) | (Integer(int), Float(fl)) => Float(fl + int as f64),
        }
    }
}
impl AddAssign for Number {
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs;
    }
}

/// Integer - integer saturates at the `i64` bounds; any float operand makes
/// the result a float.
impl Sub for Number {
    type Output = Number;
    fn sub(self, rhs: Self) -> Self::Output {
        use Number::*;
        match (self, rhs) {
            (Integer(first), Integer(second)) => Integer(first.saturating_sub(second)),
            (Float(first), Float(second)) => Float(first - second),
            (Float(first), Integer(second)) => Float(first - second as f64),
            (Integer(first), Float(second)) => Float(first as f64 - second),
        }
    }
}
impl SubAssign for Number {
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        *self = *self - rhs;
    }
}

/// Integer * integer saturates at the `i64` bounds, matching `Add` and `Sub`;
/// any float operand makes the result a float.
impl Mul for Number {
    type Output = Number;

    fn mul(self, rhs: Self) -> Self::Output {
        use Number::*;
        match (self, rhs) {
            // A plain `*` panics on overflow in debug builds and wraps in
            // release builds; saturate like the other integer operators.
            (Integer(first), Integer(second)) => Integer(first.saturating_mul(second)),
            (Float(first), Float(second)) => Float(first * second),
            (Float(fl), Integer(int)) | (Integer(int), Float(fl)) => Float(fl * int as f64),
        }
    }
}
impl MulAssign for Number {
    #[inline]
    fn mul_assign(&mut self, rhs: Self) {
        *self = *self * rhs
    }
}

/// Division always produces a float, whatever the operand types.
impl Div for Number {
    type Output = Number;
    #[inline]
    fn div(self, rhs: Self) -> Self::Output {
        Number::float(self.as_f64() / rhs.as_f64())
    }
}
impl DivAssign for Number {
    #[inline]
    fn div_assign(&mut self, rhs: Self) {
        *self = *self / rhs;
    }
}

/// Remainder. Integer % integer stays an integer, except that a zero divisor
/// yields `Float(NaN)` (the same result the float path gives) instead of
/// panicking. Any float operand makes the result a float.
impl Rem for Number {
    type Output = Number;
    fn rem(self, rhs: Self) -> Self::Output {
        use Number::*;
        match (self, rhs) {
            // `i64 % 0` panics; report "not a number" like `f64 % 0.0` does.
            (Integer(_), Integer(0)) => Float(f64::NAN),
            // `wrapping_rem` gives 0 for `i64::MIN % -1`, which a plain `%`
            // rejects as an overflow.
            (Integer(first), Integer(second)) => Integer(first.wrapping_rem(second)),
            (Float(first), Float(second)) => Float(first % second),
            (Float(first), Integer(second)) => Float(first % second as f64),
            (Integer(first), Float(second)) => Float(first as f64 % second),
        }
    }
}

impl RemAssign for Number {
    #[inline]
    fn rem_assign(&mut self, rhs: Self) {
        *self = *self % rhs;
    }
}

impl Display for Number {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Number::Float(val) => write!(f, "{}", val),
            Number::Integer(val) => write!(f, "{}", val),
        }
    }
}

// Implements `From<Number>` for each listed primitive using an `as` cast, so
// the conversion truncates or saturates exactly as the cast does.
macro_rules! impl_into_primitive {
    ($($type:ty),+) => {
        $(
            impl From<Number> for $type {
                fn from(value: Number) -> Self {
                    match value {
                        Number::Integer(value) => value as $type,
                        Number::Float(value) => value as $type,
                    }
                }
            }
        )+
    };
}

// Implements `From<primitive>` for `Number`: the `int:` arm builds
// `Number::Integer` through an `as i64` cast, the `float:` arm builds
// `Number::Float` through an `as f64` cast.
macro_rules! impl_from {
    (int: $($type:ty),+) => {
        $(
            impl From<$type> for Number {
                fn from(value: $type) -> Self {
                    Number::Integer(value as i64)
                }
            }
        )+
    };
    (float: $($type:ty),+) => {
        $(
            impl From<$type> for Number {
                fn from(value: $type) -> Self {
                    Number::Float(value as f64)
                }
            }
        )+
    };
}

impl_into_primitive!(u8, u16, u32, u64, usize, i8, i16, i32, i64, isize, f32, f64);
impl_from!(int: u8, u16, u32, u64, usize, i8, i16, i32, i64, isize);
impl_from!(float: f32, f64);
"Subtraction did not create float" + ); + assert!((first * second).is_float(), "Multiply did not create float"); + assert!((first / second).is_float(), "Divide did not create float"); + + assert!( + (first / first).is_float(), + "Divide should always create float" + ); + } +} diff --git a/forge-script-lang/src/runtime/value.rs b/forge-script-lang/src/runtime/value.rs new file mode 100644 index 0000000..089a07f --- /dev/null +++ b/forge-script-lang/src/runtime/value.rs @@ -0,0 +1,142 @@ +use crate::parser::ast::LiteralNode; +use crate::runtime::numbers::Number; +use std::fmt::{Display, Formatter}; + +#[derive(Clone, Debug)] +#[cfg_attr( + feature = "serde", + derive(serde::Serialize, serde::Deserialize), + serde(rename_all = "snake_case") +)] +#[cfg_attr( + all(feature = "serde", feature = "verbose-serde"), + serde(tag = "type", content = "value") +)] +#[cfg_attr( + all(feature = "serde", not(feature = "verbose-serde")), + serde(untagged) +)] +pub enum ForgeValue { + Number(Number), + Boolean(bool), + String(String), + List(Vec<ForgeValue>), + Null, +} + +impl ForgeValue { + /// Perform type coercion to force this value into a bool + /// + /// ## True + /// - Non-zero number + /// - Literal value "true" + /// - Non-empty list + /// - Non-empty string + /// + /// ## False + /// - Zero + /// - Literal value "false" + /// - Empty list + /// - Empty string + /// - Null + pub fn as_bool(&self) -> bool { + match self { + ForgeValue::Number(val) => val != &Number::integer(0), + ForgeValue::Boolean(val) => *val, + ForgeValue::List(val) => !val.is_empty(), + ForgeValue::String(val) => !val.is_empty(), + ForgeValue::Null => false, + } + } + + /// Perform type coercion to force this value into a number + /// + /// Number => Number + /// true => 1 + /// false => 0 + /// Non-Empty List => 1 + /// Empty List => 0 + /// Non-Empty String => 1 + /// Empty String => 0 + /// null => 0 + pub fn as_number(&self) -> Number { + match self { + ForgeValue::Number(val) => *val, + 
ForgeValue::Boolean(val) => { + if *val { + Number::Integer(1) + } else { + Number::Integer(0) + } + } + ForgeValue::List(val) => { + if val.is_empty() { + Number::Integer(0) + } else { + Number::Integer(1) + } + } + ForgeValue::String(val) => { + if val.is_empty() { + Number::Integer(0) + } else { + Number::Integer(1) + } + } + ForgeValue::Null => Number::Integer(0), + } + } + + /// Perform type coercion to force this value into a string + /// + /// Does not quote strings, just returns their value + /// Arrays print as a comma seperated list, surrounded by "\[" "]" + pub fn as_string(&self) -> String { + self.to_string() + } +} + +impl Display for ForgeValue { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Number(n) => write!(f, "{}", n), + Self::String(st) => st.fmt(f), + Self::Null => write!(f, "null"), + Self::Boolean(val) => { + if *val { + write!(f, "true") + } else { + write!(f, "false") + } + } + Self::List(val) => { + write!( + f, + "[{}]", + val.iter() + .map(|val| format!("{}", val)) + .fold(String::new(), |mut acc, va| { + if acc.is_empty() { + va + } else { + acc.push_str(", "); + acc.push_str(va.as_str()); + acc + } + }) + ) + } + } + } +} + +impl From<LiteralNode> for ForgeValue { + fn from(value: LiteralNode) -> Self { + match value { + LiteralNode::Boolean(val) => ForgeValue::Boolean(val), + LiteralNode::String(val) => ForgeValue::String(val), + LiteralNode::Number(val) => ForgeValue::Number(val), + LiteralNode::Null => ForgeValue::Null, + } + } +} diff --git a/forge-script-web/Cargo.toml b/forge-script-web/Cargo.toml new file mode 100644 index 0000000..a4bb7e7 --- /dev/null +++ b/forge-script-web/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "forge-script-web" +version = "0.1.0" +edition = "2021" + +license = "Apache-2.0" +authors = [ + "Louis Capitanchik <louis@microhacks.co.uk>" +] +repository = "https://lab.lcr.gr/microhacks/forge-script.git" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +wasm-bindgen 
= "0.2.85" +serde-wasm-bindgen = "0.5.0" +forge-script-lang = { path = "../forge-script-lang", features = ["serde"] } +console_error_panic_hook = "0.1.7" \ No newline at end of file diff --git a/forge-script-web/src/lib.rs b/forge-script-web/src/lib.rs new file mode 100644 index 0000000..11e449e --- /dev/null +++ b/forge-script-web/src/lib.rs @@ -0,0 +1,21 @@ +use forge_script_lang::parse::parse_program; +use forge_script_lang::runtime::executor::{TreePrinter, Visitor}; +use wasm_bindgen::prelude::*; + +#[wasm_bindgen(start)] +pub fn init() { + std::panic::set_hook(Box::new(console_error_panic_hook::hook)); +} + +#[wasm_bindgen] +pub fn compile_ast(program: &str) -> Result<JsValue, serde_wasm_bindgen::Error> { + serde_wasm_bindgen::to_value(&parse_program(program).expect("Failed to parse")) +} + +#[wasm_bindgen] +pub fn format_script(program: &str) -> Result<String, serde_wasm_bindgen::Error> { + let ast = &parse_program(program).expect("Failed to parse"); + let mut writer = TreePrinter::new(); + writer.evaluate_program(ast); + Ok(writer.take_value()) +} diff --git a/forge-script/Cargo.toml b/forge-script/Cargo.toml new file mode 100644 index 0000000..cbea55c --- /dev/null +++ b/forge-script/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "forge-script" +version = "0.1.0" +edition = "2021" + +license = "Apache-2.0" +authors = [ + "Louis Capitanchik <louis@microhacks.co.uk>" +] +repository = "https://lab.lcr.gr/microhacks/forge-script.git" + +[dependencies] diff --git a/forge-script/src/main.rs b/forge-script/src/main.rs new file mode 100644 index 0000000..a30eb95 --- /dev/null +++ b/forge-script/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/src/main-web.rs b/src/main-web.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/parser/grammar.rs b/src/parser/grammar.rs index c440118..37eaf95 100644 --- a/src/parser/grammar.rs +++ b/src/parser/grammar.rs @@ -6,150 +6,6 @@ use std::error::Error; use std::fmt::{Display, 
Formatter}; use std::ops::{Deref, DerefMut}; -#[repr(transparent)] -pub struct TokenSlice<'a>(pub &'a [ScriptToken<'a>]); -impl<'a> Deref for TokenSlice<'a> { - type Target = [ScriptToken<'a>]; - fn deref(&self) -> &'a Self::Target { - self.0 - } -} - -impl<'a> Parse for TokenSlice<'a> { - type PositionRepr = usize; - - fn start(&self) -> usize { - 0 - } - - fn is_eof(&self, position: usize) -> bool { - position >= self.len() - } - - fn position_repr(&self, position: usize) -> Self::PositionRepr { - position - } -} -impl<'a> ParseElem<'a> for TokenSlice<'a> { - type Element = &'a ScriptToken<'a>; - - fn parse_elem(&'a self, pos: usize) -> RuleResult<Self::Element> { - match self[pos..].first() { - Some(elemt) => RuleResult::Matched(pos + 1, elemt), - None => RuleResult::Failed, - } - } -} -impl<'a> ParseLiteral for TokenSlice<'a> { - fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> { - let matches = self - .get(pos) - .map(|token| ScriptTokenType::try_from(literal).as_ref() == Ok(&token.token_type)) - .unwrap_or(false); - if matches { - RuleResult::Matched(pos + 1, ()) - } else { - RuleResult::Failed - } - } -} -impl<'a> ParseSlice<'a> for TokenSlice<'a> { - type Slice = &'a [ScriptToken<'a>]; - - fn parse_slice(&'a self, p1: usize, p2: usize) -> Self::Slice { - &self[p1..p2] - } -} - -peg::parser! { - grammar script_parser<'a>() for TokenSlice<'a> { - use crate::parser::ast::*; - use crate::runtime::Number; - - pub rule program() -> Program<'input> - = ex:expression_list() eof() { Program(ex) } - - rule expression_list() -> ExpressionList<'input> - = e:(statement() / expression())+ term:";"? 
{ ExpressionList { expressions: e, is_void: term.is_some() } } - - rule statement() -> Expression<'input> - // Include conditional here separately from expression to allow "if" without semi - = e:conditional() { Expression::Value(ValueExpression::ConditionalBlock(e)) } - / e:expression() ";" { e } - - pub rule expression() -> Expression<'input> - = ex:value_expression() { Expression::Value(ex) } - / ex:void_expression() { Expression::Void(ex) } - - rule void_expression() -> VoidExpression<'input> - = ex:print() { VoidExpression::Print(ex) } - - #[cache_left_rec] - rule value_expression() -> ValueExpression<'input> - = co:conditional() { ValueExpression::ConditionalBlock(co) } - / left:value_expression() op:binary_operator() right:value_expression() - { ValueExpression::Binary { lhs: Box::new(left), rhs: Box::new(right), operator: op } } - / op:unary_operator() operand:value_expression() - { ValueExpression::Unary { operator: op, operand: Box::new(operand) } } - / li:literal() { ValueExpression::Literal(li) } - - rule print() -> Print<'input> - = "print" ex:value_expression() { ex.into() } - - rule conditional() -> Conditional<'input> - // = bl:guarded_block() { Conditional { fallback: None, blocks: vec![bl] } } - = blocks:(guarded_block() ++ "else") "else" "{" fallback:expression_list() "}" { - Conditional { - blocks, - fallback: Some(fallback) - } - } - / blocks:(guarded_block() ++ "else") { Conditional { fallback: None, blocks, } } - - rule guarded_block() -> GuardedBlock<'input> - = "if" guard:value_expression() "{" block:expression_list() "}" - { GuardedBlock { block: block, guard: Box::new(guard) } } - - rule binary_operator() -> BinaryOp - = "+" { BinaryOp::Add } - / "-" { BinaryOp::Subtract } - / "*" { BinaryOp::Multiply } - / "/" { BinaryOp::Divide } - - rule unary_operator() -> UnaryOp - = "!" 
{ UnaryOp::Not } - / "-" { UnaryOp::Negate } - - rule identifier_list() -> IdentifierList<'input> - = identifier() ++ "," - - rule param_list() -> ParameterList<'input> - = ids:bare_identifier() ++ "," - { ids.iter().copied().map(IdentifierNode::Direct).collect() } - - rule identifier() -> IdentifierNode<'input> - = id:alias_identifier() { IdentifierNode::Alias(id) } - / id:bare_identifier() { IdentifierNode::Direct(id) } - - rule alias_identifier() -> IdentifierAlias<'input> - = base:bare_identifier() "as" alias:bare_identifier() { IdentifierAlias(base.0, alias.0) } - - rule bare_identifier() -> Identifier<'input> - = [ScriptToken { token_type: ScriptTokenType::Identifier(vl), .. }] { Identifier(vl) } - - rule literal() -> LiteralNode<'input> - = "true" { LiteralNode::Boolean(true) } - / "false" { LiteralNode::Boolean(false) } - / "null" { LiteralNode::Null } - / [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { LiteralNode::String(vl) } - / [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { LiteralNode::String(vl.as_str()) } - / [ScriptToken { token_type: ScriptTokenType::Integer(vl), .. }] { LiteralNode::Number(Number::Integer(*vl)) } - / [ScriptToken { token_type: ScriptTokenType::Float(vl), .. }] { LiteralNode::Number(Number::Float(*vl)) } - - rule eof() = ![_] - } -} - #[derive(Clone, Debug)] pub enum ParseErrorKind { Unexpected(ExpectedSet), -- GitLab