diff --git a/Cargo.lock b/Cargo.lock
index 3b00ffccc557aeadc82889e53db50e9bbfd2b9a4..9a6a97d72fc7a926c21666269ad5f524b2cf00ed 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2,6 +2,101 @@
 # It is not intended for manual editing.
 version = 3
 
+[[package]]
+name = "bytecount"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
 [[package]]
 name = "micro_script"
 version = "0.1.0"
+dependencies = [
+ "nom",
+ "nom_locate",
+ "peg",
+]
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "nom_locate"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1e299bf5ea7b212e811e71174c5d1a5d065c4c0ad0c8691ecb1f97e3e66025e"
+dependencies = [
+ "bytecount",
+ "memchr",
+ "nom",
+]
+
+[[package]]
+name = "peg"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a07f2cafdc3babeebc087e499118343442b742cc7c31b4d054682cc598508554"
+dependencies = [
+ "peg-macros",
+ "peg-runtime",
+]
+
+[[package]]
+name = "peg-macros"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a90084dc05cf0428428e3d12399f39faad19b0909f64fb9170c9fdd6d9cd49b"
+dependencies = [
+ "peg-runtime",
+ "proc-macro2",
+ "quote",
+]
+
+[[package]]
+name = "peg-runtime"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9fa00462b37ead6d11a82c9d568b26682d78e0477dc02d1966c013af80969739"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.56"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f4f29d145265ec1c483c7c654450edde0bfe043d3938d6972630663356d9500"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4"
diff --git a/Cargo.toml b/Cargo.toml
index 39b575a4ed6e6959c7e3910d0d68b2335af0c027..5ba384044a89ba22090c1f961b1c071d7158d4c9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,10 @@ edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
+[features]
+default = []
+debug-ast = []
+
 [[bin]]
 name = "micro_script"
 path = "src/main.rs"
@@ -14,3 +18,6 @@ name = "micro_script"
 path = "src/lib.rs"
 
 [dependencies]
+nom = "7.1.3"
+nom_locate = "4.1.0"
+peg = "0.8.1"
diff --git a/src/lib.rs b/src/lib.rs
index a25580dd2bed0d62c3a6ac3ba34e3e6a790702d0..2a08c574032d8ebe42954218214cef12ee30574b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1 +1,3 @@
 pub mod moka_script;
+mod parser;
+mod runtime;
diff --git a/src/main.rs b/src/main.rs
index 29c8250b96cd752b014aea9d16d97d3332e279bb..10147288b47c3f8bbda1b291c9690a1ea4cea2b5 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,11 +1,11 @@
+use micro_script::moka_script::{RunScriptError, ScriptExitCode};
+
 fn main() {
     let mut args = std::env::args();
-    println!("{:?}", &args);
-
     if args.len() > 2 {
         eprintln!("Usage: mscr [script]");
-        std::process::exit(64);
-    } else if let Some(arg) = args.nth(2) {
+        std::process::exit(ScriptExitCode::BadCliArgs as i32);
+    } else if let Some(arg) = args.nth(1) {
         vm_shim::run_file(arg);
     } else {
         vm_shim::run_repl();
@@ -14,6 +14,7 @@ fn main() {
 
 mod vm_shim {
     use micro_script::moka_script;
+    use micro_script::moka_script::ScriptExitCode;
     use std::io::{BufRead, Read};
     use std::path::PathBuf;
 
@@ -22,7 +23,10 @@ mod vm_shim {
         if let Ok(mut file) = file {
             let mut buff = String::new();
             file.read_to_string(&mut buff).expect("Failed to read file");
-            moka_script::run(buff);
+            if let Err(e) = moka_script::ms_run_script(buff) {
+                eprintln!("{}", e);
+                std::process::exit(ScriptExitCode::RunFileError as i32);
+            }
         }
     }
 
@@ -33,7 +37,10 @@ mod vm_shim {
                 Ok(contents) => match contents.as_str() {
                     ".exit" => break,
                     other => {
-                        println!("{}", other);
+                        if let Err(e) = moka_script::ms_run_script(other.into()) {
+                            eprintln!("{}", e);
+                            std::process::exit(ScriptExitCode::BadReplError as i32);
+                        }
                     }
                 },
                 Err(e) => {
diff --git a/src/moka_script.rs b/src/moka_script.rs
index 5a123168c1c5e8b25ed0e1f6f45990f932c41420..acb43e696cc1f2f47bbfbe400eb0e29e8eb7eb3d 100644
--- a/src/moka_script.rs
+++ b/src/moka_script.rs
@@ -1,6 +1,123 @@
-pub fn run(source: String) {
-    let basic_tokens = source.split(' ');
-    for token in basic_tokens {
-        println!("{}", token);
+use crate::parser::{lex_script, parse_tokens, ScriptToken, TokenSlice};
+use std::error::Error;
+use std::fmt::{Debug, Display, Formatter};
+
+/// Process exit codes used by the `mscr` binary.
+#[derive(Debug)]
+#[repr(C)]
+pub enum ScriptExitCode {
+    BadCliArgs = 64,
+    RunFileError = 65,
+    BadReplError = 66,
+}
+
+/// Error returned by [`ms_run_script`] when a script cannot be lexed or parsed.
+#[derive(Debug, Clone)]
+pub struct RunScriptError {
+    line: usize,
+    location: String,
+    message: String,
+}
+
+impl RunScriptError {
+    /// Create an error for `line` with the given message and an empty location.
+    pub fn message(line: usize, message: impl ToString) -> RunScriptError {
+        RunScriptError {
+            line,
+            location: String::new(),
+            message: message.to_string(),
+        }
+    }
+
+    /// Create an error for `line` with the placeholder message `"Unknown"`.
+    pub fn line(line: usize) -> RunScriptError {
+        RunScriptError {
+            line,
+            location: String::new(),
+            message: String::from("Unknown"),
+        }
+    }
+}
+
+impl Display for RunScriptError {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "[Line {}] Error {}: {}",
+            self.line, self.location, self.message
+        )
+    }
+}
+
+impl Error for RunScriptError {}
+
+/// Lex and parse `source`, printing a diagnostic excerpt on parse failure.
+///
+/// With the `debug-ast` feature enabled, the tokens and parsed expressions are
+/// printed to stdout after a successful parse.
+///
+/// # Errors
+///
+/// Returns a [`RunScriptError`] carrying the offending line when the script
+/// fails to lex or parse.
+pub fn ms_run_script(source: String) -> Result<(), RunScriptError> {
+    // Report lexer failures to the caller instead of panicking on bad input.
+    let tokens = lex_script(source.as_str())
+        .map_err(|e| RunScriptError::message(e.get_error_location().0, e))?;
+    // The grammar needs at least one expression; an empty script is a no-op.
+    if tokens.is_empty() {
+        return Ok(());
+    }
+    let slice = TokenSlice(tokens.as_slice());
+
+    let program = match parse_tokens(&slice) {
+        Ok(val) => val,
+        Err(e) => {
+            let (line, column) = e.get_error_location();
+
+            let previous_line = if line > 1 {
+                source.lines().nth(line - 2)
+            } else {
+                None
+            };
+            // Degrade to an empty excerpt rather than panic if the line is missing.
+            let source_line = source.lines().nth(line.saturating_sub(1)).unwrap_or("");
+            let next_line = source.lines().nth(line);
+
+            // The widest number in the gutter belongs to the following line.
+            let number_length = line.saturating_add(1).to_string().len();
+
+            eprintln!("| Script error on line {} at \"{}\"\n|", line, e.token);
+            if let Some(prev) = previous_line {
+                eprintln!("| [{:>width$}] {}", line - 1, prev, width = number_length);
+            }
+            eprintln!(
+                "| [{:>width$}] {}",
+                line,
+                source_line,
+                width = number_length
+            );
+            eprintln!(
+                "| {} {}{}",
+                " ".repeat(number_length + 2),
+                " ".repeat(column.saturating_sub(1)),
+                "^".repeat(e.token.token_type.len()),
+            );
+            if let Some(next) = next_line {
+                eprintln!("| [{:>width$}] {}", line + 1, next, width = number_length);
+            }
+            eprintln!("|\n| Failed To Parse: {}", e.kind);
+            return Err(RunScriptError::message(line, "Failed to parse"));
+        }
+    };
+
+    #[cfg(feature = "debug-ast")]
+    {
+        println!("\n{:?}\n", tokens);
+        for expr in program.0.iter() {
+            println!("{:?}", expr);
+        }
     }
+
+    Ok(())
 }
diff --git a/src/parser/Grammar.bnf b/src/parser/Grammar.bnf
new file mode 100644
index 0000000000000000000000000000000000000000..8420aa07ff16a76ea2ba84f2c7d8f1f4bca97d2f
--- /dev/null
+++ b/src/parser/Grammar.bnf
@@ -0,0 +1,47 @@
+program ::= expression_list +block ::= "{" expression_list? "}" + +expression_list ::= expression (";" expression)* ";"? + +expression ::= value_expression + | void_expression + +void_expression ::= condition_loop | import | export | block | print +value_expression ::= unary_operator value_expression + | value_expression binary_operator value_expression + | literal + | conditional + | declare_ident + | assignment + +print ::= "print" value_expression ";" + +condition_loop ::= "while" value_expression block +conditional ::= "if" value_expression block "else" conditional + | "if" value_expression block "else" block + | "if" value_expression block + +declare_func ::= "fn" identifier "(" ")" block + +declare_ident ::= "let" assignment + | "let" identifier +assignment ::= identifier "=" value_expression +export ::= "export" identifier_list +import ::= "import" identifier_list "from" string +identifier_list ::= "{" identifier ("as" identifier)? ("," identifier ("as" identifier)?)* "}" + +binary_operator ::= "*" | "/" | "+" | "-" | "%" | "^" + | "&&" | "||" + | "==" | "!=" | "<" | "<=" | ">" | ">=" +unary_operator ::= "-" | "!" +identifier ::= ALPHA ALPHANUM* + | "_" ALPHA ALPHANUM* + +literal ::= integer | float | string | boolean | null + +boolean ::= "true" | "false" +null ::= "null" +string ::= '"' ANY_NON_UNESCAPED_QUOTE_TOKEN* '"' +integer ::= "-"? DIGIT ("_"? DIGIT)* +float ::= "-"? integer? "." integer + | "-"? integer "." integer? 
\ No newline at end of file
diff --git a/src/parser/ast.rs b/src/parser/ast.rs
new file mode 100644
index 0000000000000000000000000000000000000000..fa5dfb8796584270bc9e722f30da8f3dac7ce54e
--- /dev/null
+++ b/src/parser/ast.rs
@@ -0,0 +1,216 @@
+use crate::runtime::Number;
+use std::ops::Deref;
+
+/// Marker trait for AST node types (currently implemented by `Program` only).
+pub trait AstNode {}
+
+/// Root of a parsed script: its top-level expression list.
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct Program<'a>(pub ExpressionList<'a>);
+impl<'a> AstNode for Program<'a> {}
+
+/// A sequence of expressions; the grammar sets `is_void` when the list ends with `;`.
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct ExpressionList<'a> {
+    pub expressions: Vec<Expression<'a>>,
+    pub is_void: bool,
+}
+impl<'a> ExpressionList<'a> {
+    /// Build a list with `is_void` unset.
+    pub fn production(expressions: Vec<Expression<'a>>) -> Self {
+        ExpressionList {
+            expressions,
+            is_void: false,
+        }
+    }
+    /// Build a list with `is_void` set.
+    pub fn voided(expressions: Vec<Expression<'a>>) -> Self {
+        ExpressionList {
+            expressions,
+            is_void: true,
+        }
+    }
+}
+impl<'a> Deref for ExpressionList<'a> {
+    type Target = Vec<Expression<'a>>;
+    fn deref(&self) -> &Self::Target {
+        &self.expressions
+    }
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum Expression<'a> {
+    Value(ValueExpression<'a>),
+    Void(VoidExpression<'a>),
+}
+
+#[derive(Copy, Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum UnaryOp {
+    Not,
+    Negate,
+}
+#[derive(Copy, Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum BinaryOp {
+    Add,
+    Subtract,
+    Multiply,
+    Divide,
+    Modulo,
+    Equals,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum VoidExpression<'a> {
+    ConditionLoop(ConditionalLoop<'a>),
+    Import(Import<'a>),
+    Export(Export<'a>),
+    Print(Print<'a>),
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum ValueExpression<'a> {
+    Unary {
+        operator: UnaryOp,
+        operand: Box<ValueExpression<'a>>,
+    },
+    Binary {
+        lhs: Box<ValueExpression<'a>>,
+        rhs: Box<ValueExpression<'a>>,
+        operator: BinaryOp,
+    },
+    Block(ExpressionList<'a>),
+    Literal(LiteralNode<'a>),
+    DeclareIdentifier(DeclareIdent<'a>),
+    Assignment(Assignment<'a>),
+    ConditionalBlock(Conditional<'a>),
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct ConditionalLoop<'a> {
+    pub block: GuardedBlock<'a>,
+    pub fallback: Option<ExpressionList<'a>>,
+}
+
+impl<'a> ConditionalLoop<'a> {
+    pub fn expr_while(block: GuardedBlock<'a>) -> Self {
+        Self {
+            block,
+            fallback: None,
+        }
+    }
+    pub fn expr_while_else(block: GuardedBlock<'a>, fallback: ExpressionList<'a>) -> Self {
+        Self {
+            block,
+            fallback: Some(fallback),
+        }
+    }
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct Conditional<'a> {
+    pub blocks: Vec<GuardedBlock<'a>>,
+    pub fallback: Option<ExpressionList<'a>>,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct GuardedBlock<'a> {
+    pub guard: Box<ValueExpression<'a>>,
+    pub block: ExpressionList<'a>,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct Import<'a> {
+    pub source: &'a str,
+    pub items: IdentifierList<'a>,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct Export<'a> {
+    pub items: IdentifierList<'a>,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct Print<'a> {
+    pub expr: Box<ValueExpression<'a>>,
+}
+impl<'a> From<ValueExpression<'a>> for Print<'a> {
+    fn from(value: ValueExpression<'a>) -> Self {
+        Print {
+            expr: Box::new(value),
+        }
+    }
+}
+
+pub type IdentifierList<'a> = Vec<IdentifierNode<'a>>;
+pub type ParameterList<'a> = Vec<IdentifierNode<'a>>;
+
+#[derive(Copy, Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct Identifier<'a>(pub &'a str);
+
+/// Alias an identifier, to create a new way of referring to it
+/// IdentifierAlias(original, alias) => identifier "as" alias
+#[derive(Copy, Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct IdentifierAlias<'a>(pub &'a str, pub &'a str);
+
+#[derive(Copy, Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum IdentifierNode<'a> {
+    Direct(Identifier<'a>),
+    Alias(IdentifierAlias<'a>),
+}
+
+impl<'a> IdentifierNode<'a> {
+    /// The name this node is referred to by: the alias if there is one, else the identifier.
+    pub fn get_name(&self) -> &'a str {
+        match self {
+            Self::Direct(value) => value.0,
+            Self::Alias(value) => value.1,
+        }
+    }
+
+    /// The original identifier, ignoring any alias.
+    pub fn get_base(&self) -> &'a str {
+        match self {
+            Self::Direct(value) => value.0,
+            Self::Alias(value) => value.0,
+        }
+    }
+}
+
+#[derive(Copy, Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum LiteralNode<'a> {
+    Number(Number),
+    String(&'a str),
+    Boolean(bool),
+    Null,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub struct Assignment<'a> {
+    pub ident: Identifier<'a>,
+    pub value: Box<ValueExpression<'a>>,
+}
+
+#[derive(Clone)]
+#[cfg_attr(feature = "debug-ast", derive(Debug))]
+pub enum DeclareIdent<'a> {
+    WithValue(Assignment<'a>),
+    WithoutValue(Identifier<'a>),
+}
diff --git a/src/parser/atoms.rs b/src/parser/atoms.rs
new file mode 100644
index 0000000000000000000000000000000000000000..99c907b1d603f1639902f02b837612a4efbad769
--- /dev/null
+++ b/src/parser/atoms.rs
@@ -0,0 +1,43 @@
+use nom::bytes::complete::tag;
+use nom::character::complete::{char, one_of};
+use nom::combinator::value;
+use nom::multi::{many0, many1};
+use nom::sequence::terminated;
+use nom::IResult;
+use nom_locate::LocatedSpan;
+
+pub type Span<'a> = LocatedSpan<&'a str>;
+pub type OwnedSpan<'a> = LocatedSpan<String>;
+
+/// Parse the literal `true`.
+pub fn raw_true(input: Span) -> IResult<Span, bool> {
+    value(true, tag("true"))(input)
+}
+
+/// Parse the literal `false`.
+pub fn raw_false(input: Span) -> IResult<Span, bool> {
+    value(false, tag("false"))(input)
+}
+
+/// Parse a run of ASCII digits, discarding any `_` separators that follow a digit.
+///
+/// The returned span owns the cleaned-up digits but carries the offset and line of
+/// the position where the original input started.
+pub fn raw_decimal(input: Span) -> IResult<Span, OwnedSpan> {
+    let input_offset = input.location_offset();
+    let input_line = input.location_line();
+
+    let (input, list) = many1(terminated(one_of("0123456789"), many0(char('_'))))(input)?;
+    // Collect the digits directly instead of re-formatting the whole string per digit.
+    let string: String = list.into_iter().collect();
+
+    // NOTE(review): the offset and line describe the original input, not `string`.
+    // `new_from_raw_offset` is unsafe for that reason; confirm no caller derives column
+    // information from the returned span.
+    unsafe {
+        Ok((
+            input,
+            OwnedSpan::new_from_raw_offset(input_offset, input_line, string, ()),
+        ))
+    }
+}
diff --git a/src/parser/grammar.rs b/src/parser/grammar.rs
new file mode 100644
index 0000000000000000000000000000000000000000..c4401182bbe3b9b0f534d66307476f2eb4f7303f
--- /dev/null
+++ b/src/parser/grammar.rs
@@ -0,0 +1,226 @@
+use crate::parser::ast::Program;
+use crate::parser::{ScriptToken, ScriptTokenType};
+use peg::error::ExpectedSet;
+use peg::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult};
+use std::error::Error;
+use std::fmt::{Display, Formatter};
+use std::ops::{Deref, DerefMut};
+
+/// Borrowed view over lexed tokens that the `peg` grammar parses directly.
+#[repr(transparent)]
+pub struct TokenSlice<'a>(pub &'a [ScriptToken<'a>]);
+impl<'a> Deref for TokenSlice<'a> {
+    type Target = [ScriptToken<'a>];
+    fn deref(&self) -> &'a Self::Target {
+        self.0
+    }
+}
+
+impl<'a> Parse for TokenSlice<'a> {
+    type PositionRepr = usize;
+
+    fn start(&self) -> usize {
+        0
+    }
+
+    fn is_eof(&self, position: usize) -> bool {
+        position >= self.len()
+    }
+
+    fn position_repr(&self, position: usize) -> Self::PositionRepr {
+        position
+    }
+}
+impl<'a> ParseElem<'a> for TokenSlice<'a> {
+    type Element = &'a ScriptToken<'a>;
+
+    fn parse_elem(&'a self, pos: usize) -> RuleResult<Self::Element> {
+        // `get` turns an out-of-range position into a plain failure instead of a panic.
+        match self.get(pos) {
+            Some(elemt) => RuleResult::Matched(pos + 1, elemt),
+            None => RuleResult::Failed,
+        }
+    }
+}
+impl<'a> ParseLiteral for TokenSlice<'a> {
+    fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> {
+        let matches = self
+            .get(pos)
+            .map(|token| ScriptTokenType::try_from(literal).as_ref() == Ok(&token.token_type))
+            .unwrap_or(false);
+        if matches {
+            RuleResult::Matched(pos + 1, ())
+        } else {
+            RuleResult::Failed
+        }
+    }
+}
+impl<'a> ParseSlice<'a> for TokenSlice<'a> {
+    type Slice = &'a [ScriptToken<'a>];
+
+    fn parse_slice(&'a self, p1: usize, p2: usize) -> Self::Slice {
+        &self[p1..p2]
+    }
+}
+
+peg::parser! {
+    grammar script_parser<'a>() for TokenSlice<'a> {
+        use crate::parser::ast::*;
+        use crate::runtime::Number;
+
+        pub rule program() -> Program<'input>
+            = ex:expression_list() eof() { Program(ex) }
+
+        rule expression_list() -> ExpressionList<'input>
+            = e:(statement() / expression())+ term:";"? { ExpressionList { expressions: e, is_void: term.is_some() } }
+
+        rule statement() -> Expression<'input>
+            // Include conditional here separately from expression to allow "if" without semi
+            = e:conditional() { Expression::Value(ValueExpression::ConditionalBlock(e)) }
+            / e:expression() ";" { e }
+
+        pub rule expression() -> Expression<'input>
+            = ex:value_expression() { Expression::Value(ex) }
+            / ex:void_expression() { Expression::Void(ex) }
+
+        rule void_expression() -> VoidExpression<'input>
+            = ex:print() { VoidExpression::Print(ex) }
+
+        #[cache_left_rec]
+        rule value_expression() -> ValueExpression<'input>
+            = co:conditional() { ValueExpression::ConditionalBlock(co) }
+            / left:value_expression() op:binary_operator() right:value_expression()
+                { ValueExpression::Binary { lhs: Box::new(left), rhs: Box::new(right), operator: op } }
+            / op:unary_operator() operand:value_expression()
+                { ValueExpression::Unary { operator: op, operand: Box::new(operand) } }
+            / li:literal() { ValueExpression::Literal(li) }
+
+        rule print() -> Print<'input>
+            = "print" ex:value_expression() { ex.into() }
+
+        rule conditional() -> Conditional<'input>
+            // = bl:guarded_block() { Conditional { fallback: None, blocks: vec![bl] } }
+            = blocks:(guarded_block() ++ "else") "else" "{" fallback:expression_list() "}" {
+                Conditional {
+                    blocks,
+                    fallback: Some(fallback)
+                }
+            }
+            / blocks:(guarded_block() ++ "else") { Conditional { fallback: None, blocks, } }
+
+        rule guarded_block() -> GuardedBlock<'input>
+            = "if" guard:value_expression() "{" block:expression_list() "}"
+                { GuardedBlock { block: block, guard: Box::new(guard) } }
+
+        rule binary_operator() -> BinaryOp
+            = "+" { BinaryOp::Add }
+            / "-" { BinaryOp::Subtract }
+            / "*" { BinaryOp::Multiply }
+            / "/" { BinaryOp::Divide }
+
+        rule unary_operator() -> UnaryOp
+            = "!" { UnaryOp::Not }
+            / "-" { UnaryOp::Negate }
+
+        rule identifier_list() -> IdentifierList<'input>
+            = identifier() ++ ","
+
+        rule param_list() -> ParameterList<'input>
+            = ids:bare_identifier() ++ ","
+                { ids.iter().copied().map(IdentifierNode::Direct).collect() }
+
+        rule identifier() -> IdentifierNode<'input>
+            = id:alias_identifier() { IdentifierNode::Alias(id) }
+            / id:bare_identifier() { IdentifierNode::Direct(id) }
+
+        rule alias_identifier() -> IdentifierAlias<'input>
+            = base:bare_identifier() "as" alias:bare_identifier() { IdentifierAlias(base.0, alias.0) }
+
+        rule bare_identifier() -> Identifier<'input>
+            = [ScriptToken { token_type: ScriptTokenType::Identifier(vl), .. }] { Identifier(vl) }
+
+        rule literal() -> LiteralNode<'input>
+            = "true" { LiteralNode::Boolean(true) }
+            / "false" { LiteralNode::Boolean(false) }
+            / "null" { LiteralNode::Null }
+            / [ScriptToken { token_type: ScriptTokenType::String(vl), .. }] { LiteralNode::String(vl) }
+            / [ScriptToken { token_type: ScriptTokenType::OwnedString(vl), .. }] { LiteralNode::String(vl.as_str()) }
+            / [ScriptToken { token_type: ScriptTokenType::Integer(vl), .. }] { LiteralNode::Number(Number::Integer(*vl)) }
+            / [ScriptToken { token_type: ScriptTokenType::Float(vl), .. }] { LiteralNode::Number(Number::Float(*vl)) }
+
+        rule eof() = ![_]
+    }
+}
+
+/// Reason a parse failed.
+#[derive(Clone, Debug)]
+pub enum ParseErrorKind {
+    Unexpected(ExpectedSet),
+}
+
+impl Display for ParseErrorKind {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Unexpected(set) => write!(f, "expected {}", set),
+        }
+    }
+}
+
+/// A parse failure anchored to the token it is reported at.
+#[derive(Clone, Debug)]
+pub struct ParseError<'a> {
+    pub token: &'a ScriptToken<'a>,
+    pub kind: ParseErrorKind,
+}
+impl<'a> ParseError<'a> {
+    /// `(line, column)` of the reported token, read from its recorded position.
+    pub fn get_error_location(&self) -> (usize, usize) {
+        (
+            self.token.position.location_line() as usize,
+            self.token.position.get_column(),
+        )
+    }
+}
+impl<'a> Display for ParseError<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "Failed to parse at [{}, {}]: {}. {}",
+            self.token.position.location_line(),
+            self.token.position.get_column(),
+            self.token.position.fragment(),
+            self.kind,
+        )
+    }
+}
+
+impl<'a> Error for ParseError<'a> {}
+
+/// Parse a lexed token list into a [`Program`].
+///
+/// # Errors
+///
+/// Returns a [`ParseError`] naming the token at which parsing failed. If the
+/// failure is past the end of the input, the last token is reported instead.
+///
+/// # Panics
+///
+/// Panics if parsing fails on an empty token list, because there is no token
+/// to attach the error to.
+pub fn parse_tokens<'a>(list: &'a TokenSlice) -> Result<Program<'a>, ParseError<'a>> {
+    match script_parser::program(list) {
+        Ok(prog) => Ok(prog),
+        Err(e) => {
+            // The failure position can be one past the last token when the input ends too
+            // early (e.g. `1 +`); report the last token rather than indexing out of bounds.
+            let bad_token = list
+                .get(e.location)
+                .or_else(|| list.last())
+                .expect("cannot report a parse error for an empty token list");
+            Err(ParseError {
+                token: bad_token,
+                kind: ParseErrorKind::Unexpected(e.expected),
+            })
+        }
+    }
+}
diff --git a/src/parser/keywords.rs b/src/parser/keywords.rs
new file mode 100644
index 0000000000000000000000000000000000000000..2a57ca39a74377bd06855f7f32bac8df66240614
--- /dev/null
+++ b/src/parser/keywords.rs
@@ -0,0 +1,55 @@
+use crate::parser::atoms::Span;
+use crate::parser::{ScriptToken, ScriptTokenType};
+use nom::bytes::complete::tag;
+use nom::error::{Error, ErrorKind};
+use nom::IResult;
+use nom_locate::position;
+
+/// Whether `c` can continue an identifier, and so cannot directly follow a keyword.
+fn is_identifier_char(c: char) -> bool {
+    c.is_alphanumeric() || c == '_'
+}
+
+/// Define a lexer function `$name` that matches the keyword `$word` and yields a
+/// `ScriptToken` of type `ScriptTokenType::$variant`, positioned where the word starts.
+///
+/// All keywords share this shape, so generating the functions keeps the table of
+/// keywords in one place.
+macro_rules! keyword_token {
+    ($name:ident, $word:literal, $variant:ident) => {
+        #[doc = concat!("Lex the `", $word, "` keyword.")]
+        pub fn $name(input: Span) -> IResult<Span, ScriptToken> {
+            let (input, pos) = position(input)?;
+            let (rest, _) = tag($word)(input)?;
+            // A keyword directly followed by an identifier character is the start of a
+            // longer identifier (`format` is not `for`), so leave it for the identifier lexer.
+            if rest.fragment().starts_with(is_identifier_char) {
+                return Err(nom::Err::Error(Error::new(input, ErrorKind::Tag)));
+            }
+            Ok((
+                rest,
+                ScriptToken {
+                    position: pos,
+                    token_type: ScriptTokenType::$variant,
+                },
+            ))
+        }
+    };
+}
+
+keyword_token!(token_struct, "struct", Class);
+keyword_token!(token_else, "else", Else);
+keyword_token!(token_function, "fn", Function);
+keyword_token!(token_for, "for", For);
+keyword_token!(token_if, "if", If);
+keyword_token!(token_null, "null", Null);
+keyword_token!(token_print, "print", Print);
+keyword_token!(token_return, "return", Return);
+keyword_token!(token_super, "super", Super);
+keyword_token!(token_this, "this", This);
+keyword_token!(token_let, "let", Let);
+keyword_token!(token_while, "while", While);
+keyword_token!(token_export, "export", Export);
+keyword_token!(token_import, "import", Import);
+keyword_token!(token_alias, "as", Alias);
+keyword_token!(token_from, "from", From);
diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs
new file mode 100644
index 0000000000000000000000000000000000000000..8aa24c305247bc9f2c17b1c6798bc527fa96aacc
--- /dev/null
+++ b/src/parser/lexer.rs
@@ -0,0 +1,199 @@
+use crate::parser::atoms::Span;
+use crate::parser::ScriptToken;
+use nom::branch::alt;
+use nom::character::complete::multispace0;
+use nom::error::ErrorKind;
+use nom::multi::fold_many0;
+use nom::sequence::delimited;
+use nom::IResult;
+use nom_locate::LocatedSpan;
+use std::error::Error;
+use std::fmt::{Display, Formatter};
+
+use crate::parser::token_parser::{
+    token_alias, token_asterisk, token_bang, token_bang_equal, token_boolean, token_caret,
+    token_comma, token_dot, token_double_ampersand, token_double_pipe, token_else, token_equal,
+    token_equal_equal, token_export, token_float, token_for, token_function, token_greater,
+    token_greater_equal, token_ident, token_if, token_import, token_int, token_left_brace,
+    token_left_paren, token_less, token_less_equal, token_let, token_minus, token_modulo,
+    token_null, token_plus, token_print, token_return, token_right_brace, token_right_paren,
+    token_semicolon, token_slash, token_string, token_struct, token_super, token_this, token_while,
+};
+
+/// Lex a single token of any kind.
+///
+/// `alt` takes the first parser that succeeds, so the comparison operators are tried
+/// before the single-character operators and their two-character forms come first;
+/// that way a one-character operator cannot claim the start of `==`, `!=`, `<=` or `>=`.
+pub fn any_token(input: Span) -> IResult<Span, ScriptToken> {
+    alt((
+        alt((
+            token_if,
+            token_function,
+            token_alias,
+            token_for,
+            token_let,
+            token_else,
+            token_this,
+            token_null,
+            token_while,
+            token_return,
+            token_print,
+            token_export,
+            token_import,
+            token_struct,
+            token_super,
+        )),
+        alt((
+            token_equal_equal,
+            token_bang_equal,
+            token_less_equal,
+            token_greater_equal,
+            token_less,
+            token_greater,
+            token_equal,
+        )),
+        alt((
+            token_plus,
+            token_minus,
+            token_asterisk,
+            token_slash,
+            token_bang,
+            token_comma,
+            token_dot,
+            token_caret,
+            token_modulo,
+            token_left_brace,
+            token_left_paren,
+            token_right_brace,
+            token_right_paren,
+            token_double_ampersand,
+            token_double_pipe,
+            token_semicolon,
+        )),
+        alt((
+            token_float,
+            token_int,
+            token_boolean,
+            token_string,
+            token_ident,
+        )),
+    ))(input)
+}
+
+/// Lex tokens, each optionally surrounded by whitespace, until one fails to match.
+///
+/// The unconsumed remainder is returned alongside the tokens; it is not an error here.
+pub fn token_list(input: Span) -> IResult<Span, Vec<ScriptToken>> {
+    let (span, list) = fold_many0(
+        delimited(multispace0, any_token, multispace0),
+        Vec::new,
+        |mut list, tok| {
+            list.push(tok);
+            list
+        },
+    )(input)?;
+
+    Ok((span, list))
+}
+
+/// A lexing failure, wrapping the underlying `nom` error.
+#[derive(Debug)]
+pub struct LexError<'a> {
+    pub inner: nom::error::Error<Span<'a>>,
+}
+
+impl<'a> LexError<'a> {
+    /// `(line, column)` of the input position at which lexing failed.
+    pub fn get_error_location(&self) -> (usize, usize) {
+        (
+            self.inner.input.location_line() as usize,
+            self.inner.input.get_column(),
+        )
+    }
+}
+impl<'a> Display for LexError<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.inner)
+    }
+}
+impl<'a> Error for LexError<'a> {}
+impl<'a> From<nom::Err<nom::error::Error<Span<'a>>>> for LexError<'a> {
+    fn from(value: nom::Err<nom::error::Error<Span<'a>>>) -> Self {
+        match value {
+            nom::Err::Error(err) => Self { inner: err },
+            nom::Err::Failure(err) => Self { inner: err },
+            nom::Err::Incomplete(_) => Self {
+                inner: nom::error::Error::new(
+                    LocatedSpan::new("<incomplete program>"),
+                    ErrorKind::Alt,
+                ),
+            },
+        }
+    }
+}
+
+/// Lex a whole script into tokens.
+///
+/// # Errors
+///
+/// Returns a [`LexError`] if any non-whitespace input cannot be lexed.
+pub fn lex_script<'a, 'b: 'a>(script: &'b str) -> Result<Vec<ScriptToken<'a>>, LexError> {
+    let script_span = Span::new(script);
+    let (rest, tokens) = token_list(script_span).map_err(LexError::from)?;
+
+    // `token_list` stops quietly at the first thing it cannot lex, so anything left over
+    // (other than whitespace) is unlexable input and must be reported, not dropped.
+    let (rest, _) = multispace0::<_, nom::error::Error<Span>>(rest).map_err(LexError::from)?;
+    if !rest.fragment().is_empty() {
+        return Err(LexError {
+            inner: nom::error::Error::new(rest, ErrorKind::Alt),
+        });
+    }
+    Ok(tokens)
+}
+
+#[cfg(test)]
+mod lexing_test {
+    use crate::parser::lexer::lex_script;
+    use crate::parser::{ScriptToken, ScriptTokenType};
+
+    #[test]
+    fn simple_maths() {
+        let tokens = lex_script("12 + 21").expect("Failed to lex");
+
+        assert_eq!(tokens.len(), 3);
+        assert_eq!(tokens[0].token_type, ScriptTokenType::Integer(12));
+        assert_eq!(tokens[1].token_type, ScriptTokenType::Plus);
+        assert_eq!(tokens[2].token_type, ScriptTokenType::Integer(21));
+    }
+    #[test]
+    fn multiline() {
+        let script = r#"
+        "Foo"; "Bar";
+        12 + 21
+        "#;
+
+        let tokens = lex_script(script).expect("Failed to lex");
+        assert_eq!(tokens.len(), 7);
+
+        assert_eq!(
+            tokens[0].token_type,
+            ScriptTokenType::OwnedString(String::from("Foo"))
+        );
+        assert_eq!(tokens[1].token_type, ScriptTokenType::Semicolon);
+        assert_eq!(
+            tokens[2].token_type,
+            ScriptTokenType::OwnedString(String::from("Bar"))
+        );
+        assert_eq!(tokens[3].token_type, ScriptTokenType::Semicolon);
+
+        assert_eq!(tokens[4].token_type, ScriptTokenType::Integer(12));
+        assert_eq!(tokens[5].token_type, ScriptTokenType::Plus);
+        assert_eq!(tokens[6].token_type, ScriptTokenType::Integer(21));
+    }
+    #[test]
+    fn rejects_unlexable_input() {
+        assert!(lex_script("12 @ 21").is_err());
+    }
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 0000000000000000000000000000000000000000..6f585af9e06c6309fe4ce5cea750799d2dea3935
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,64 @@
+mod ast;
+mod atoms;
+mod grammar;
+mod keywords;
+mod lexer;
+mod operators;
+mod primitives;
+mod strings;
+mod tokens;
+
+pub mod token_parser {
+    pub use super::keywords::{
+        token_alias, token_else, token_export, token_for, token_function, token_if, token_import,
+        token_let, token_null, token_print, token_return, token_struct, token_super, token_this,
+        token_while,
+    };
+    pub use super::operators::{
+        token_asterisk, token_bang, token_bang_equal, token_caret, token_comma, token_dot,
+        token_double_ampersand, token_double_pipe, token_equal, token_equal_equal, token_greater,
+        token_greater_equal, token_left_brace, token_left_paren, token_less, token_less_equal,
+        token_minus, token_modulo, token_plus, token_right_brace, token_right_paren,
+        token_semicolon, token_slash,
+    };
+    pub use super::primitives::{token_boolean, token_float, token_ident, token_int};
+    pub use super::strings::token_string;
+}
+pub use grammar::{parse_tokens, TokenSlice};
+pub use lexer::lex_script;
+pub use tokens::{ScriptToken, ScriptTokenType};
+
+#[cfg(test)]
+#[macro_export]
+macro_rules! map_result {
+    ($val: expr, $with: expr) => {
+        match $val {
+            Ok((remainder, token)) => $with(remainder, token),
+            Err(nom::Err::Incomplete(_)) => panic!("Incorrect error type"),
+            Err(nom::Err::Failure(err)) | Err(nom::Err::Error(err)) => {
+                panic!(
+                    "At [{}:{}]: {}; Value ||{}||",
+                    &err.input.location_line(),
+                    &err.input.get_column(),
+                    &err.code.description(),
+                    &err.input
+                );
+            }
+        }
+    };
+    ($val: expr, $with: expr, $printable: expr) => {
+        match $val {
+            Ok((remainder, token)) => $with(remainder, token),
+            Err(nom::Err::Incomplete(_)) => panic!("Incorrect error type"),
+            Err(nom::Err::Failure(err)) | Err(nom::Err::Error(err)) => {
+                panic!(
+                    "At [{}:{}]: {}; Value {}",
+                    &err.input.location_line(),
+                    &err.input.get_column(),
+                    &err.code.description(),
+                    $printable
+                );
+            }
+        }
+    };
+}
diff --git a/src/parser/operators.rs b/src/parser/operators.rs
new file mode 100644
index 0000000000000000000000000000000000000000..3f8f51abf72170e72ec6136c993fa678e1ba9014
--- /dev/null
+++ b/src/parser/operators.rs
@@ -0,0 +1,326 @@
+use crate::parser::atoms::Span;
+use crate::parser::{ScriptToken, ScriptTokenType};
+use nom::bytes::complete::tag;
+use nom::IResult;
+use nom_locate::position;
+
+pub fn token_left_paren(input: Span) -> IResult<Span, ScriptToken> {
+    let (input, pos) = position(input)?;
+    let (input, _) = tag("(")(input)?;
+    Ok((
+        input,
+        ScriptToken {
+            position: pos,
+            token_type: ScriptTokenType::LeftParen,
+        },
+    ))
+}
+
+pub fn token_right_paren(input: Span) -> IResult<Span, ScriptToken> {
+    let (input, pos) = position(input)?;
+    let (input, _) = tag(")")(input)?;
+    Ok((
+        input,
+        ScriptToken {
+            position: pos,
+            token_type: ScriptTokenType::RightParen,
+        },
+    ))
+}
+
+pub fn token_left_brace(input: Span) -> IResult<Span, ScriptToken> {
+    let (input, pos) = position(input)?;
+    let (input, _) = tag("{")(input)?;
+    Ok((
+        input,
+        ScriptToken {
+            position: pos,
+            token_type: ScriptTokenType::LeftBrace,
+        },
+    ))
+}
+
+pub fn token_right_brace(input:
Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("}")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::RightBrace, + }, + )) +} + +pub fn token_comma(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(",")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Comma, + }, + )) +} + +pub fn token_dot(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(".")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Dot, + }, + )) +} + +pub fn token_minus(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("-")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Minus, + }, + )) +} + +pub fn token_plus(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("+")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Plus, + }, + )) +} + +pub fn token_semicolon(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(";")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Semicolon, + }, + )) +} + +pub fn token_slash(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("/")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Slash, + }, + )) +} + +pub fn token_asterisk(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("*")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Asterisk, + }, + )) +} + +pub fn token_bang(input: Span) -> IResult<Span, ScriptToken> { + let 
(input, pos) = position(input)?; + let (input, _) = tag("!")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Bang, + }, + )) +} + +pub fn token_bang_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("!=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::BangEqual, + }, + )) +} + +pub fn token_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Equal, + }, + )) +} + +pub fn token_equal_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("==")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::EqualEqual, + }, + )) +} + +pub fn token_greater(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(">")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Greater, + }, + )) +} + +pub fn token_greater_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag(">=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::GreaterEqual, + }, + )) +} + +pub fn token_less(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("<")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Less, + }, + )) +} + +pub fn token_less_equal(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("<=")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::LessEqual, + }, + )) +} + +pub fn token_double_pipe(input: Span) -> IResult<Span, ScriptToken> { + let 
(input, pos) = position(input)?; + let (input, _) = tag("||")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::DoublePipe, + }, + )) +} + +pub fn token_double_ampersand(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("&&")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::DoubleAmpersand, + }, + )) +} + +pub fn token_modulo(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("%")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Modulo, + }, + )) +} + +pub fn token_caret(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, _) = tag("^")(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Caret, + }, + )) +} + +#[cfg(test)] +mod operator_checks { + use super::*; + use crate::parser::atoms::Span; + + fn s(st: &str) -> Span { + Span::new(st) + } + + #[test] + fn parse_brackets() { + assert_eq!( + token_left_brace(s("{")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::LeftBrace + ); + + assert_eq!( + token_right_brace(s("}")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::RightBrace + ); + + assert_eq!( + token_left_paren(s("(")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::LeftParen + ); + + assert_eq!( + token_right_paren(s(")")) + .expect("Failed to parse") + .1 + .token_type, + ScriptTokenType::RightParen + ); + } +} diff --git a/src/parser/primitives.rs b/src/parser/primitives.rs new file mode 100644 index 0000000000000000000000000000000000000000..068644aefc9b6307f4a862e7781d3bc017628dfa --- /dev/null +++ b/src/parser/primitives.rs @@ -0,0 +1,155 @@ +use crate::parser::atoms::{raw_decimal, raw_false, raw_true, Span}; +use crate::parser::tokens::{ScriptToken, ScriptTokenType}; +use 
nom::branch::alt; +use nom::bytes::complete::{escaped, is_not, tag}; +use nom::character::complete::{alpha1, alphanumeric1, char, one_of}; +use nom::combinator::{opt, recognize}; +use nom::multi::many0_count; +use nom::sequence::{delimited, pair, separated_pair}; +use nom::{error_position, IResult}; +use nom_locate::position; + +pub fn token_ident(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, value) = recognize(pair( + alt((alpha1, tag("_"))), + many0_count(alt((alphanumeric1, tag("_")))), + ))(input)?; + + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Identifier(value.fragment()), + }, + )) +} + +pub fn token_int(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, sign) = opt(one_of("+-"))(input)?; + let (input, value) = raw_decimal(input)?; + + format!("{}{}", sign.map(String::from).unwrap_or_default(), value) + .parse::<i64>() + .map(|value| { + ( + input, + ScriptToken { + token_type: ScriptTokenType::Integer(value), + position: pos, + }, + ) + }) + .map_err(|_| nom::Err::Failure(error_position!(pos, nom::error::ErrorKind::Digit))) +} + +pub fn token_float(input: Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, sign) = opt(one_of("+-"))(input)?; + let (input, (before, after)) = + separated_pair(opt(raw_decimal), tag("."), opt(raw_decimal))(input)?; + + let formatted_number = format!( + "{}{}.{}", + sign.map(String::from).unwrap_or_default(), + before + .map(|s| s.fragment().to_owned()) + .unwrap_or_else(|| String::from("0")), + after + .map(|s| s.fragment().to_owned()) + .unwrap_or_else(|| String::from("0")) + ); + + formatted_number + .parse::<f64>() + .map(|value| { + ( + input, + ScriptToken { + token_type: ScriptTokenType::Float(value), + position: pos, + }, + ) + }) + .map_err(|_| nom::Err::Failure(error_position!(pos, nom::error::ErrorKind::Digit))) +} + +pub fn token_boolean(input: 
Span) -> IResult<Span, ScriptToken> { + let (input, pos) = position(input)?; + let (input, value) = alt((raw_true, raw_false))(input)?; + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::Boolean(value), + }, + )) +} + +#[cfg(test)] +mod parsing_tests { + use super::*; + use crate::map_result; + + #[test] + fn parse_integer() { + let positive_cases = [ + ("1234", 1234), + ("-1234", -1234), + ("0", 0), + ("1_000_000", 1000000), + ("-12_34", -1234), + ]; + + for (program, expected) in positive_cases { + map_result!(token_int(Span::new(program)), |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Integer(expected)) + }); + } + } + #[test] + fn parse_floats() { + let positive_cases = [ + ("12.34", 12.34), + ("-12.34", -12.34), + ("0.", 0.), + (".0", 0.), + (".0.", 0.), + (".0.1.2", 0.), + ("1_000_000.1_23", 1000000.123), + ("-12_34.0_0_0", -1234.0), + ]; + + for (program, expected) in positive_cases { + map_result!(token_float(Span::new(program)), |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Float(expected)) + }); + } + } + + #[test] + fn parse_bools() { + let positive_cases = [("true", true), ("false", false)]; + + for (program, expected) in positive_cases { + map_result!( + token_boolean(Span::new(program)), + |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Boolean(expected)) + } + ); + } + } + + #[test] + fn parse_identifier() { + let positive_cases = ["BarBaz", "Foo", "foo", "foasd123", "_adad"]; + + for expected in positive_cases { + map_result!(token_ident(Span::new(expected)), |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::Identifier(expected)) + }); + } + } +} diff --git a/src/parser/strings.rs b/src/parser/strings.rs new file mode 100644 index 0000000000000000000000000000000000000000..a83756cfce61e2bd064ff67719e72b6d45ce95b3 --- /dev/null +++ b/src/parser/strings.rs @@ -0,0 +1,193 @@ +// parser combinators are constructed from 
the bottom up: +// first we write parsers for the smallest elements (escaped characters), +// then combine them into larger parsers. + +use crate::parser::atoms::Span; +use crate::parser::{ScriptToken, ScriptTokenType}; +use nom::branch::alt; +use nom::bytes::complete::{is_not, take_while_m_n}; +use nom::character::complete::{char as p_char, multispace1, one_of}; +use nom::combinator::{map, map_opt, map_res, value, verify}; +use nom::multi::fold_many1; +use nom::sequence::{delimited, preceded}; +use nom::IResult; +use nom_locate::position; + +/// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6 +/// hexadecimal numerals. We will combine this later with parse_escaped_char +/// to parse sequences like \u{00AC}. +fn parse_unicode(input: Span) -> IResult<Span, char> { + // `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match + // a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals. + let parse_hex = take_while_m_n(1, 6, |ch: char| ch.is_ascii_hexdigit()); + + // `preceded` takes a prefix parser, and if it succeeds, returns the result + // of the body parser. In this case, it parses u{XXXX}. + let parse_delimited_hex = preceded( + p_char::<Span, nom::error::Error<Span>>('u'), + // `delimited` is like `preceded`, but it parses both a prefix and a suffix. + // It returns the result of the middle parser. In this case, it parses + // {XXXX}, where XXXX is 1 to 6 hex numerals, and returns XXXX + delimited(p_char('{'), parse_hex, p_char('}')), + ); + + // `map_res` takes the result of a parser and applies a function that returns + // a Result. In this case we take the hex bytes from parse_hex and attempt to + // convert them to a u32. + let parse_u32 = map_res(parse_delimited_hex, move |hex| { + u32::from_str_radix(hex.fragment(), 16) + }); + + // map_opt is like map_res, but it takes an Option instead of a Result. If + // the function returns None, map_opt returns an error. 
In this case, because + // not all u32 values are valid unicode code points, we have to fallibly + // convert to p_char with from_u32. + let (span, char) = map_opt(parse_u32, move |val| char::from_u32(val))(input)?; + Ok((span, char)) +} + +/// Parse an escaped character: \n, \t, \r, \u{00AC}, etc. +fn parse_escaped_char(input: Span) -> IResult<Span, char> { + preceded( + p_char('\\'), + // `alt` tries each parser in sequence, returning the result of + // the first successful match + alt(( + parse_unicode, + // The `value` parser returns a fixed value (the first argument) if its + // parser (the second argument) succeeds. In these cases, it looks for + // the marker characters (n, r, t, etc) and returns the matching + // character (\n, \r, \t, etc). + value('\n', p_char('n')), + value('\r', p_char('r')), + value('\t', p_char('t')), + value('\u{08}', p_char('b')), + value('\u{0C}', p_char('f')), + value('\\', p_char('\\')), + value('/', p_char('/')), + value('"', p_char('"')), + )), + )(input) +} + +/// Parse a backslash, followed by any amount of whitespace. This is used later +/// to discard any escaped whitespace. +fn parse_escaped_whitespace(input: Span) -> IResult<Span, Span> { + preceded(p_char('\\'), multispace1)(input) +} + +/// Parse a non-empty block of text that doesn't include \ or " +fn parse_literal(input: Span) -> IResult<Span, Span> { + // `is_not` parses a string of 0 or more characters that aren't one of the + // given characters. + let not_quote_slash = is_not("\"\\"); + + // `verify` runs a parser, then runs a verification function on the output of + // the parser. The verification function accepts out output only if it + // returns true. In this case, we want to ensure that the output of is_not + // is non-empty. 
+ verify(not_quote_slash, |s: &Span| !s.fragment().is_empty())(input) +} + +/// A string fragment contains a fragment of a string being parsed: either +/// a non-empty Literal (a series of non-escaped characters), a single +/// parsed escaped character, or a block of escaped whitespace. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum StringFragment<'a> { + Literal(&'a str), + EscapedChar(char), + EscapedWS, +} + +/// Combine parse_literal, parse_escaped_whitespace, and parse_escaped_char +/// into a StringFragment. +fn parse_fragment(input: Span) -> IResult<Span, StringFragment> { + alt(( + map(parse_literal, |sp: Span| { + StringFragment::Literal(sp.fragment()) + }), + map(parse_escaped_char, StringFragment::EscapedChar), + value(StringFragment::EscapedWS, parse_escaped_whitespace), + ))(input) +} + +/// Parse a string. Use a loop of parse_fragment and push all of the fragments +/// into an output string. +pub fn token_string(input: Span) -> IResult<Span, ScriptToken> { + // fold is the equivalent of iterator::fold. It runs a parser in a loop, + // and for each output value, calls a folding function on each output value. + let build_string = fold_many1( + // Our parser function– parses a single string fragment + parse_fragment, + // Our init value, an empty string + String::new, + // Our folding function. For each fragment, append the fragment to the + // string. 
+ |mut string, fragment| { + match fragment { + StringFragment::Literal(s) => string.push_str(s), + StringFragment::EscapedChar(c) => string.push(c), + StringFragment::EscapedWS => {} + } + string + }, + ); + + let (input, pos) = position(input)?; + let (input, value) = delimited(p_char('"'), build_string, p_char('"'))(input)?; + + Ok(( + input, + ScriptToken { + position: pos, + token_type: ScriptTokenType::OwnedString(value), + }, + )) +} + +#[cfg(test)] +mod string_test { + use super::token_string; + use crate::map_result; + use crate::parser::atoms::Span; + use crate::parser::{ScriptToken, ScriptTokenType}; + + #[test] + fn parse_escaped_string() { + let positive_cases = [ + ( + r#""This is an escaped String""#, + String::from("This is an escaped String"), + ), + ( + r#""This is an \"escaped\" String""#, + String::from("This is an \"escaped\" String"), + ), + ( + r#""Many whitespaces can be collapsed with a slash \ + and they won't matter""#, + String::from( + "Many whitespaces can be collapsed with a slash and they won't matter", + ), + ), + ( + r#""Big whitespace preserved can be collapsed with a slash + and they won't matter""#, + String::from( + r#"Big whitespace preserved can be collapsed with a slash + and they won't matter"#, + ), + ), + ]; + + for (program, expected) in positive_cases { + map_result!( + token_string(Span::new(program)), + |_, token: ScriptToken| { + assert_eq!(token.token_type, ScriptTokenType::OwnedString(expected)) + }, + program + ); + } + } +} diff --git a/src/parser/tokens.rs b/src/parser/tokens.rs new file mode 100644 index 0000000000000000000000000000000000000000..6b3499546db819fed60b4110b01bfd43bd3bcfb7 --- /dev/null +++ b/src/parser/tokens.rs @@ -0,0 +1,368 @@ +use crate::parser::atoms::Span; +use std::error::Error; +use std::fmt::{format, Debug, Display, Formatter}; + +#[derive(PartialEq, Clone, Debug)] +pub enum ScriptTokenType<'a> { + // Structural Tokens + LeftParen, + RightParen, + LeftBrace, + RightBrace, + Comma, + Dot, 
/// The kind of a lexed token, together with its payload for identifiers and
/// literals.
#[derive(PartialEq, Clone, Debug)]
pub enum ScriptTokenType<'a> {
    // Structural Tokens
    LeftParen,
    RightParen,
    LeftBrace,
    RightBrace,
    Comma,
    Dot,
    Semicolon,

    // Unary Operators
    Bang,
    Minus,

    // Binary Operators
    Asterisk,
    Slash,
    Plus,
    BangEqual,
    Equal,
    EqualEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,
    DoublePipe,
    DoubleAmpersand,
    Modulo,
    Caret,

    // Literals
    Identifier(&'a str),
    String(&'a str),
    OwnedString(String),
    Integer(i64),
    Float(f64),
    Boolean(bool),

    // Keywords
    Class,
    Else,
    Function,
    For,
    If,
    Null,
    Print,
    Return,
    Super,
    This,
    Let,
    While,
    Export,
    Import,
    Alias,
    From,

    // Misc
    Eof,
}

impl<'a> ScriptTokenType<'a> {
    /// The fixed source spelling of a token that carries no payload, or `None`
    /// for identifiers and literals.
    ///
    /// This is the single table behind both [`len`](Self::len) and the
    /// `Display` impl, so a token's reported length cannot drift away from the
    /// text it prints as.
    fn lexeme(&self) -> Option<&'static str> {
        let spelling = match self {
            Self::LeftParen => "(",
            Self::RightParen => ")",
            Self::LeftBrace => "{",
            Self::RightBrace => "}",
            Self::Comma => ",",
            Self::Dot => ".",
            Self::Semicolon => ";",
            Self::Bang => "!",
            Self::Minus => "-",
            Self::Asterisk => "*",
            Self::Slash => "/",
            Self::Plus => "+",
            Self::BangEqual => "!=",
            Self::Equal => "=",
            Self::EqualEqual => "==",
            Self::Greater => ">",
            Self::GreaterEqual => ">=",
            Self::Less => "<",
            Self::LessEqual => "<=",
            Self::DoublePipe => "||",
            Self::DoubleAmpersand => "&&",
            Self::Modulo => "%",
            Self::Caret => "^",
            Self::Class => "struct",
            Self::Else => "else",
            Self::Function => "fn",
            Self::For => "for",
            Self::If => "if",
            Self::Null => "null",
            Self::Print => "print",
            Self::Return => "return",
            Self::Super => "super",
            Self::This => "this",
            Self::Let => "let",
            Self::While => "while",
            Self::Export => "export",
            Self::Import => "import",
            Self::Alias => "as",
            Self::From => "from",
            Self::Eof => "",
            Self::Identifier(_)
            | Self::String(_)
            | Self::OwnedString(_)
            | Self::Integer(_)
            | Self::Float(_)
            | Self::Boolean(_) => return None,
        };
        Some(spelling)
    }

    /// Length in bytes of the token's textual form.
    ///
    /// * Fixed tokens (punctuation, operators, keywords): the length of their
    ///   spelling, so `{` and `}` are 1 and `Eof` is 0.
    /// * `Identifier`: the length of the name.
    /// * `String` / `OwnedString`: the length of the stored text plus 2 for
    ///   the surrounding quotes.
    /// * `Integer` / `Float`: the length of the value's `Display` output.
    /// * `Boolean`: 4 for `true`, 5 for `false`.
    pub fn len(&self) -> usize {
        match self {
            Self::Identifier(name) => name.len(),
            Self::String(text) => text.len() + 2,
            Self::OwnedString(text) => text.len() + 2,
            Self::Integer(value) => value.to_string().len(),
            Self::Float(value) => value.to_string().len(),
            Self::Boolean(true) => 4,
            Self::Boolean(false) => 5,
            // Everything else has a fixed spelling; `lexeme` is exhaustive, so
            // a newly added variant is a compile error there rather than a
            // silent 0 here.
            fixed => fixed.lexeme().map_or(0, str::len),
        }
    }
}

impl<'a> Display for ScriptTokenType<'a> {
    /// Writes the token as source text: the payload for identifiers and
    /// literals (string contents are written without quotes), the fixed
    /// spelling for everything else.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Identifier(value) | Self::String(value) => write!(f, "{}", value),
            Self::OwnedString(value) => write!(f, "{}", value),
            Self::Integer(value) => write!(f, "{}", value),
            Self::Float(value) => write!(f, "{}", value),
            Self::Boolean(value) => write!(f, "{}", value),
            fixed => f.write_str(fixed.lexeme().unwrap_or_default()),
        }
    }
}

/// Error returned when a string is not the spelling of any fixed token.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct TokenFromStringError<'a> {
    source: &'a str,
}

impl<'a> Display for TokenFromStringError<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "Failed to parse into token; value {}", self.source)
    }
}

impl<'a> Error for TokenFromStringError<'a> {}

impl<'a> TryFrom<&'a str> for ScriptTokenType<'a> {
    type Error = TokenFromStringError<'a>;

    /// Map the exact spelling of a punctuation mark, operator or keyword to
    /// its token type.
    ///
    /// # Errors
    ///
    /// Any other text (including identifiers and literals) is rejected with a
    /// [`TokenFromStringError`] holding the offending input.
    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
        let token = match value {
            "(" => Self::LeftParen,
            ")" => Self::RightParen,
            "{" => Self::LeftBrace,
            "}" => Self::RightBrace,
            "," => Self::Comma,
            "." => Self::Dot,
            "-" => Self::Minus,
            "+" => Self::Plus,
            ";" => Self::Semicolon,
            "/" => Self::Slash,
            "*" => Self::Asterisk,
            "!" => Self::Bang,
            "!=" => Self::BangEqual,
            "=" => Self::Equal,
            "==" => Self::EqualEqual,
            ">" => Self::Greater,
            ">=" => Self::GreaterEqual,
            "<" => Self::Less,
            "<=" => Self::LessEqual,
            "||" => Self::DoublePipe,
            "&&" => Self::DoubleAmpersand,
            "%" => Self::Modulo,
            "^" => Self::Caret,
            "struct" => Self::Class,
            "else" => Self::Else,
            "fn" => Self::Function,
            "for" => Self::For,
            "if" => Self::If,
            "null" => Self::Null,
            "print" => Self::Print,
            "return" => Self::Return,
            "super" => Self::Super,
            "this" => Self::This,
            "let" => Self::Let,
            "while" => Self::While,
            "export" => Self::Export,
            "import" => Self::Import,
            "as" => Self::Alias,
            "from" => Self::From,
            _ => return Err(TokenFromStringError { source: value }),
        };
        Ok(token)
    }
}
ScriptTokenType::try_from("{"), + Ok(ScriptTokenType::LeftBrace) + ); + assert_eq!( + ScriptTokenType::try_from("}"), + Ok(ScriptTokenType::RightBrace) + ); + assert_eq!(ScriptTokenType::try_from(","), Ok(ScriptTokenType::Comma)); + assert_eq!(ScriptTokenType::try_from("."), Ok(ScriptTokenType::Dot)); + assert_eq!(ScriptTokenType::try_from("-"), Ok(ScriptTokenType::Minus)); + assert_eq!(ScriptTokenType::try_from("+"), Ok(ScriptTokenType::Plus)); + assert_eq!( + ScriptTokenType::try_from(";"), + Ok(ScriptTokenType::Semicolon) + ); + assert_eq!(ScriptTokenType::try_from("/"), Ok(ScriptTokenType::Slash)); + assert_eq!( + ScriptTokenType::try_from("*"), + Ok(ScriptTokenType::Asterisk) + ); + assert_eq!(ScriptTokenType::try_from("!"), Ok(ScriptTokenType::Bang)); + assert_eq!( + ScriptTokenType::try_from("!="), + Ok(ScriptTokenType::BangEqual) + ); + assert_eq!(ScriptTokenType::try_from("="), Ok(ScriptTokenType::Equal)); + assert_eq!( + ScriptTokenType::try_from("=="), + Ok(ScriptTokenType::EqualEqual) + ); + assert_eq!(ScriptTokenType::try_from(">"), Ok(ScriptTokenType::Greater)); + assert_eq!( + ScriptTokenType::try_from(">="), + Ok(ScriptTokenType::GreaterEqual) + ); + assert_eq!(ScriptTokenType::try_from("<"), Ok(ScriptTokenType::Less)); + assert_eq!( + ScriptTokenType::try_from("<="), + Ok(ScriptTokenType::LessEqual) + ); + assert_eq!( + ScriptTokenType::try_from("||"), + Ok(ScriptTokenType::DoublePipe) + ); + assert_eq!( + ScriptTokenType::try_from("&&"), + Ok(ScriptTokenType::DoubleAmpersand) + ); + assert_eq!(ScriptTokenType::try_from("%"), Ok(ScriptTokenType::Modulo)); + assert_eq!(ScriptTokenType::try_from("^"), Ok(ScriptTokenType::Caret)); + assert_eq!( + ScriptTokenType::try_from("struct"), + Ok(ScriptTokenType::Class) + ); + assert_eq!(ScriptTokenType::try_from("else"), Ok(ScriptTokenType::Else)); + assert_eq!( + ScriptTokenType::try_from("fn"), + Ok(ScriptTokenType::Function) + ); + assert_eq!(ScriptTokenType::try_from("for"), Ok(ScriptTokenType::For)); + 
/// Represents a numerical value in a script
#[derive(Copy, Clone, Debug)]
pub enum Number {
    Integer(i64),
    Float(f64),
}

impl Number {
    /// Create a new Number representing the provided integer
    #[inline]
    pub const fn integer(value: i64) -> Self {
        Self::Integer(value)
    }

    /// Create a new Number representing the provided float
    #[inline]
    pub const fn float(value: f64) -> Self {
        Self::Float(value)
    }

    /// `true` when the value is stored as a float.
    pub const fn is_float(&self) -> bool {
        matches!(self, Number::Float(..))
    }

    /// `true` when the value is stored as an integer.
    pub const fn is_integer(&self) -> bool {
        matches!(self, Number::Integer(..))
    }

    /// Create a copy of the value represented as an Integer internally.
    /// A float is converted with an `as i64` cast, so its fractional part is
    /// discarded.
    pub fn as_integer(&self) -> Self {
        Self::Integer(self.as_i64())
    }

    /// Create a copy of the value represented as a Float internally.
    /// An integer is converted with an `as f64` cast; integers whose magnitude
    /// exceeds 2^53 may not be exactly representable and are rounded.
    pub fn as_float(&self) -> Self {
        Self::Float(self.as_f64())
    }

    /// Unwrap this number into a native `i64`.
    /// A float is converted with an `as i64` cast, so its fractional part is
    /// discarded.
    pub fn as_i64(&self) -> i64 {
        match *self {
            Self::Integer(val) => val,
            Self::Float(val) => val as i64,
        }
    }

    /// Unwrap this number into a native `f64`.
    /// An integer is converted with an `as f64` cast; integers whose magnitude
    /// exceeds 2^53 may not be exactly representable and are rounded.
    pub fn as_f64(&self) -> f64 {
        match *self {
            Self::Integer(val) => val as f64,
            Self::Float(val) => val,
        }
    }

    /// Check to see if both value _and_ type matches between this and another
    /// Number.
    /// An escape hatch for situations where it is critical that values are (or
    /// aren't) the same internal type; `==` compares across representations.
    pub fn matches(&self, other: Number) -> bool {
        match (*self, other) {
            (Self::Integer(first), Self::Integer(second)) => first == second,
            (Self::Float(first), Self::Float(second)) => first == second,
            _ => false,
        }
    }
}

impl PartialEq for Number {
    /// Compares by value, ignoring the internal representation: an integer is
    /// cast to `f64` before being compared with a float.
    fn eq(&self, other: &Self) -> bool {
        match (*self, *other) {
            (Self::Integer(first), Self::Integer(second)) => first == second,
            (Self::Float(first), Self::Float(second)) => first == second,
            (Self::Float(fl), Self::Integer(int)) | (Self::Integer(int), Self::Float(fl)) => {
                (int as f64) == fl
            }
        }
    }
}

impl PartialOrd for Number {
    /// Orders by value; mixed comparisons cast the integer side to `f64`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match (*self, *other) {
            (Self::Integer(first), Self::Integer(second)) => first.partial_cmp(&second),
            (Self::Float(first), Self::Float(second)) => first.partial_cmp(&second),
            (Self::Float(first), Self::Integer(second)) => first.partial_cmp(&(second as f64)),
            (Self::Integer(first), Self::Float(second)) => (first as f64).partial_cmp(&second),
        }
    }
}

impl Add for Number {
    type Output = Number;

    /// Integer + integer saturates at the `i64` bounds; any float operand
    /// makes the result a float.
    fn add(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Self::Integer(first), Self::Integer(second)) => {
                Self::Integer(first.saturating_add(second))
            }
            (Self::Float(first), Self::Float(second)) => Self::Float(first + second),
            (Self::Float(fl), Self::Integer(int)) | (Self::Integer(int), Self::Float(fl)) => {
                Self::Float(fl + int as f64)
            }
        }
    }
}

impl AddAssign for Number {
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        *self = *self + rhs;
    }
}

impl Sub for Number {
    type Output = Number;

    /// Integer - integer saturates at the `i64` bounds; any float operand
    /// makes the result a float.
    fn sub(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Self::Integer(first), Self::Integer(second)) => {
                Self::Integer(first.saturating_sub(second))
            }
            (Self::Float(first), Self::Float(second)) => Self::Float(first - second),
            (Self::Float(first), Self::Integer(second)) => Self::Float(first - second as f64),
            (Self::Integer(first), Self::Float(second)) => Self::Float(first as f64 - second),
        }
    }
}

impl SubAssign for Number {
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        *self = *self - rhs;
    }
}

impl Mul for Number {
    type Output = Number;

    /// Integer * integer saturates at the `i64` bounds, matching `Add` and
    /// `Sub`, rather than panicking or wrapping on overflow. Any float operand
    /// makes the result a float.
    fn mul(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Self::Integer(first), Self::Integer(second)) => {
                Self::Integer(first.saturating_mul(second))
            }
            (Self::Float(first), Self::Float(second)) => Self::Float(first * second),
            (Self::Float(fl), Self::Integer(int)) | (Self::Integer(int), Self::Float(fl)) => {
                Self::Float(fl * int as f64)
            }
        }
    }
}

impl MulAssign for Number {
    #[inline]
    fn mul_assign(&mut self, rhs: Self) {
        *self = *self * rhs
    }
}

impl Div for Number {
    type Output = Number;

    /// Division is always carried out on `f64` values and always yields a
    /// float, even for two integers.
    fn div(self, rhs: Self) -> Self::Output {
        Number::float(self.as_f64() / rhs.as_f64())
    }
}

impl DivAssign for Number {
    #[inline]
    fn div_assign(&mut self, rhs: Self) {
        *self = *self / rhs;
    }
}

#[cfg(test)]
mod gcse_maths {
    use super::Number;

    #[test]
    fn eq_ignores_internal() {
        assert_eq!(Number::integer(123), Number::float(123.0));
        assert_eq!(Number::integer(123), Number::integer(123));
        assert_eq!(Number::float(123.0), Number::float(123.0));
    }

    #[test]
    fn addition() {
        let first = Number::integer(123);
        let second = Number::integer(456);
        let third = Number::float(789.0);
        let fourth = Number::float(123.4);

        assert_eq!(first + second, Number::integer(579));
        assert_eq!(third + fourth, Number::float(912.4));
    }

    #[test]
    fn multiplication_saturates() {
        let product = Number::integer(i64::MAX) * Number::integer(2);
        assert!(product.matches(Number::integer(i64::MAX)));
    }

    #[test]
    fn type_coercion() {
        let first = Number::integer(150);
        let second = Number::float(5.0);

        assert!(first.is_integer());
        assert!(second.is_float());

        assert!((first + second).is_float(), "Addition did not create float");
        assert!(
            (first - second).is_float(),
            "Subtraction did not create float"
        );
        assert!((first * second).is_float(), "Multiply did not create float");
        assert!((first / second).is_float(), "Divide did not create float");

        assert!(
            (first / first).is_float(),
            "Divide should always create float"
        );
    }
}
b/src/runtime/mod.rs @@ -0,0 +1,3 @@ +mod maths; + +pub use maths::Number; diff --git a/test_script.mscr b/test_script.mscr new file mode 100644 index 0000000000000000000000000000000000000000..f5b05785290d57f60f30d1643067c9c52756f246 --- /dev/null +++ b/test_script.mscr @@ -0,0 +1,12 @@ +import { some_export } from 'mod:foo' + +let my_var = 123; +fn betwixt() { + if my_var == null { + print "My var is null"; + } +} + +betwixt() + +export { bar, baz } \ No newline at end of file