diff --git a/forge-script-lang/src/error/mod.rs b/forge-script-lang/src/error/mod.rs
index f74116195a3c6555dc114ffe88fab73b799e9496..9d9ccf425097e18c745ea86baebb95b27ed463a7 100644
--- a/forge-script-lang/src/error/mod.rs
+++ b/forge-script-lang/src/error/mod.rs
@@ -223,7 +223,7 @@ pub fn format_forge_error<'a>(source: &'a str, fe: &'a ForgeError) -> String {
                         highlight_len: found.token_type.len(),
                     },
                     Some(format!(
-                        "| Found {}, expected one of {}",
+                        "Found {}, expected one of {}",
                         found.token_type,
                         expected
                             .tokens()
@@ -273,7 +273,7 @@ pub fn format_forge_error<'a>(source: &'a str, fe: &'a ForgeError) -> String {
                         highlight_len: token.len(),
                     },
                     Some(format!(
-                        "| Found {}, expected one of {}",
+                        "Found {}, expected one of {}",
                         token,
                         expected.as_slice().join(", ")
                     ))
@@ -292,7 +292,7 @@ pub fn format_forge_error<'a>(source: &'a str, fe: &'a ForgeError) -> String {
                         column,
                         highlight_len: token.len(),
                     },
-                    Some(format!("| Expected EOF, found {}", token))
+                    Some(format!("Expected EOF, found {}", token))
                 ),
             )
         }
diff --git a/forge-script-lang/src/lexer/_span.rs b/forge-script-lang/src/lexer/_span.rs
new file mode 100644
index 0000000000000000000000000000000000000000..c7c08b2e21e9a75efc5163a4992978402cd6da0b
--- /dev/null
+++ b/forge-script-lang/src/lexer/_span.rs
@@ -0,0 +1,312 @@
+use nom::error::{ErrorKind, ParseError};
+use nom::{
+    AsBytes, Compare, CompareResult, ExtendInto, FindSubstring, FindToken, IResult, InputIter,
+    InputLength, InputTake, InputTakeAtPosition, Needed, Offset, ParseTo, Slice,
+};
+use std::ops::{Range, RangeFrom, RangeFull, RangeTo};
+use std::str::{CharIndices, Chars, FromStr};
+
+/// A window (`offset`, `length`, both in bytes) onto a borrowed source string.
+#[derive(Copy, Clone, Debug)]
+pub struct TextSpan<'a> {
+    value: &'a str,
+    offset: usize,
+    length: usize,
+}
+
+impl<'a> PartialEq for TextSpan<'a> {
+    fn eq(&self, other: &Self) -> bool {
+        self.as_ref().eq(other.as_ref())
+    }
+}
+
+/// Location of a span within its source text.
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct TokenPosition {
+    pub offset: usize,
+    pub length: usize,
+    pub line: usize,
+    pub column: usize,
+}
+
+impl<'a> TextSpan<'a> {
+    /// Creates a span covering the whole of `inp`.
+    pub fn new(inp: &'a str) -> Self {
+        Self {
+            value: inp,
+            offset: 0,
+            length: inp.len(),
+        }
+    }
+
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+
+    pub fn len(&self) -> usize {
+        self.length
+    }
+
+    /// Resolves this span's start offset to a line and column.
+    ///
+    /// `line` is 1-based; `column` is the 0-based byte distance from the start of
+    /// that line. Newlines are found by scanning the raw source, because
+    /// `str::lines()` strips terminators and summing its lengths undercounts every
+    /// offset after the first line.
+    pub fn calculate_position(&self) -> TokenPosition {
+        let bytes = self.value.as_bytes();
+        // Work on bytes so an offset that is past the end of the source, or not on
+        // a char boundary, cannot panic.
+        let prefix = &bytes[..self.offset.min(bytes.len())];
+        let line_start = prefix
+            .iter()
+            .rposition(|&byte| byte == b'\n')
+            .map_or(0, |newline| newline + 1);
+
+        TokenPosition {
+            line: prefix.iter().filter(|&&byte| byte == b'\n').count() + 1,
+            offset: self.offset,
+            length: self.length,
+            column: prefix.len() - line_start,
+        }
+    }
+
+    pub fn advance_self_by(&mut self, amount: usize) {
+        self.offset += amount;
+    }
+
+    pub fn advance_by(&self, amount: usize) -> Self {
+        Self {
+            offset: self.offset + amount,
+            ..*self
+        }
+    }
+
+    /// Returns the text covered by this span, clamped to the source string.
+    ///
+    /// Yields `""` instead of panicking when the window lies outside the source
+    /// or does not fall on char boundaries.
+    pub fn into_value(self) -> &'a str {
+        let end = self.offset.saturating_add(self.length).min(self.value.len());
+        self.value.get(self.offset..end).unwrap_or("")
+    }
+}
+
+impl<'a> AsRef<str> for TextSpan<'a> {
+    fn as_ref(&self) -> &'a str {
+        self.into_value()
+    }
+}
+
+impl<'a> AsBytes for TextSpan<'a> {
+    fn as_bytes(&self) -> &[u8] {
+        self.as_ref().as_bytes()
+    }
+}
+
+impl<'a> Compare<&'a str> for TextSpan<'a> {
+    fn compare(&self, t: &'a str) -> CompareResult {
+        self.as_ref().compare(t)
+    }
+
+    fn compare_no_case(&self, t: &'a str) -> CompareResult {
+        self.as_ref().compare_no_case(t)
+    }
+}
+
+impl<'a> ExtendInto for TextSpan<'a> {
+    type Item = char;
+    type Extender = String;
+
+    fn new_builder(&self) -> Self::Extender {
+        self.as_ref().new_builder()
+    }
+
+    fn extend_into(&self, acc: &mut Self::Extender) {
+        self.as_ref().extend_into(acc)
+    }
+}
+
+impl<'a> FindSubstring<&'a str> for TextSpan<'a> {
+    fn find_substring(&self, substr: &'a str) -> Option<usize> {
+        self.as_ref().find_substring(substr)
+    }
+}
+
+impl<'a> FindToken<&'a u8> for TextSpan<'a> {
+    fn find_token(&self, token: &'a u8) -> bool {
+        self.as_ref().find_token(token)
+    }
+}
+
+impl<'a> InputIter for TextSpan<'a> {
+    type Item = char;
+    type Iter = CharIndices<'a>;
+    type IterElem = Chars<'a>;
+
+    fn iter_indices(&self) -> Self::Iter {
+        self.into_value().iter_indices()
+    }
+
+    fn iter_elements(&self) -> Self::IterElem {
+        self.into_value().iter_elements()
+    }
+
+    fn position<P>(&self, predicate: P) -> Option<usize>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        self.as_ref().position(predicate)
+    }
+
+    fn slice_index(&self, count: usize) -> Result<usize, Needed> {
+        self.as_ref().slice_index(count)
+    }
+}
+
+impl<'a> InputLength for TextSpan<'a> {
+    fn input_len(&self) -> usize {
+        self.as_ref().input_len()
+    }
+}
+
+impl<'a> InputTake for TextSpan<'a> {
+    fn take(&self, count: usize) -> Self {
+        Self {
+            offset: self.offset,
+            value: self.value,
+            length: count,
+        }
+    }
+
+    fn take_split(&self, count: usize) -> (Self, Self) {
+        (
+            Self {
+                value: self.value,
+                offset: self.offset,
+                length: count,
+            },
+            Self {
+                value: self.value,
+                offset: self.offset.saturating_add(count),
+                length: self.length.saturating_sub(count),
+            },
+        )
+    }
+}
+
+impl<'a> InputTakeAtPosition for TextSpan<'a> {
+    type Item = char;
+
+    fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        match self.as_ref().find(predicate) {
+            Some(idx) => {
+                let (found, remainder) = self.take_split(idx);
+                Ok((remainder, found))
+            }
+            None => Err(nom::Err::Incomplete(Needed::new(1))),
+        }
+    }
+
+    fn split_at_position1<P, E: ParseError<Self>>(
+        &self,
+        predicate: P,
+        e: ErrorKind,
+    ) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        match self.as_ref().find(predicate) {
+            Some(0) => Err(nom::Err::Error(E::from_error_kind(*self, e))),
+            Some(idx) => {
+                let (found, remainder) = self.take_split(idx);
+                Ok((remainder, found))
+            }
+            None => Err(nom::Err::Incomplete(Needed::new(1))),
+        }
+    }
+
+    fn split_at_position_complete<P, E: ParseError<Self>>(
+        &self,
+        predicate: P,
+    ) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        // With no match the whole span is consumed. Splitting at `input_len()` keeps the
+        // remainder at the end of this span rather than at the end of the backing string.
+        let idx = self.as_ref().find(predicate).unwrap_or_else(|| self.input_len());
+        let (found, remainder) = self.take_split(idx);
+        Ok((remainder, found))
+    }
+
+    fn split_at_position1_complete<P, E: ParseError<Self>>(
+        &self,
+        predicate: P,
+        e: ErrorKind,
+    ) -> IResult<Self, Self, E>
+    where
+        P: Fn(Self::Item) -> bool,
+    {
+        // An empty input and a match at index 0 both mean nothing could be consumed.
+        match self.as_ref().find(predicate).unwrap_or_else(|| self.input_len()) {
+            0 => Err(nom::Err::Error(E::from_error_kind(*self, e))),
+            idx => {
+                let (found, remainder) = self.take_split(idx);
+                Ok((remainder, found))
+            }
+        }
+    }
+}
+
+impl<'a> Offset for TextSpan<'a> {
+    /// Number of bytes from the start of `self` to the start of `second`.
+    fn offset(&self, second: &Self) -> usize {
+        second.offset.saturating_sub(self.offset)
+    }
+}
+
+impl<'a, R: FromStr> ParseTo<R> for TextSpan<'a> {
+    fn parse_to(&self) -> Option<R> {
+        self.as_ref().parse().ok()
+    }
+}
+
+impl<'a> Slice<Range<usize>> for TextSpan<'a> {
+    fn slice(&self, range: Range<usize>) -> Self {
+        let start = range.start;
+        let length = range.end - range.start;
+        Self {
+            value: self.value,
+            offset: self.offset + start,
+            length,
+        }
+    }
+}
+impl<'a> Slice<RangeTo<usize>> for TextSpan<'a> {
+    fn slice(&self, range: RangeTo<usize>) -> Self {
+        Self {
+            value: self.value,
+            offset: self.offset,
+            length: range.end,
+        }
+    }
+}
+impl<'a> Slice<RangeFrom<usize>> for TextSpan<'a> {
+    fn slice(&self, range: RangeFrom<usize>) -> Self {
+        // The tail is bounded by this span's own length, not by the backing string.
+        Self {
+            value: self.value,
+            offset: self.offset + range.start,
+            length: self.length.saturating_sub(range.start),
+        }
+    }
+}
+impl<'a> Slice<RangeFull> for TextSpan<'a> {
+    fn slice(&self, _range: RangeFull) -> Self {
+        *self
+    }
+}
diff --git a/forge-script-lang/src/lexer/atoms.rs b/forge-script-lang/src/lexer/atoms.rs
index 0dd69b6df71e0038d145e463477a79b1a22275ab..dff7c21ca136367ab0af2ca13b4739fc8b7e6b17 100644
--- a/forge-script-lang/src/lexer/atoms.rs
+++ b/forge-script-lang/src/lexer/atoms.rs
@@ -1,10 +1,9 @@
 use nom::bytes::complete::tag;
 use nom::character::complete::{char, multispace0, multispace1, one_of};
 use nom::combinator::value;
-use nom::error::ParseError;
 use nom::multi::{many0, many1};
 use nom::sequence::{delimited, terminated};
-use nom::{Compare, IResult, InputLength, InputTake};
+use nom::IResult;
 use nom_locate::LocatedSpan;
 
 pub type Span<'a> = LocatedSpan<&'a str>;
diff --git a/forge-script-lang/src/lexer/mod.rs b/forge-script-lang/src/lexer/mod.rs
index f0b09374e008c12a59897ed41510b8caa759e957..a92dd4422733f053c5765fa7fa01fa9be29c742f 100644
--- a/forge-script-lang/src/lexer/mod.rs
+++ b/forge-script-lang/src/lexer/mod.rs
@@ -1,3 +1,4 @@
+mod _span;
 mod atoms;
 mod keywords;
 mod operators;
diff --git a/forge-script-lang/src/parser/forge_grammar.lalrpop b/forge-script-lang/src/parser/forge_grammar.lalrpop
index 1066ca290abb411cfc9b5b48aa497f86a6dab1dd..7785f73ffdff9b69a4e3a288615f19601e9079f4 100644
--- a/forge-script-lang/src/parser/forge_grammar.lalrpop
+++ b/forge-script-lang/src/parser/forge_grammar.lalrpop
@@ -13,21 +13,19 @@ Block: Option<ExpressionList> = {
 };
 
 ExpressionList: ExpressionList = {
-    <mut v:(<Expression> ";")+> <e:Expression?> => {
-        match e {
-            Some(val) => {
-                v.push(val);
-                ExpressionList {
-                    expressions: v,
-                    is_void: false,
-                }
-            },
-            None => {
-                ExpressionList {
-                    expressions: v,
-                    is_void: true,
-                }
-            },
+    <e:Expression> <b:";"?> => {
+        ExpressionList {
+            expressions: vec![e],
+            is_void: b.is_some(),
+        }
+    },
+    <e:Expression> ";" <mut ls:ExpressionList> => {
+        let mut new = vec![e];
+        new.append(&mut ls.expressions);
+
+        ExpressionList {
+            expressions: new,
+            is_void: ls.is_void,
         }
     }
 };
@@ -38,67 +36,98 @@ pub Expression: Expression = {
 };
 
 VoidExpression: VoidExpression = {
-    "print" <expr:ValueExpression> => Print { expr: Box::new(expr) }.into(),
-    "import" "{" <items:IdentifierList> "}" "from" <source:StringValue> => Import { source, items }.into(),
-    "export" "{" <items:IdentifierList> "}" => Export { items }.into(),
+    AsVoid<PrintStmt> => <>,
+    AsVoid<ImportStmt> => <>,
+    AsVoid<ExportStmt> => <>,
 };
 
 ValueExpression: ValueExpression = {
-    #[precedence(level="1")]
-    Literal => ValueExpression::Literal(<>),
-    #[precedence(level="1")]
-    "(" <expr:ValueExpression> ")" => GroupedExpression { inner: Box::new(expr) }.into(),
-
-    #[precedence(level="2")] #[assoc(side="right")]
+    AsValue<Literal> => <>,
+    AsValue<Identifier> => <>,
+    AsValue<IfStatement> => <>,
+    "(" <lhs:ValueExpression> "+" <rhs:ValueExpression> ")" => ValueExpression::Binary { lhs: Box::new(lhs), rhs: Box::new(rhs), operator: BinaryOp::Add },
     "typeof" <expr:ValueExpression> => TypeofValue(Box::new(expr)).into(),
-    #[precedence(level="2")] #[assoc(side="right")]
     "-" <expr:ValueExpression> => ValueExpression::Unary { operand: Box::new(expr), operator: UnaryOp::Negate },
-    #[precedence(level="2")] #[assoc(side="right")]
     "!" <expr:ValueExpression> => ValueExpression::Unary { operand: Box::new(expr), operator: UnaryOp::Not },
-
-    #[precedence(level = "3")]
-    IfStatement => <>.into(),
-
-    #[precedence(level="4")] #[assoc(side="left")]
-    <lhs:ValueExpression> "*" <rhs:ValueExpression> => {
-        ValueExpression::Binary {
-            lhs: Box::new(lhs),
-            rhs: Box::new(rhs),
-            operator: BinaryOp::Multiply,
-        }
-    },
-    #[precedence(level="4")] #[assoc(side="left")]
-    <lhs:ValueExpression> "/" <rhs:ValueExpression> => {
-        ValueExpression::Binary {
-            lhs: Box::new(lhs),
-            rhs: Box::new(rhs),
-            operator: BinaryOp::Divide,
-        }
-    },
-    #[precedence(level="6")] #[assoc(side="left")]
-    <lhs:ValueExpression> "+" <rhs:ValueExpression> => {
-        ValueExpression::Binary {
-            lhs: Box::new(lhs),
-            rhs: Box::new(rhs),
-            operator: BinaryOp::Add,
-        }
-    },
-    #[precedence(level="6")] #[assoc(side="left")]
-    <lhs:ValueExpression> "-" <rhs:ValueExpression> => {
-        ValueExpression::Binary {
-            lhs: Box::new(lhs),
-            rhs: Box::new(rhs),
-            operator: BinaryOp::Subtract,
-        }
-    },
-
-    #[precedence(level="1")]
-    Identifier => <>.into()
-};
+    "(" <expr:ValueExpression> ")" => GroupedExpression { inner: Box::new(expr) }.into(),
+}
+
+//ValueExpression: ValueExpression = {
+//    #[precedence(level="100")]
+//    "(" <expr:ValueExpression> ")" => GroupedExpression { inner: Box::new(expr) }.into(),
+//    #[precedence(level="0")]
+//    Literal => ValueExpression::Literal(<>),
+//    #[precedence(level="0")]
+//    Identifier => <>.into(),
+//
+//    #[precedence(level="2")]
+//    "typeof" <expr:ValueExpression> => TypeofValue(Box::new(expr)).into(),
+//    #[precedence(level="2")]
+//    "-" <expr:ValueExpression> => ValueExpression::Unary { operand: Box::new(expr), operator: UnaryOp::Negate },
+//    #[precedence(level="2")]
+//    "!" <expr:ValueExpression> => ValueExpression::Unary { operand: Box::new(expr), operator: UnaryOp::Not },
+//
+//    #[precedence(level = "3")]
+//    IfStatement => <>.into(),
+//
+//    #[precedence(level="4")] #[assoc(side="left")]
+//    <lhs:ValueExpression> "*" <rhs:ValueExpression> => {
+//        ValueExpression::Binary {
+//            lhs: Box::new(lhs),
+//            rhs: Box::new(rhs),
+//            operator: BinaryOp::Multiply,
+//        }
+//    },
+//    #[precedence(level="4")] #[assoc(side="left")]
+//    <lhs:ValueExpression> "/" <rhs:ValueExpression> => {
+//        ValueExpression::Binary {
+//            lhs: Box::new(lhs),
+//            rhs: Box::new(rhs),
+//            operator: BinaryOp::Divide,
+//        }
+//    },
+//    #[precedence(level="6")] #[assoc(side="left")]
+//    <lhs:ValueExpression> "+" <rhs:ValueExpression> => {
+//        ValueExpression::Binary {
+//            lhs: Box::new(lhs),
+//            rhs: Box::new(rhs),
+//            operator: BinaryOp::Add,
+//        }
+//    },
+//    #[precedence(level="6")] #[assoc(side="left")]
+//    <lhs:ValueExpression> "-" <rhs:ValueExpression> => {
+//        ValueExpression::Binary {
+//            lhs: Box::new(lhs),
+//            rhs: Box::new(rhs),
+//            operator: BinaryOp::Subtract,
+//        }
+//    },
+//};
+
+AsVoid<R>: VoidExpression = {
+    R => <>.into()
+}
+AsValue<R>: ValueExpression = {
+    R => <>.into()
+}
+
+TypeofStmt: TypeofValue = {
+    "typeof" <ValueExpression> => TypeofValue(Box::new(<>))
+}
+
+PrintStmt: Print = {
+    "print" <expr:ValueExpression> => Print { expr: Box::new(expr) },
+}
+ImportStmt: Import = {
+    "import" "{" <items:IdentifierList> "}" "from" <source:StringValue> => Import { source, items },
+}
+ExportStmt: Export = {
+    "export" "{" <items:IdentifierList> "}" => Export { items },
+}
 
 IfStatement: Conditional = {
     BareIfStatement => Conditional { blocks: vec![<>], fallback: None },
-    <fi: BareIfStatement> "else" <bl:Block> => Conditional { blocks: vec![fi], fallback: bl },
+    <fi:BareIfStatement> "else" <bl:Block> => Conditional { blocks: vec![fi], fallback: bl },
     <fi:BareIfStatement> "else" <ls:IfStatement> => {
         let mut ls = ls;
         let mut new = vec![fi];
diff --git a/forge-script-lang/src/parser/forge_script.rs b/forge-script-lang/src/parser/forge_script.rs
index dc9abccb9fac5ddd752065ba34e74f0c0dce216b..897f48a2b72504483f8a08be339504f3b4061af5 100644
--- a/forge-script-lang/src/parser/forge_script.rs
+++ b/forge-script-lang/src/parser/forge_script.rs
@@ -1,6 +1,3 @@
-// use lalrpop_util::lalrpop_mod;
-// lalrpop_mod!(pub forge_script);
-
 use crate::error::ForgeResult;
 use crate::lexer::{script_to_tokens, ScriptTokenType};
 use crate::parser::ast::{Expression, Program};
@@ -17,9 +14,9 @@ macro_rules! export_grammar_fn {
                 .iter()
                 .map(|tok| {
                     Ok((
-                        tok.position.start(),
+                        tok.position.location_offset(),
                         tok.token_type.clone(),
-                        tok.position.start() + tok.position.len(),
+                        tok.position.location_offset() + tok.token_type.len(),
                     ))
                 })
                 .collect::<Vec<ExprSpan>>();
@@ -35,24 +32,6 @@ macro_rules! export_grammar_fn {
 export_grammar_fn!(parse_program = Program => ProgramParser);
 export_grammar_fn!(parse_expression = Expression => ExpressionParser);
 
-// pub fn parse_expression(source: &str) -> ForgeResult<Expression> {
-//     let tokens = script_to_tokens(source)?
-//         .iter()
-//         .map(|tok| {
-//             Ok((
-//                 tok.position.start(),
-//                 tok.token_type.clone(),
-//                 tok.position.start() + tok.position.len(),
-//             ))
-//         })
-//         .collect::<Vec<ExprSpan>>();
-//
-//     let value =
-//         super::forge_grammar::ExpressionParser::new().parse::<ExprSpan, Vec<ExprSpan>>(tokens)?;
-//
-//     Ok(value)
-// }
-
 #[cfg(test)]
 mod grammar_test {
     use super::parse_expression;
@@ -65,7 +44,9 @@ mod grammar_test {
     #[test_case("false" => matches Ok(_) ; "Parse literal false")]
     #[test_case("true" => matches Ok(_) ; "Parse literal true")]
     #[test_case("null" => matches Ok(_) ; "Parse literal null")]
-    fn expression_parsing<'a>(prog: &'a str) -> ForgeResult<'a, Expression> {
+    #[test_case("if foo {}" => matches Ok(_) ; "Parse conditional")]
+    #[test_case("2 * 4 - 3" => matches Ok(_) ; "Parse arithmetic")]
+    fn expression_parsing(prog: &str) -> ForgeResult<Expression> {
         parse_expression(prog)
     }
 }
diff --git a/forge-script-lang/src/utilities.rs b/forge-script-lang/src/utilities.rs
index 0233d1a2a7fbf823b4087f8050241944343ae25e..225f2457014a8d5f5a5e3025aa291609e64daba2 100644
--- a/forge-script-lang/src/utilities.rs
+++ b/forge-script-lang/src/utilities.rs
@@ -49,6 +49,7 @@ type ColumnNum = usize;
 pub fn offset_to_line_column(source: &str, offset: usize) -> (LineNum, ColumnNum) {
     let mut remaining = offset;
     let mut current_line = 0;
+
     for line in source.lines() {
         current_line += 1;
         if remaining < line.len() {