author     Nao Pross <naopross@thearcway.org>  2020-02-05 15:37:45 +0100
committer  Nao Pross <naopross@thearcway.org>  2020-02-05 15:37:45 +0100
commit     6cfdf24fb4efe1b36a6ce244bba132ba812e25a9 (patch)
tree       40cc6d0b9a1409700e57dfb6fcc025b577220d27
parent     Initial commit (diff)
Rewrite lexer (HEAD, master)
Diffstat
-rw-r--r--  src/parser.rs | 153
1 file changed, 72 insertions(+), 81 deletions(-)
diff --git a/src/parser.rs b/src/parser.rs
index c57fe14..68b1c11 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,108 +1,99 @@
use crate::journal::Journal;
use std::fs;
-use std::iter::Peekable;
-extern crate itertools;
extern crate time;
-use itertools::Itertools;
use time::Date;
#[derive(Debug, Eq, PartialEq, Clone)]
-pub enum Token {
- Word(String),
- DateSep,
- AccountSep,
- DecimalSep,
- Newline,
- Space,
- Indent,
- Marker(char),
- Comment(char),
- Numeric(String),
+pub enum Token<'s> {
+ Word(&'s str), Num(&'s str),
+ DecimalSep(char),
+ Newline, Space, Indent,
+ OpenKet(char), CloseKet(char),
+ Quote, Marker(char), CommentMarker,
}
-pub struct Lexer<I: Iterator<Item=char>> {
- iter: Peekable<I>
+
+struct Lexer<'s> {
+ text: &'s str,
+ tokens: Vec<Token<'s>>
}
-impl<I: Iterator<Item=char>> Lexer<I> {
- pub fn new(iter: I) -> Lexer<I> {
+impl<'s> Lexer<'s> {
+ fn new(input: &str) -> Lexer {
Lexer {
- iter: iter.peekable()
+ text: input,
+ tokens: Vec::new(),
}
}
-}
-impl<I: Iterator<Item=char>> Iterator for Lexer<I> {
- type Item = Token;
-
- fn next(&mut self) -> Option<Token> {
- // let ch = *self.iter.peek().unwrap_or(&'`');
- let ch = *self.iter.peek()?;
- match ch {
- /* alphanumeric */
- c if c.is_alphabetic() => {
- Some(Token::Word(self.iter.by_ref()
- .peeking_take_while(|&c| c.is_alphabetic()).collect())) },
- c if c.is_numeric() => {
- Some(Token::Numeric(self.iter.by_ref()
- .peeking_take_while(|&c| c.is_numeric()).collect()))
- },
- /* whitespace */
- ' ' => {
- self.iter.next();
- Some(Token::Space)
- },
- '\n' => {
- self.iter.next();
- Some(Token::Newline)
- },
- '\t' => {
- self.iter.next();
- Some(Token::Indent)
- },
- /* separators */
- '/' => {
- self.iter.next();
- Some(Token::DateSep)
- },
- ':' => {
- self.iter.next();
- Some(Token::AccountSep)
- },
- ',' | '.' => {
- self.iter.next();
- Some(Token::DecimalSep)
- },
- /* comments */
- ';' | '#' | '%' => {
- self.iter.next();
- Some(Token::Comment(ch))
- },
- /* markers */
- '*' | '!' | '@' | '-' => {
- self.iter.next();
- Some(Token::Marker(ch))
- },
- '`' => {
- println!("--");
- None
- },
- _ => self.next(),
+ fn lex(&mut self) {
+ #[derive(Clone,Copy)]
+ enum Predicate {
+ Alphabetic,
+ Numeric,
+ }
+
+ let mut start = 0;
+ let mut lastp: Option<Predicate> = None;
+
+ for (i, ch) in self.text.char_indices() {
+ let token: Option<Token> = match ch {
+ '\t' => Some(Token::Indent),
+ '\n' => Some(Token::Newline),
+ c if c.is_whitespace() => Some(Token::Space),
+
+ '"' => Some(Token::Quote),
+ ',' | '.' => Some(Token::DecimalSep(ch)),
+
+ '(' | '[' | '{' => Some(Token::OpenKet(ch)),
+ ')' | ']' | '}' => Some(Token::CloseKet(ch)),
+
+ ';' | '#' | '%' => Some(Token::CommentMarker),
+ '*' | '!' | '@' | '-' | '/' | ':' => Some(Token::Marker(ch)),
+
+ c if c.is_alphabetic() => {
+ lastp = Some(Predicate::Alphabetic);
+ None
+ },
+ c if c.is_numeric() => {
+ lastp = Some(Predicate::Numeric);
+ None
+ },
+ _ => {
+ println!("Invalid syntax token: {}", ch);
+ None
+ }
+ };
+
+ if let Some(t) = token {
+ if let Some(p) = lastp {
+ if i != start {
+ self.tokens.push(match p {
+ Predicate::Alphabetic => Token::Word(&self.text[start..i]),
+ Predicate::Numeric => Token::Num(&self.text[start..i]),
+ });
+ }
+ }
+
+ self.tokens.push(t);
+ start = i + 1;
+ }
 }
+
+ // flush a trailing Word or Num left at the end of the input,
+ // which would otherwise be dropped
+ if let Some(p) = lastp {
+ if start < self.text.len() {
+ self.tokens.push(match p {
+ Predicate::Alphabetic => Token::Word(&self.text[start..]),
+ Predicate::Numeric => Token::Num(&self.text[start..]),
+ });
+ }
+ }
}
-}
-pub fn lex(text: &str) -> Vec<Token> {
- Lexer::new(text.chars()).collect()
+ fn tokenize(input: &str) -> Vec<Token> {
+ let mut lexer = Lexer::new(input);
+ lexer.lex();
+ lexer.tokens
+ }
 }
-
struct Parser {
}
@@ -112,7 +103,7 @@ pub fn parse(name: &str) -> Journal {
println!("{:?}", text);
- for token in lex(&text) {
+ for token in Lexer::tokenize(&text) {
println!("{:?}", token);
}
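
Usage note: a minimal sketch of how the rewritten lexer might be driven, assuming Lexer and tokenize were exposed as pub (in this commit they are private to the parser module). The sample line and the expected token stream are illustrative, not taken from the repository.

    // Hypothetical caller; Lexer::tokenize is private in this commit,
    // so this would have to live inside src/parser.rs or behind a pub.
    fn demo() {
        let line = "2020/02/05 * Groceries 12.50";
        for token in Lexer::tokenize(line) {
            println!("{:?}", token);
        }
        // Illustrative output:
        // Num("2020"), Marker('/'), Num("02"), Marker('/'), Num("05"),
        // Space, Marker('*'), Space, Word("Groceries"), Space,
        // Num("12"), DecimalSep('.'), Num("50")
        // (the final Num("50") relies on the end-of-input flush above)
    }

Unlike the old character-iterator lexer, which collected each Word or Numeric into a freshly allocated String, Token<'s> now borrows &str slices straight out of the input, so lexing allocates only the token vector.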