whitespace preserving lexer
CrowdHailer committed Jul 3, 2024
1 parent 509a485 commit d83570f
Showing 8 changed files with 168 additions and 112 deletions.
13 changes: 12 additions & 1 deletion eyg/src/eyg/parse.gleam
@@ -1,8 +1,19 @@
 import eyg/parse/lexer
 import eyg/parse/parser
+import eyg/parse/token
+import gleam/result
 
 pub fn from_string(src) {
   src
   |> lexer.lex()
-  |> parser.parse()
+  |> token.drop_whitespace()
+  |> parser.expression()
 }
+
+pub fn all_from_string(src) {
+  use #(source, remaining) <- result.try(from_string(src))
+  case remaining {
+    [] -> Ok(source)
+    [#(token, at), ..] -> Error(parser.UnexpectedToken(token, at))
+  }
+}
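
With this change from_string returns the parsed expression together with any unconsumed tokens, and the new all_from_string wraps it to insist the whole input is used. A minimal sketch of the difference (not part of the commit; the input "1" is illustrative):

  import eyg/parse

  pub fn demo() {
    // Lenient: Ok(#(expression, remaining_tokens))
    let assert Ok(#(_expression, _remaining)) = parse.from_string("1")
    // Strict: Error(parser.UnexpectedToken(..)) if any tokens are left over
    let assert Ok(_expression) = parse.all_from_string("1")
    Nil
  }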
16 changes: 14 additions & 2 deletions eyg/src/eyg/parse/lexer.gleam
@@ -23,8 +23,10 @@ fn pop(raw, start) {
 
   // If we track whitespace tokens then we can always return the original start
   case raw {
-    "\r\n" <> rest -> pop(rest, start + 2)
-    "\n" <> rest | " " <> rest | "\t" <> rest -> pop(rest, start + 1)
+    "\r\n" <> rest -> whitespace("\r\n", rest, done)
+    "\n" <> rest -> whitespace("\n", rest, done)
+    " " <> rest -> whitespace(" ", rest, done)
+    "\t" <> rest -> whitespace("\t", rest, done)
 
     "(" <> rest -> done(t.LeftParen, 1, rest)
     ")" <> rest -> done(t.RightParen, 1, rest)
@@ -126,6 +128,16 @@ fn pop(raw, start) {
   }
 }

+fn whitespace(buffer, rest, done) {
+  case rest {
+    "\r\n" <> rest -> whitespace(buffer <> "\r\n", rest, done)
+    "\n" <> rest -> whitespace(buffer <> "\n", rest, done)
+    " " <> rest -> whitespace(buffer <> " ", rest, done)
+    "\t" <> rest -> whitespace(buffer <> "\t", rest, done)
+    _ -> done(t.Whitespace(buffer), string.byte_size(buffer), rest)
+  }
+}
+
 fn string(buffer, length, rest, done) {
   case rest {
     "\"" <> rest -> done(t.String(buffer), length + 1, rest)
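Because runs of whitespace collapse into a single Whitespace token whose payload is the raw text, every other token keeps the byte offset it had before. A sketch of the observable behaviour (my example input, not from the commit):

  import eyg/parse/lexer
  import eyg/parse/token as t

  pub fn example() {
    // The two-space gap becomes one token; "b" still starts at offset 3.
    let assert [#(t.Name("a"), 0), #(t.Whitespace("  "), 1), #(t.Name("b"), 3)] =
      lexer.lex("a  b")
    Nil
  }
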
57 changes: 52 additions & 5 deletions eyg/src/eyg/parse/token.gleam
@@ -1,10 +1,11 @@
-import gleam.{type String as S}
+import gleam/list
 
 pub type Token {
-  Name(S)
-  Uppername(S)
-  Integer(S)
-  String(S)
+  Whitespace(String)
+  Name(String)
+  Uppername(String)
+  Integer(String)
+  String(String)
   Let
   Match
   Perform
@@ -33,3 +34,49 @@ pub type Token {
   UnexpectedGrapheme(String)
   UnterminatedString(String)
 }
+
+pub fn drop_whitespace(tokens) {
+  list.filter(tokens, fn(token) {
+    case token {
+      #(Whitespace(_), _) -> False
+      _ -> True
+    }
+  })
+}
+
+pub fn to_string(token) {
+  case token {
+    Whitespace(raw) -> raw
+    Name(raw) -> raw
+    Uppername(raw) -> raw
+    Integer(raw) -> raw
+    String(raw) -> "\"" <> raw <> "\""
+    Let -> "let"
+    Match -> "match"
+    Perform -> "perform"
+    Deep -> "deep"
+    Shallow -> "shallow"
+    Handle -> "handle"
+    // Having keyword tokens instead of reusing Name prevents keywords being used as names
+    Equal -> "="
+    Comma -> ","
+    DotDot -> ".."
+    Dot -> "."
+    Colon -> ":"
+    RightArrow -> "->"
+    Minus -> "-"
+    Bang -> "!"
+    Bar -> "|"
+
+    LeftParen -> "("
+    RightParen -> ")"
+    LeftBrace -> "{"
+    RightBrace -> "}"
+    LeftSquare -> "["
+    RightSquare -> "]"
+
+    // Invalid tokens
+    UnexpectedGrapheme(raw) -> raw
+    UnterminatedString(raw) -> "\"" <> raw
+  }
+}
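
Keeping whitespace in the stream makes lexing lossless: mapping to_string over every token reconstructs the source, while drop_whitespace recovers the stream the parser previously saw. A hedged sketch of the round-trip (helper name is mine; exact only for sources without string escapes, since String tokens print their unescaped payload):

  import eyg/parse/lexer
  import eyg/parse/token
  import gleam/list
  import gleam/string

  pub fn round_trip(src) {
    lexer.lex(src)
    // Each lexed item is #(token, byte_offset); print the token only.
    |> list.map(fn(pair) { token.to_string(pair.0) })
    |> string.concat
  }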
@@ -3,16 +3,14 @@
 import eyg/analysis/type_/binding
 import eyg/analysis/type_/binding/debug
 import eyg/analysis/type_/binding/error
 import eyg/analysis/type_/isomorphic as t
-import eyg/parse/lexer
-import eyg/parse/parser
+import eyg/parse
 import eygir/annotated
 import gleam/list
 import gleeunit/should
 
 fn parse(src) {
   src
-  |> lexer.lex()
-  |> parser.parse()
+  |> parse.all_from_string()
   |> should.be_ok()
   |> annotated.drop_annotation()
 }
3 changes: 3 additions & 0 deletions eyg/test/eyg/compile/js_test.gleam
@@ -2,6 +2,7 @@
 import eyg/compile
 import eyg/parse
 import gleam/dynamic
 import gleam/json
+import gleam/pair
 import gleeunit/should
 import plinth/browser/window

@@ -10,6 +11,7 @@ fn test_compilation(source, js, evaled) {
   source
   |> parse.from_string()
   |> should.be_ok()
+  |> pair.first
   |> compile.to_js()
   generated
   |> should.equal(js)
@@ -24,6 +26,7 @@ fn test_eval(source, evaled) {
   source
   |> parse.from_string()
   |> should.be_ok()
+  |> pair.first
   |> compile.to_js()
   generated
   |> window.eval()
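The pair.first calls are needed because from_string now succeeds with #(expression, remaining_tokens) rather than a bare expression. A hypothetical helper (mine, not in the commit) showing the shape the tests rely on:

  import eyg/parse
  import gleam/pair
  import gleam/result

  pub fn expression_only(src) {
    // Keep the parsed expression, discard the leftover-token list.
    parse.from_string(src)
    |> result.map(pair.first)
  }
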
48 changes: 32 additions & 16 deletions eyg/test/eyg/parser/lexer_test.gleam
@@ -3,27 +3,27 @@
 import eyg/parse/token as t
 import gleeunit/should
 
 pub fn grouping_test() {
-  "( )"
+  "()"
   |> lexer.lex
-  |> should.equal([#(t.LeftParen, 0), #(t.RightParen, 2)])
-  "{ }"
+  |> should.equal([#(t.LeftParen, 0), #(t.RightParen, 1)])
+  "{}"
   |> lexer.lex
-  |> should.equal([#(t.LeftBrace, 0), #(t.RightBrace, 2)])
-  "[ ]"
+  |> should.equal([#(t.LeftBrace, 0), #(t.RightBrace, 1)])
+  "[]"
   |> lexer.lex
-  |> should.equal([#(t.LeftSquare, 0), #(t.RightSquare, 2)])
+  |> should.equal([#(t.LeftSquare, 0), #(t.RightSquare, 1)])
 }
 
 pub fn punctuation_test() {
-  "= -> , . : !"
+  "=->,.:!"
   |> lexer.lex
   |> should.equal([
     #(t.Equal, 0),
-    #(t.RightArrow, 2),
-    #(t.Comma, 5),
-    #(t.Dot, 7),
-    #(t.Colon, 9),
-    #(t.Bang, 11),
+    #(t.RightArrow, 1),
+    #(t.Comma, 3),
+    #(t.Dot, 4),
+    #(t.Colon, 5),
+    #(t.Bang, 6),
   ])
 }

@@ -32,21 +32,26 @@ pub fn keyword_test() {
   |> lexer.lex
   |> should.equal([
     #(t.Let, 0),
+    #(t.Whitespace(" "), 3),
     #(t.Match, 4),
+    #(t.Whitespace(" "), 9),
     #(t.Perform, 10),
+    #(t.Whitespace(" "), 17),
     #(t.Deep, 18),
+    #(t.Whitespace(" "), 22),
     #(t.Shallow, 23),
+    #(t.Whitespace(" "), 30),
     #(t.Handle, 31),
   ])
 }

 pub fn string_test() {
-  "\"\" \"hello\" \"\\\\\""
+  "\"\"\"hello\"\"\\\\\""
   |> lexer.lex
   |> should.equal([
     #(t.String(""), 0),
-    #(t.String("hello"), 3),
-    #(t.String("\\"), 11),
+    #(t.String("hello"), 2),
+    #(t.String("\\"), 9),
   ])
 }

@@ -55,8 +60,11 @@ pub fn number_test() {
   |> lexer.lex
   |> should.equal([
     #(t.Integer("1"), 0),
+    #(t.Whitespace(" "), 1),
     #(t.Integer("01"), 2),
+    #(t.Whitespace(" "), 4),
     #(t.Integer("1000"), 5),
+    #(t.Whitespace(" "), 9),
     #(t.Minus, 10),
     #(t.Integer("5"), 11),
   ])
@@ -65,15 +73,23 @@
 pub fn name_test() {
   "alice x1 _"
   |> lexer.lex
-  |> should.equal([#(t.Name("alice"), 0), #(t.Name("x1"), 6), #(t.Name("_"), 9)])
+  |> should.equal([
+    #(t.Name("alice"), 0),
+    #(t.Whitespace(" "), 5),
+    #(t.Name("x1"), 6),
+    #(t.Whitespace(" "), 8),
+    #(t.Name("_"), 9),
+  ])
 }
 
 pub fn uppername_test() {
   "Ok MyThing A1"
   |> lexer.lex
   |> should.equal([
     #(t.Uppername("Ok"), 0),
+    #(t.Whitespace(" "), 2),
     #(t.Uppername("MyThing"), 3),
+    #(t.Whitespace(" "), 10),
     #(t.Uppername("A1"), 11),
   ])
 }
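
One invariant runs through all of these updated expectations: each token now starts exactly where the previous one ends, since whitespace occupies its own span. A hedged property check (helper name and structure are mine; exact only for sources without string escapes, where a token's printed width can differ from its consumed width):

  import eyg/parse/lexer
  import eyg/parse/token
  import gleam/list
  import gleam/string

  pub fn offsets_are_contiguous(src) {
    lexer.lex(src)
    |> list.window_by_2
    |> list.all(fn(pair) {
      let #(#(tok, at), #(_, next_at)) = pair
      // The next token should begin where this one's printed form ends.
      next_at == at + string.byte_size(token.to_string(tok))
    })
  }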