From 318781512b96483688c7b790ffab9dc9e78a88ff Mon Sep 17 00:00:00 2001
From: Matthew Holt
Date: Wed, 21 Jan 2015 12:09:01 -0700
Subject: [PATCH] Wrote lexer tests

---
 config/lexer.go      |  36 ++++++-----
 config/lexer_test.go | 139 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 156 insertions(+), 19 deletions(-)
 create mode 100644 config/lexer_test.go

diff --git a/config/lexer.go b/config/lexer.go
index 564218983..96c883cff 100644
--- a/config/lexer.go
+++ b/config/lexer.go
@@ -6,15 +6,23 @@ import (
 	"unicode"
 )
 
-// lexer is a utility which can get values, token by
-// token, from a reader. A token is a word, and tokens
-// are separated by whitespace. A word can be enclosed in
-// quotes if it contains whitespace.
-type lexer struct {
-	reader *bufio.Reader
-	token  token
-	line   int
-}
+type (
+	// lexer is a utility which can get values, token by
+	// token, from a reader. A token is a word, and tokens
+	// are separated by whitespace. A word can be enclosed in
+	// quotes if it contains whitespace.
+	lexer struct {
+		reader *bufio.Reader
+		token  token
+		line   int
+	}
+
+	// token represents a single processable unit.
+	token struct {
+		line int
+		text string
+	}
+)
 
 // load prepares the lexer to scan a file for tokens.
 func (l *lexer) load(file io.Reader) error {
@@ -63,10 +71,6 @@ func (l *lexer) next() bool {
 				return makeToken()
 			}
 		}
-		if ch == '\\' && !escaped {
-			escaped = true
-			continue
-		}
 		if ch == '\n' {
 			l.line++
 		}
@@ -108,9 +112,3 @@ func (l *lexer) next() bool {
 		val = append(val, ch)
 	}
 }
-
-// token represents a single processable unit.
-type token struct {
-	line int
-	text string
-}
diff --git a/config/lexer_test.go b/config/lexer_test.go
new file mode 100644
index 000000000..5b1cd2c25
--- /dev/null
+++ b/config/lexer_test.go
@@ -0,0 +1,139 @@
+package config
+
+import (
+	"strings"
+	"testing"
+)
+
+type lexerTestCase struct {
+	input    string
+	expected []token
+}
+
+func TestLexer(t *testing.T) {
+	testCases := []lexerTestCase{
+		{
+			input: `host:123`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+			},
+		},
+		{
+			input: `host:123
+
+					directive`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 3, text: "directive"},
+			},
+		},
+		{
+			input: `host:123 {
+						directive
+					}`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 1, text: "{"},
+				{line: 2, text: "directive"},
+				{line: 3, text: "}"},
+			},
+		},
+		{
+			input: `host:123 { directive }`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 1, text: "{"},
+				{line: 1, text: "directive"},
+				{line: 1, text: "}"},
+			},
+		},
+		{
+			input: `host:123 {
+						#comment
+						directive
+						# comment
+						foobar # another comment
+					}`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 1, text: "{"},
+				{line: 3, text: "directive"},
+				{line: 5, text: "foobar"},
+				{line: 6, text: "}"},
+			},
+		},
+		{
+			input: `a "quoted value" b
+					foobar`,
+			expected: []token{
+				{line: 1, text: "a"},
+				{line: 1, text: "quoted value"},
+				{line: 1, text: "b"},
+				{line: 2, text: "foobar"},
+			},
+		},
+		{
+			input: `A "quoted \"value\" inside" B`,
+			expected: []token{
+				{line: 1, text: "A"},
+				{line: 1, text: `quoted "value" inside`},
+				{line: 1, text: "B"},
+			},
+		},
+		{
+			input: `A "quoted value with line
+					break inside" {
+						foobar
+					}`,
+			expected: []token{
+				{line: 1, text: "A"},
+				{line: 1, text: "quoted value with line\n\t\t\t\t\tbreak inside"},
+				{line: 2, text: "{"},
+				{line: 3, text: "foobar"},
+				{line: 4, text: "}"},
+			},
+		},
+		{
+			input: "skip those\r\nCR characters",
+			expected: []token{
+				{line: 1, text: "skip"},
+				{line: 1, text: "those"},
+				{line: 2, text: "CR"},
+				{line: 2, text: "characters"},
+			},
+		},
+	}
+
+	for i, testCase := range testCases {
+		actual := tokenize(testCase.input)
+		lexerCompare(t, i, testCase.expected, actual)
+	}
+}
+
+func tokenize(input string) (tokens []token) {
+	l := lexer{}
+	l.load(strings.NewReader(input))
+	for l.next() {
+		tokens = append(tokens, l.token)
+	}
+	return
+}
+
+func lexerCompare(t *testing.T, n int, expected, actual []token) {
+	if len(expected) != len(actual) {
+		t.Errorf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual))
+	}
+
+	for i := 0; i < len(actual) && i < len(expected); i++ {
+		if actual[i].line != expected[i].line {
+			t.Errorf("Test case %d token %d ('%s'): expected line %d but was line %d",
+				n, i, expected[i].text, expected[i].line, actual[i].line)
+			break
+		}
+		if actual[i].text != expected[i].text {
+			t.Errorf("Test case %d token %d: expected text '%s' but was '%s'",
+				n, i, expected[i].text, actual[i].text)
+			break
+		}
+	}
+}
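For anyone reading the patch without the rest of config/lexer.go handy, the tokenize helper above also shows the whole pattern for driving the lexer: load a reader, call next() until it returns false, and read the current token from l.token after each call. Below is a minimal sketch of that same pattern outside the tests; it must live in package config, since lexer is unexported, and the printTokens name is hypothetical, not part of this commit.

package config

import (
	"fmt"
	"strings"
)

// printTokens is a hypothetical helper, not part of the patch. It
// drives the lexer exactly as tokenize does in lexer_test.go: load a
// reader, then call next() in a loop, reading l.token after each call.
func printTokens(input string) {
	l := lexer{}
	if err := l.load(strings.NewReader(input)); err != nil {
		fmt.Println("load error:", err)
		return
	}
	for l.next() {
		fmt.Printf("line %d: %q\n", l.token.line, l.token.text)
	}
}

Given the input `host:123 { directive }`, this prints four tokens, all reported on line 1, mirroring the fourth test case above.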