From 318781512b96483688c7b790ffab9dc9e78a88ff Mon Sep 17 00:00:00 2001
From: Matthew Holt
Date: Wed, 21 Jan 2015 12:09:01 -0700
Subject: [PATCH] Wrote lexer tests

---
 config/lexer.go      |  36 ++++++-----
 config/lexer_test.go | 139 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 156 insertions(+), 19 deletions(-)
 create mode 100644 config/lexer_test.go

diff --git a/config/lexer.go b/config/lexer.go
index 564218983..96c883cff 100644
--- a/config/lexer.go
+++ b/config/lexer.go
@@ -6,15 +6,23 @@ import (
 	"unicode"
 )
 
-// lexer is a utility which can get values, token by
-// token, from a reader. A token is a word, and tokens
-// are separated by whitespace. A word can be enclosed in
-// quotes if it contains whitespace.
-type lexer struct {
-	reader *bufio.Reader
-	token  token
-	line   int
-}
+type (
+	// lexer is a utility which can get values, token by
+	// token, from a reader. A token is a word, and tokens
+	// are separated by whitespace. A word can be enclosed in
+	// quotes if it contains whitespace.
+	lexer struct {
+		reader *bufio.Reader
+		token  token
+		line   int
+	}
+
+	// token represents a single processable unit.
+	token struct {
+		line int
+		text string
+	}
+)
 
 // load prepares the lexer to scan a file for tokens.
 func (l *lexer) load(file io.Reader) error {
@@ -63,10 +71,6 @@ func (l *lexer) next() bool {
 				return makeToken()
 			}
 		}
-		if ch == '\\' && !escaped {
-			escaped = true
-			continue
-		}
 		if ch == '\n' {
 			l.line++
 		}
@@ -108,9 +112,3 @@ func (l *lexer) next() bool {
 		val = append(val, ch)
 	}
 }
-
-// token represents a single processable unit.
-type token struct {
-	line int
-	text string
-}
diff --git a/config/lexer_test.go b/config/lexer_test.go
new file mode 100644
index 000000000..5b1cd2c25
--- /dev/null
+++ b/config/lexer_test.go
@@ -0,0 +1,139 @@
+package config
+
+import (
+	"strings"
+	"testing"
+)
+
+type lexerTestCase struct {
+	input    string
+	expected []token
+}
+
+func TestLexer(t *testing.T) {
+	testCases := []lexerTestCase{
+		{
+			input: `host:123`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+			},
+		},
+		{
+			input: `host:123
+
+					directive`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 3, text: "directive"},
+			},
+		},
+		{
+			input: `host:123 {
+						directive
+					}`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 1, text: "{"},
+				{line: 2, text: "directive"},
+				{line: 3, text: "}"},
+			},
+		},
+		{
+			input: `host:123 { directive }`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 1, text: "{"},
+				{line: 1, text: "directive"},
+				{line: 1, text: "}"},
+			},
+		},
+		{
+			input: `host:123 {
+						#comment
+						directive
+						# comment
+						foobar # another comment
+					}`,
+			expected: []token{
+				{line: 1, text: "host:123"},
+				{line: 1, text: "{"},
+				{line: 3, text: "directive"},
+				{line: 5, text: "foobar"},
+				{line: 6, text: "}"},
+			},
+		},
+		{
+			input: `a "quoted value" b
+					foobar`,
+			expected: []token{
+				{line: 1, text: "a"},
+				{line: 1, text: "quoted value"},
+				{line: 1, text: "b"},
+				{line: 2, text: "foobar"},
+			},
+		},
+		{
+			input: `A "quoted \"value\" inside" B`,
+			expected: []token{
+				{line: 1, text: "A"},
+				{line: 1, text: `quoted "value" inside`},
+				{line: 1, text: "B"},
+			},
+		},
+		{
+			input: `A "quoted value with line
+					break inside" {
+						foobar
+					}`,
+			expected: []token{
+				{line: 1, text: "A"},
+				{line: 1, text: "quoted value with line\n\t\t\t\t\tbreak inside"},
+				{line: 2, text: "{"},
+				{line: 3, text: "foobar"},
+				{line: 4, text: "}"},
+			},
+		},
+		{
+			input: "skip those\r\nCR characters",
+			expected: []token{
+				{line: 1, text: "skip"},
+				{line: 1, text: "those"},
+				{line: 2, text: "CR"},
+				{line: 2, text: "characters"},
+			},
+		},
+	}
+
+	for i, testCase := range testCases {
+		actual := tokenize(testCase.input)
+		lexerCompare(t, i, testCase.expected, actual)
+	}
+}
+
+func tokenize(input string) (tokens []token) {
+	l := lexer{}
+	l.load(strings.NewReader(input))
+	for l.next() {
+		tokens = append(tokens, l.token)
+	}
+	return
+}
+
+func lexerCompare(t *testing.T, n int, expected, actual []token) {
+	if len(expected) != len(actual) {
+		t.Errorf("Test case %d: expected %d token(s) but got %d", n, len(expected), len(actual))
+	}
+
+	for i := 0; i < len(actual) && i < len(expected); i++ {
+		if actual[i].line != expected[i].line {
+			t.Errorf("Test case %d token %d ('%s'): expected line %d but was line %d",
+				n, i, expected[i].text, expected[i].line, actual[i].line)
+			break
+		}
+		if actual[i].text != expected[i].text {
+			t.Errorf("Test case %d token %d: expected text '%s' but was '%s'",
+				n, i, expected[i].text, actual[i].text)
+			break
+		}
+	}
+}
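For anyone reading the patch without the rest of config/lexer.go handy, the tokenize helper above also shows the whole pattern for driving the lexer: load a reader, call next() until it returns false, and read the current token from l.token after each call. Below is a minimal sketch of that same pattern outside the tests; it must live in package config, since lexer is unexported, and the printTokens name is hypothetical, not part of this commit.

package config

import (
	"fmt"
	"strings"
)

// printTokens is a hypothetical helper, not part of the patch. It
// drives the lexer exactly as tokenize does in lexer_test.go: load a
// reader, then call next() in a loop, reading l.token after each call.
func printTokens(input string) {
	l := lexer{}
	if err := l.load(strings.NewReader(input)); err != nil {
		fmt.Println("load error:", err)
		return
	}
	for l.next() {
		fmt.Printf("line %d: %q\n", l.token.line, l.token.text)
	}
}

Given the input `host:123 { directive }`, this prints four tokens, all reported on line 1, mirroring the fourth test case above.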