Two quotes next to each other result in one escaped quote; Add Split Example, add/refactor tests for every platform.

This commit is contained in:
Makpoc 2015-10-24 15:33:04 +03:00
parent 0d004ccbab
commit d1b667fbce
2 changed files with 206 additions and 158 deletions

View file

@ -2,23 +2,23 @@ package middleware
import ( import (
"errors" "errors"
"fmt"
"runtime" "runtime"
"strings"
"unicode" "unicode"
"github.com/flynn/go-shlex" "github.com/flynn/go-shlex"
) )
var runtimeGoos = runtime.GOOS
// SplitCommandAndArgs takes a command string and parses it // SplitCommandAndArgs takes a command string and parses it
// shell-style into the command and its separate arguments. // shell-style into the command and its separate arguments.
func SplitCommandAndArgs(command string) (cmd string, args []string, err error) { func SplitCommandAndArgs(command string) (cmd string, args []string, err error) {
var parts []string var parts []string
if runtime.GOOS == "windows" { if runtimeGoos == "windows" {
parts = parseWindowsCommand(command) // parse it Windows-style parts = parseWindowsCommand(command) // parse it Windows-style
} else { } else {
parts, err = shlex.Split(command) // parse it Unix-style parts, err = parseUnixCommand(command) // parse it Unix-style
if err != nil { if err != nil {
err = errors.New("error parsing command: " + err.Error()) err = errors.New("error parsing command: " + err.Error())
return return
@ -38,112 +38,76 @@ func SplitCommandAndArgs(command string) (cmd string, args []string, err error)
return return
} }
// parseWindowsCommand is a sad but good-enough attempt to // parseUnixCommand parses a unix style command line and returns the
// split a command into the command and its arguments like // command and its arguments or an error
// the Windows command line would; only basic parsing is func parseUnixCommand(cmd string) ([]string, error) {
// supported. This function has to be used on Windows instead return shlex.Split(cmd)
// of the shlex package because this function treats backslash
// characters properly.
//
// Loosely based off the rules here: http://stackoverflow.com/a/4094897/1048862
// True parsing is much, much trickier.
func parseWindowsCommand2(cmd string) []string {
var parts []string
var part string
var quoted bool
var backslashes int
for _, ch := range cmd {
if ch == '\\' {
backslashes++
continue
}
var evenBacksl = (backslashes % 2) == 0
if backslashes > 0 && ch != '\\' {
numBacksl := (backslashes / 2) + 1
if ch == '"' {
numBacksl--
}
part += strings.Repeat(`\`, numBacksl)
backslashes = 0
}
if quoted {
if ch == '"' && evenBacksl {
quoted = false
continue
}
part += string(ch)
continue
}
if unicode.IsSpace(ch) && len(part) > 0 {
parts = append(parts, part)
part = ""
continue
}
if ch == '"' && evenBacksl {
quoted = true
continue
}
part += string(ch)
}
if len(part) > 0 {
parts = append(parts, part)
part = ""
}
return parts
} }
// parseWindowsCommand parses windows command lines and
// returns the command and the arguments as an array. It
// should be able to parse commonly used command lines.
// Only basic syntax is supported:
// - spaces in double quotes are not token delimiters
// - double quotes are escaped by either a backslash or another double quote
// - except for the above case, backslashes are path separators (not special)
//
// Many sources point out that escaping quotes using backslash can be unsafe.
// Use two double quotes when possible. (Source: http://stackoverflow.com/a/31413730/2616179 )
//
// This function has to be used on Windows instead
// of the shlex package because this function treats backslash
// characters properly.
func parseWindowsCommand(cmd string) []string { func parseWindowsCommand(cmd string) []string {
const backslash = '\\'
const quote = '"'
var parts []string var parts []string
var part string var part string
var inQuotes bool var inQuotes bool
var wasBackslash bool var lastRune rune
prefix := "DEBUG:"
fmt.Println(prefix, "Parsing cmd:", cmd)
for i, ch := range cmd { for i, ch := range cmd {
fmt.Println(" ", prefix, "Looking at char:", string(ch), "at index", string(i))
if ch == '\\' { if i != 0 {
wasBackslash = true lastRune = rune(cmd[i-1])
// put it in the part - for now we don't know if it's escaping char or path separator }
if ch == backslash {
// put it in the part - for now we don't know if it's an
// escaping char or path separator
part += string(ch) part += string(ch)
continue continue
} }
if ch == '"' { if ch == quote {
if wasBackslash { if lastRune == backslash {
// remove the backslash from the part and add the escaped quote instead // remove the backslash from the part and add the escaped quote instead
part = part[:len(part)-1] part = part[:len(part)-1]
part += string(ch) part += string(ch)
wasBackslash = false
continue continue
} else {
// normal escaping quotes
fmt.Println(" ", prefix, "and it's a quote")
inQuotes = !inQuotes
continue
} }
if lastRune == quote {
// revert the last change of the inQuotes state
// it was an escaping quote
inQuotes = !inQuotes
part += string(ch)
continue
}
// normal escaping quotes
inQuotes = !inQuotes
continue
} }
if unicode.IsSpace(ch) && !inQuotes && len(part) > 0 { if unicode.IsSpace(ch) && !inQuotes && len(part) > 0 {
fmt.Println(" ", prefix, "and it's a space outside quotes")
parts = append(parts, part) parts = append(parts, part)
part = "" part = ""
wasBackslash = false
continue continue
} }
wasBackslash = false
part += string(ch) part += string(ch)
} }
@ -152,6 +116,5 @@ func parseWindowsCommand(cmd string) []string {
part = "" part = ""
} }
fmt.Println(prefix, strings.Join(parts, ","))
return parts return parts
} }

View file

@ -2,107 +2,176 @@ package middleware
import ( import (
"fmt" "fmt"
"runtime"
"strings" "strings"
"testing" "testing"
) )
func TestParseUnixCommand(t *testing.T) {
tests := []struct {
input string
expected []string
}{
// 0 - empty command
{
input: ``,
expected: []string{},
},
// 1 - command without arguments
{
input: `command`,
expected: []string{`command`},
},
// 2 - command with single argument
{
input: `command arg1`,
expected: []string{`command`, `arg1`},
},
// 3 - command with multiple arguments
{
input: `command arg1 arg2`,
expected: []string{`command`, `arg1`, `arg2`},
},
// 4 - command with single argument with space character - in quotes
{
input: `command "arg1 arg1"`,
expected: []string{`command`, `arg1 arg1`},
},
// 5 - command with multiple spaces and tab character
{
input: "command arg1 arg2\targ3",
expected: []string{`command`, `arg1`, `arg2`, `arg3`},
},
// 6 - command with single argument with space character - escaped with backslash
{
input: `command arg1\ arg2`,
expected: []string{`command`, `arg1 arg2`},
},
// 7 - single quotes should escape special chars
{
input: `command 'arg1\ arg2'`,
expected: []string{`command`, `arg1\ arg2`},
},
}
for i, test := range tests {
errorPrefix := fmt.Sprintf("Test [%d]: ", i)
errorSuffix := fmt.Sprintf(" Command to parse: [%s]", test.input)
actual, _ := parseUnixCommand(test.input)
if len(actual) != len(test.expected) {
t.Errorf(errorPrefix+"Expected %d parts, got %d: %#v."+errorSuffix, len(test.expected), len(actual), actual)
continue
}
for j := 0; j < len(actual); j++ {
if expectedPart, actualPart := test.expected[j], actual[j]; expectedPart != actualPart {
t.Errorf(errorPrefix+"Expected: %v Actual: %v (index %d)."+errorSuffix, expectedPart, actualPart, j)
}
}
}
}
func TestParseWindowsCommand(t *testing.T) { func TestParseWindowsCommand(t *testing.T) {
tests := []struct { tests := []struct {
input string input string
expected []string expected []string
}{ }{
{ // 0 { // 0 - empty command - do not fail
input: ``,
expected: []string{},
},
{ // 1 - cmd without args
input: `cmd`, input: `cmd`,
expected: []string{`cmd`}, expected: []string{`cmd`},
}, },
{ // 1 { // 2 - multiple args
input: `cmd arg1 arg2`, input: `cmd arg1 arg2`,
expected: []string{`cmd`, `arg1`, `arg2`}, expected: []string{`cmd`, `arg1`, `arg2`},
}, },
{ // 2 { // 3 - multiple args with space
input: `cmd "combined arg" arg2`, input: `cmd "combined arg" arg2`,
expected: []string{`cmd`, `combined arg`, `arg2`}, expected: []string{`cmd`, `combined arg`, `arg2`},
}, },
{ // 3 { // 4 - path without spaces
input: `mkdir C:\Windows\foo\bar`, input: `mkdir C:\Windows\foo\bar`,
expected: []string{`mkdir`, `C:\Windows\foo\bar`}, expected: []string{`mkdir`, `C:\Windows\foo\bar`},
}, },
{ // 4 { // 5 - command with space in quotes
input: `"command here"`, input: `"command here"`,
expected: []string{`command here`}, expected: []string{`command here`},
}, },
{ // 5 { // 6 - argument with escaped quotes (two quotes)
input: `cmd ""arg""`,
expected: []string{`cmd`, `"arg"`},
},
{ // 7 - argument with escaped quotes (backslash)
input: `cmd \"arg\"`, input: `cmd \"arg\"`,
expected: []string{`cmd`, `"arg"`}, expected: []string{`cmd`, `"arg"`},
}, },
{ // 6 { // 8 - two quotes (escaped) inside an inQuote element
input: `cmd "a \"quoted value\""`, input: `cmd "a ""quoted value"`,
expected: []string{`cmd`, `a "quoted value"`}, expected: []string{`cmd`, `a "quoted value`},
}, },
{ // 7 // TODO - see how many quotes are displayed if we use "", """, """""""
{ // 9 - two quotes outside an inQuote element
input: `cmd a ""quoted value`,
expected: []string{`cmd`, `a`, `"quoted`, `value`},
},
{ // 10 - path with space in quotes
input: `mkdir "C:\directory name\foobar"`, input: `mkdir "C:\directory name\foobar"`,
expected: []string{`mkdir`, `C:\directory name\foobar`}, expected: []string{`mkdir`, `C:\directory name\foobar`},
}, },
{ // 8 { // 11 - space without quotes
input: `mkdir C:\ space`, input: `mkdir C:\ space`,
expected: []string{`mkdir`, `C:\`, `space`}, expected: []string{`mkdir`, `C:\`, `space`},
}, },
{ // 9 { // 12 - space in quotes
input: `mkdir "C:\ space"`, input: `mkdir "C:\ space"`,
expected: []string{`mkdir`, `C:\ space`}, expected: []string{`mkdir`, `C:\ space`},
}, },
// 10 { // 13 - UNC
{
input: `mkdir \\?\C:\Users`, input: `mkdir \\?\C:\Users`,
expected: []string{`mkdir`, `\\?\C:\Users`}, expected: []string{`mkdir`, `\\?\C:\Users`},
}, },
// 11 { // 14 - UNC with space
{
input: `mkdir "\\?\C:\Program Files"`, input: `mkdir "\\?\C:\Program Files"`,
expected: []string{`mkdir`, `\\?\C:\Program Files`}, expected: []string{`mkdir`, `\\?\C:\Program Files`},
}, },
}
var nTests int { // 15 - unclosed quotes - treat as if the path ends with quote
for i, test := range tests { input: `mkdir "c:\Program files`,
fmt.Printf("====== Test %d ======\n", i) expected: []string{`mkdir`, `c:\Program files`},
actual := parseWindowsCommand(test.input) },
if len(actual) != len(test.expected) { { // 16 - quotes used inside the argument
fmt.Printf("Test %d: Expected %d parts, got %d: %#v", i, len(test.expected), len(actual), actual) input: `mkdir "c:\P"rogra"m f"iles`,
fmt.Println() expected: []string{`mkdir`, `c:\Program files`},
t.Errorf("Test %d: Expected %d parts, got %d: %#v", i, len(test.expected), len(actual), actual) },
continue
}
for j := 0; j < len(actual); j++ {
if expectedPart, actualPart := test.expected[j], actual[j]; expectedPart != actualPart {
fmt.Printf("Test %d: Expected: %v Actual: %v (index %d)", i, expectedPart, actualPart, j)
fmt.Println()
t.Errorf("Test %d: Expected: %v Actual: %v (index %d)", i, expectedPart, actualPart, j)
}
}
nTests += 1
} }
for _, test := range tests { for i, test := range tests {
fmt.Printf("====== Test %d ======\n", nTests) errorPrefix := fmt.Sprintf("Test [%d]: ", i)
actual := parseWindowsCommand2(test.input) errorSuffix := fmt.Sprintf(" Command to parse: [%s]", test.input)
actual := parseWindowsCommand(test.input)
if len(actual) != len(test.expected) { if len(actual) != len(test.expected) {
fmt.Printf("Test %d: Expected %d parts, got %d: %#v", nTests, len(test.expected), len(actual), actual) t.Errorf(errorPrefix+"Expected %d parts, got %d: %#v."+errorSuffix, len(test.expected), len(actual), actual)
fmt.Println()
t.Errorf("Test %d: Expected %d parts, got %d: %#v", nTests, len(test.expected), len(actual), actual)
continue continue
} }
for j := 0; j < len(actual); j++ { for j := 0; j < len(actual); j++ {
if expectedPart, actualPart := test.expected[j], actual[j]; expectedPart != actualPart { if expectedPart, actualPart := test.expected[j], actual[j]; expectedPart != actualPart {
fmt.Printf("Test %d: Expected: %v Actual: %v (index %d)", nTests, expectedPart, actualPart, j) t.Errorf(errorPrefix+"Expected: %v Actual: %v (index %d)."+errorSuffix, expectedPart, actualPart, j)
fmt.Println()
t.Errorf("Test %d: Expected: %v Actual: %v (index %d)", nTests, expectedPart, actualPart, j)
} }
} }
nTests += 1
} }
} }
func TestSplitCommandAndArgs(t *testing.T) { func TestSplitCommandAndArgs(t *testing.T) {
// force linux parsing. It's more robust and covers error cases
runtimeGoos = "linux"
defer func() {
runtimeGoos = runtime.GOOS
}()
var parseErrorContent = "error parsing command:" var parseErrorContent = "error parsing command:"
var noCommandErrContent = "no command contained in" var noCommandErrContent = "no command contained in"
@ -112,56 +181,42 @@ func TestSplitCommandAndArgs(t *testing.T) {
expectedArgs []string expectedArgs []string
expectedErrContent string expectedErrContent string
}{ }{
// Test case 0 - emtpy command // 0 - emtpy command
{ {
input: ``, input: ``,
expectedCommand: ``, expectedCommand: ``,
expectedArgs: nil, expectedArgs: nil,
expectedErrContent: noCommandErrContent, expectedErrContent: noCommandErrContent,
}, },
// Test case 1 - command without arguments // 1 - command without arguments
{ {
input: `command`, input: `command`,
expectedCommand: `command`, expectedCommand: `command`,
expectedArgs: nil, expectedArgs: nil,
expectedErrContent: ``, expectedErrContent: ``,
}, },
// Test case 2 - command with single argument // 2 - command with single argument
{ {
input: `command arg1`, input: `command arg1`,
expectedCommand: `command`, expectedCommand: `command`,
expectedArgs: []string{`arg1`}, expectedArgs: []string{`arg1`},
expectedErrContent: ``, expectedErrContent: ``,
}, },
// Test case 3 - command with multiple arguments // 3 - command with multiple arguments
{ {
input: `command arg1 arg2`, input: `command arg1 arg2`,
expectedCommand: `command`, expectedCommand: `command`,
expectedArgs: []string{`arg1`, `arg2`}, expectedArgs: []string{`arg1`, `arg2`},
expectedErrContent: ``, expectedErrContent: ``,
}, },
// Test case 4 - command with single argument with space character - in quotes // 4 - command with unclosed quotes
{
input: `command "arg1 arg1"`,
expectedCommand: `command`,
expectedArgs: []string{`arg1 arg1`},
expectedErrContent: ``,
},
// Test case 5 - command with multiple spaces and tab character
{
input: "command arg1 arg2\targ3",
expectedCommand: `command`,
expectedArgs: []string{`arg1`, `arg2`, "arg3"},
expectedErrContent: "",
},
// Test case 6 - command with unclosed quotes
{ {
input: `command "arg1 arg2`, input: `command "arg1 arg2`,
expectedCommand: "", expectedCommand: "",
expectedArgs: nil, expectedArgs: nil,
expectedErrContent: parseErrorContent, expectedErrContent: parseErrorContent,
}, },
// Test case 7 - command with unclosed quotes // 5 - command with unclosed quotes
{ {
input: `command 'arg1 arg2"`, input: `command 'arg1 arg2"`,
expectedCommand: "", expectedCommand: "",
@ -188,19 +243,49 @@ func TestSplitCommandAndArgs(t *testing.T) {
// test if command matches // test if command matches
if test.expectedCommand != actualCommand { if test.expectedCommand != actualCommand {
t.Errorf("Expected command: [%s], actual: [%s]."+errorSuffix, test.expectedCommand, actualCommand) t.Errorf(errorPrefix+"Expected command: [%s], actual: [%s]."+errorSuffix, test.expectedCommand, actualCommand)
} }
// test if arguments match // test if arguments match
if len(test.expectedArgs) != len(actualArgs) { if len(test.expectedArgs) != len(actualArgs) {
t.Errorf("Wrong number of arguments! Expected [%v], actual [%v]."+errorSuffix, test.expectedArgs, actualArgs) t.Errorf(errorPrefix+"Wrong number of arguments! Expected [%v], actual [%v]."+errorSuffix, test.expectedArgs, actualArgs)
} } else {
// test args only if the count matches.
for j, actualArg := range actualArgs { for j, actualArg := range actualArgs {
expectedArg := test.expectedArgs[j] expectedArg := test.expectedArgs[j]
if actualArg != expectedArg { if actualArg != expectedArg {
t.Errorf(errorPrefix+"Argument at position [%d] differ! Expected [%s], actual [%s]"+errorSuffix, j, expectedArg, actualArg) t.Errorf(errorPrefix+"Argument at position [%d] differ! Expected [%s], actual [%s]"+errorSuffix, j, expectedArg, actualArg)
}
} }
} }
} }
} }
func ExampleSplitCommandAndArgs() {
var commandLine string
var command string
var args []string
// just for the test - change GOOS and reset it at the end of the test
runtimeGoos = "windows"
defer func() {
runtimeGoos = runtime.GOOS
}()
commandLine = `mkdir /P "C:\Program Files"`
command, args, _ = SplitCommandAndArgs(commandLine)
fmt.Printf("Windows: %s: %s [%s]\n", commandLine, command, strings.Join(args, ","))
// set GOOS to linux
runtimeGoos = "linux"
commandLine = `mkdir -p /path/with\ space`
command, args, _ = SplitCommandAndArgs(commandLine)
fmt.Printf("Linux: %s: %s [%s]\n", commandLine, command, strings.Join(args, ","))
// Output:
// Windows: mkdir /P "C:\Program Files": mkdir [/P,C:\Program Files]
// Linux: mkdir -p /path/with\ space: mkdir [-p,/path/with space]
}