mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2024-11-01 15:00:00 +00:00
1260 lines
33 KiB
Go
1260 lines
33 KiB
Go
// Package parser contains stuff that are related to parsing a Markdown text.
|
|
package parser
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/yuin/goldmark/ast"
|
|
"github.com/yuin/goldmark/text"
|
|
"github.com/yuin/goldmark/util"
|
|
)
|
|
|
|
// A Reference interface represents a link reference in Markdown text.
|
|
type Reference interface {
|
|
// String implements Stringer.
|
|
String() string
|
|
|
|
// Label returns a label of the reference.
|
|
Label() []byte
|
|
|
|
// Destination returns a destination(URL) of the reference.
|
|
Destination() []byte
|
|
|
|
// Title returns a title of the reference.
|
|
Title() []byte
|
|
}
|
|
|
|
type reference struct {
|
|
label []byte
|
|
destination []byte
|
|
title []byte
|
|
}
|
|
|
|
// NewReference returns a new Reference.
|
|
func NewReference(label, destination, title []byte) Reference {
|
|
return &reference{label, destination, title}
|
|
}
|
|
|
|
func (r *reference) Label() []byte {
|
|
return r.label
|
|
}
|
|
|
|
func (r *reference) Destination() []byte {
|
|
return r.destination
|
|
}
|
|
|
|
func (r *reference) Title() []byte {
|
|
return r.title
|
|
}
|
|
|
|
func (r *reference) String() string {
|
|
return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title)
|
|
}
|
|
|
|
// An IDs interface is a collection of the element ids.
|
|
type IDs interface {
|
|
// Generate generates a new element id.
|
|
Generate(value []byte, kind ast.NodeKind) []byte
|
|
|
|
// Put puts a given element id to the used ids table.
|
|
Put(value []byte)
|
|
}
|
|
|
|
type ids struct {
|
|
values map[string]bool
|
|
}
|
|
|
|
func newIDs() IDs {
|
|
return &ids{
|
|
values: map[string]bool{},
|
|
}
|
|
}
|
|
|
|
func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
|
|
value = util.TrimLeftSpace(value)
|
|
value = util.TrimRightSpace(value)
|
|
result := []byte{}
|
|
for i := 0; i < len(value); {
|
|
v := value[i]
|
|
l := util.UTF8Len(v)
|
|
i += int(l)
|
|
if l != 1 {
|
|
continue
|
|
}
|
|
if util.IsAlphaNumeric(v) {
|
|
if 'A' <= v && v <= 'Z' {
|
|
v += 'a' - 'A'
|
|
}
|
|
result = append(result, v)
|
|
} else if util.IsSpace(v) || v == '-' || v == '_' {
|
|
result = append(result, '-')
|
|
}
|
|
}
|
|
if len(result) == 0 {
|
|
if kind == ast.KindHeading {
|
|
result = []byte("heading")
|
|
} else {
|
|
result = []byte("id")
|
|
}
|
|
}
|
|
if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
|
|
s.values[util.BytesToReadOnlyString(result)] = true
|
|
return result
|
|
}
|
|
for i := 1; ; i++ {
|
|
newResult := fmt.Sprintf("%s-%d", result, i)
|
|
if _, ok := s.values[newResult]; !ok {
|
|
s.values[newResult] = true
|
|
return []byte(newResult)
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
func (s *ids) Put(value []byte) {
|
|
s.values[util.BytesToReadOnlyString(value)] = true
|
|
}
|
|
|
|
// ContextKey is a key that is used to set arbitrary values to the context.
|
|
type ContextKey int
|
|
|
|
// ContextKeyMax is a maximum value of the ContextKey.
|
|
var ContextKeyMax ContextKey
|
|
|
|
// NewContextKey return a new ContextKey value.
|
|
func NewContextKey() ContextKey {
|
|
ContextKeyMax++
|
|
return ContextKeyMax
|
|
}
|
|
|
|
// A Context interface holds a information that are necessary to parse
|
|
// Markdown text.
|
|
type Context interface {
|
|
// String implements Stringer.
|
|
String() string
|
|
|
|
// Get returns a value associated with the given key.
|
|
Get(ContextKey) interface{}
|
|
|
|
// ComputeIfAbsent computes a value if a value associated with the given key is absent and returns the value.
|
|
ComputeIfAbsent(ContextKey, func() interface{}) interface{}
|
|
|
|
// Set sets the given value to the context.
|
|
Set(ContextKey, interface{})
|
|
|
|
// AddReference adds the given reference to this context.
|
|
AddReference(Reference)
|
|
|
|
// Reference returns (a reference, true) if a reference associated with
|
|
// the given label exists, otherwise (nil, false).
|
|
Reference(label string) (Reference, bool)
|
|
|
|
// References returns a list of references.
|
|
References() []Reference
|
|
|
|
// IDs returns a collection of the element ids.
|
|
IDs() IDs
|
|
|
|
// BlockOffset returns a first non-space character position on current line.
|
|
// This value is valid only for BlockParser.Open.
|
|
// BlockOffset returns -1 if current line is blank.
|
|
BlockOffset() int
|
|
|
|
// BlockOffset sets a first non-space character position on current line.
|
|
// This value is valid only for BlockParser.Open.
|
|
SetBlockOffset(int)
|
|
|
|
// BlockIndent returns an indent width on current line.
|
|
// This value is valid only for BlockParser.Open.
|
|
// BlockIndent returns -1 if current line is blank.
|
|
BlockIndent() int
|
|
|
|
// BlockIndent sets an indent width on current line.
|
|
// This value is valid only for BlockParser.Open.
|
|
SetBlockIndent(int)
|
|
|
|
// FirstDelimiter returns a first delimiter of the current delimiter list.
|
|
FirstDelimiter() *Delimiter
|
|
|
|
// LastDelimiter returns a last delimiter of the current delimiter list.
|
|
LastDelimiter() *Delimiter
|
|
|
|
// PushDelimiter appends the given delimiter to the tail of the current
|
|
// delimiter list.
|
|
PushDelimiter(delimiter *Delimiter)
|
|
|
|
// RemoveDelimiter removes the given delimiter from the current delimiter list.
|
|
RemoveDelimiter(d *Delimiter)
|
|
|
|
// ClearDelimiters clears the current delimiter list.
|
|
ClearDelimiters(bottom ast.Node)
|
|
|
|
// OpenedBlocks returns a list of nodes that are currently in parsing.
|
|
OpenedBlocks() []Block
|
|
|
|
// SetOpenedBlocks sets a list of nodes that are currently in parsing.
|
|
SetOpenedBlocks([]Block)
|
|
|
|
// LastOpenedBlock returns a last node that is currently in parsing.
|
|
LastOpenedBlock() Block
|
|
|
|
// IsInLinkLabel returns true if current position seems to be in link label.
|
|
IsInLinkLabel() bool
|
|
}
|
|
|
|
// A ContextConfig struct is a data structure that holds configuration of the Context.
|
|
type ContextConfig struct {
|
|
IDs IDs
|
|
}
|
|
|
|
// An ContextOption is a functional option type for the Context.
|
|
type ContextOption func(*ContextConfig)
|
|
|
|
// WithIDs is a functional option for the Context.
|
|
func WithIDs(ids IDs) ContextOption {
|
|
return func(c *ContextConfig) {
|
|
c.IDs = ids
|
|
}
|
|
}
|
|
|
|
type parseContext struct {
|
|
store []interface{}
|
|
ids IDs
|
|
refs map[string]Reference
|
|
blockOffset int
|
|
blockIndent int
|
|
delimiters *Delimiter
|
|
lastDelimiter *Delimiter
|
|
openedBlocks []Block
|
|
}
|
|
|
|
// NewContext returns a new Context.
|
|
func NewContext(options ...ContextOption) Context {
|
|
cfg := &ContextConfig{
|
|
IDs: newIDs(),
|
|
}
|
|
for _, option := range options {
|
|
option(cfg)
|
|
}
|
|
|
|
return &parseContext{
|
|
store: make([]interface{}, ContextKeyMax+1),
|
|
refs: map[string]Reference{},
|
|
ids: cfg.IDs,
|
|
blockOffset: -1,
|
|
blockIndent: -1,
|
|
delimiters: nil,
|
|
lastDelimiter: nil,
|
|
openedBlocks: []Block{},
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) Get(key ContextKey) interface{} {
|
|
return p.store[key]
|
|
}
|
|
|
|
func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
|
|
v := p.store[key]
|
|
if v == nil {
|
|
v = f()
|
|
p.store[key] = v
|
|
}
|
|
return v
|
|
}
|
|
|
|
func (p *parseContext) Set(key ContextKey, value interface{}) {
|
|
p.store[key] = value
|
|
}
|
|
|
|
func (p *parseContext) IDs() IDs {
|
|
return p.ids
|
|
}
|
|
|
|
func (p *parseContext) BlockOffset() int {
|
|
return p.blockOffset
|
|
}
|
|
|
|
func (p *parseContext) SetBlockOffset(v int) {
|
|
p.blockOffset = v
|
|
}
|
|
|
|
func (p *parseContext) BlockIndent() int {
|
|
return p.blockIndent
|
|
}
|
|
|
|
func (p *parseContext) SetBlockIndent(v int) {
|
|
p.blockIndent = v
|
|
}
|
|
|
|
func (p *parseContext) LastDelimiter() *Delimiter {
|
|
return p.lastDelimiter
|
|
}
|
|
|
|
func (p *parseContext) FirstDelimiter() *Delimiter {
|
|
return p.delimiters
|
|
}
|
|
|
|
func (p *parseContext) PushDelimiter(d *Delimiter) {
|
|
if p.delimiters == nil {
|
|
p.delimiters = d
|
|
p.lastDelimiter = d
|
|
} else {
|
|
l := p.lastDelimiter
|
|
p.lastDelimiter = d
|
|
l.NextDelimiter = d
|
|
d.PreviousDelimiter = l
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
|
|
if d.PreviousDelimiter == nil {
|
|
p.delimiters = d.NextDelimiter
|
|
} else {
|
|
d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
|
|
if d.NextDelimiter != nil {
|
|
d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
|
|
}
|
|
}
|
|
if d.NextDelimiter == nil {
|
|
p.lastDelimiter = d.PreviousDelimiter
|
|
}
|
|
if p.delimiters != nil {
|
|
p.delimiters.PreviousDelimiter = nil
|
|
}
|
|
if p.lastDelimiter != nil {
|
|
p.lastDelimiter.NextDelimiter = nil
|
|
}
|
|
d.NextDelimiter = nil
|
|
d.PreviousDelimiter = nil
|
|
if d.Length != 0 {
|
|
ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
|
|
} else {
|
|
d.Parent().RemoveChild(d.Parent(), d)
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) ClearDelimiters(bottom ast.Node) {
|
|
if p.lastDelimiter == nil {
|
|
return
|
|
}
|
|
var c ast.Node
|
|
for c = p.lastDelimiter; c != nil && c != bottom; {
|
|
prev := c.PreviousSibling()
|
|
if d, ok := c.(*Delimiter); ok {
|
|
p.RemoveDelimiter(d)
|
|
}
|
|
c = prev
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) AddReference(ref Reference) {
|
|
key := util.ToLinkReference(ref.Label())
|
|
if _, ok := p.refs[key]; !ok {
|
|
p.refs[key] = ref
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) Reference(label string) (Reference, bool) {
|
|
v, ok := p.refs[label]
|
|
return v, ok
|
|
}
|
|
|
|
func (p *parseContext) References() []Reference {
|
|
ret := make([]Reference, 0, len(p.refs))
|
|
for _, v := range p.refs {
|
|
ret = append(ret, v)
|
|
}
|
|
return ret
|
|
}
|
|
|
|
func (p *parseContext) String() string {
|
|
refs := []string{}
|
|
for _, r := range p.refs {
|
|
refs = append(refs, r.String())
|
|
}
|
|
|
|
return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
|
|
}
|
|
|
|
func (p *parseContext) OpenedBlocks() []Block {
|
|
return p.openedBlocks
|
|
}
|
|
|
|
func (p *parseContext) SetOpenedBlocks(v []Block) {
|
|
p.openedBlocks = v
|
|
}
|
|
|
|
func (p *parseContext) LastOpenedBlock() Block {
|
|
if l := len(p.openedBlocks); l != 0 {
|
|
return p.openedBlocks[l-1]
|
|
}
|
|
return Block{}
|
|
}
|
|
|
|
func (p *parseContext) IsInLinkLabel() bool {
|
|
tlist := p.Get(linkLabelStateKey)
|
|
return tlist != nil
|
|
}
|
|
|
|
// State represents parser's state.
|
|
// State is designed to use as a bit flag.
|
|
type State int
|
|
|
|
const (
|
|
// None is a default value of the [State].
|
|
None State = 1 << iota
|
|
|
|
// Continue indicates parser can continue parsing.
|
|
Continue
|
|
|
|
// Close indicates parser cannot parse anymore.
|
|
Close
|
|
|
|
// HasChildren indicates parser may have child blocks.
|
|
HasChildren
|
|
|
|
// NoChildren indicates parser does not have child blocks.
|
|
NoChildren
|
|
|
|
// RequireParagraph indicates parser requires that the last node
|
|
// must be a paragraph and is not converted to other nodes by
|
|
// ParagraphTransformers.
|
|
RequireParagraph
|
|
)
|
|
|
|
// A Config struct is a data structure that holds configuration of the Parser.
|
|
type Config struct {
|
|
Options map[OptionName]interface{}
|
|
BlockParsers util.PrioritizedSlice /*<BlockParser>*/
|
|
InlineParsers util.PrioritizedSlice /*<InlineParser>*/
|
|
ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
|
|
ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/
|
|
EscapedSpace bool
|
|
}
|
|
|
|
// NewConfig returns a new Config.
|
|
func NewConfig() *Config {
|
|
return &Config{
|
|
Options: map[OptionName]interface{}{},
|
|
BlockParsers: util.PrioritizedSlice{},
|
|
InlineParsers: util.PrioritizedSlice{},
|
|
ParagraphTransformers: util.PrioritizedSlice{},
|
|
ASTTransformers: util.PrioritizedSlice{},
|
|
}
|
|
}
|
|
|
|
// An Option interface is a functional option type for the Parser.
|
|
type Option interface {
|
|
SetParserOption(*Config)
|
|
}
|
|
|
|
// OptionName is a name of parser options.
|
|
type OptionName string
|
|
|
|
// Attribute is an option name that spacify attributes of elements.
|
|
const optAttribute OptionName = "Attribute"
|
|
|
|
type withAttribute struct {
|
|
}
|
|
|
|
func (o *withAttribute) SetParserOption(c *Config) {
|
|
c.Options[optAttribute] = true
|
|
}
|
|
|
|
// WithAttribute is a functional option that enables custom attributes.
|
|
func WithAttribute() Option {
|
|
return &withAttribute{}
|
|
}
|
|
|
|
// A Parser interface parses Markdown text into AST nodes.
|
|
type Parser interface {
|
|
// Parse parses the given Markdown text into AST nodes.
|
|
Parse(reader text.Reader, opts ...ParseOption) ast.Node
|
|
|
|
// AddOption adds the given option to this parser.
|
|
AddOptions(...Option)
|
|
}
|
|
|
|
// A SetOptioner interface sets the given option to the object.
|
|
type SetOptioner interface {
|
|
// SetOption sets the given option to the object.
|
|
// Unacceptable options may be passed.
|
|
// Thus implementations must ignore unacceptable options.
|
|
SetOption(name OptionName, value interface{})
|
|
}
|
|
|
|
// A BlockParser interface parses a block level element like Paragraph, List,
|
|
// Blockquote etc.
|
|
type BlockParser interface {
|
|
// Trigger returns a list of characters that triggers Parse method of
|
|
// this parser.
|
|
// If Trigger returns a nil, Open will be called with any lines.
|
|
Trigger() []byte
|
|
|
|
// Open parses the current line and returns a result of parsing.
|
|
//
|
|
// Open must not parse beyond the current line.
|
|
// If Open has been able to parse the current line, Open must advance a reader
|
|
// position by consumed byte length.
|
|
//
|
|
// If Open has not been able to parse the current line, Open should returns
|
|
// (nil, NoChildren). If Open has been able to parse the current line, Open
|
|
// should returns a new Block node and returns HasChildren or NoChildren.
|
|
Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)
|
|
|
|
// Continue parses the current line and returns a result of parsing.
|
|
//
|
|
// Continue must not parse beyond the current line.
|
|
// If Continue has been able to parse the current line, Continue must advance
|
|
// a reader position by consumed byte length.
|
|
//
|
|
// If Continue has not been able to parse the current line, Continue should
|
|
// returns Close. If Continue has been able to parse the current line,
|
|
// Continue should returns (Continue | NoChildren) or
|
|
// (Continue | HasChildren)
|
|
Continue(node ast.Node, reader text.Reader, pc Context) State
|
|
|
|
// Close will be called when the parser returns Close.
|
|
Close(node ast.Node, reader text.Reader, pc Context)
|
|
|
|
// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
|
|
// otherwise false.
|
|
CanInterruptParagraph() bool
|
|
|
|
// CanAcceptIndentedLine returns true if the parser can open new node when
|
|
// the given line is being indented more than 3 spaces.
|
|
CanAcceptIndentedLine() bool
|
|
}
|
|
|
|
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
|
|
type InlineParser interface {
|
|
// Trigger returns a list of characters that triggers Parse method of
|
|
// this parser.
|
|
// Trigger characters must be a punctuation or a halfspace.
|
|
// Halfspaces triggers this parser when character is any spaces characters or
|
|
// a head of line
|
|
Trigger() []byte
|
|
|
|
// Parse parse the given block into an inline node.
|
|
//
|
|
// Parse can parse beyond the current line.
|
|
// If Parse has been able to parse the current line, it must advance a reader
|
|
// position by consumed byte length.
|
|
Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
|
|
}
|
|
|
|
// A CloseBlocker interface is a callback function that will be
|
|
// called when block is closed in the inline parsing.
|
|
type CloseBlocker interface {
|
|
// CloseBlock will be called when a block is closed.
|
|
CloseBlock(parent ast.Node, block text.Reader, pc Context)
|
|
}
|
|
|
|
// A ParagraphTransformer transforms parsed Paragraph nodes.
|
|
// For example, link references are searched in parsed Paragraphs.
|
|
type ParagraphTransformer interface {
|
|
// Transform transforms the given paragraph.
|
|
Transform(node *ast.Paragraph, reader text.Reader, pc Context)
|
|
}
|
|
|
|
// ASTTransformer transforms entire Markdown document AST tree.
|
|
type ASTTransformer interface {
|
|
// Transform transforms the given AST tree.
|
|
Transform(node *ast.Document, reader text.Reader, pc Context)
|
|
}
|
|
|
|
// DefaultBlockParsers returns a new list of default BlockParsers.
|
|
// Priorities of default BlockParsers are:
|
|
//
|
|
// SetextHeadingParser, 100
|
|
// ThematicBreakParser, 200
|
|
// ListParser, 300
|
|
// ListItemParser, 400
|
|
// CodeBlockParser, 500
|
|
// ATXHeadingParser, 600
|
|
// FencedCodeBlockParser, 700
|
|
// BlockquoteParser, 800
|
|
// HTMLBlockParser, 900
|
|
// ParagraphParser, 1000
|
|
func DefaultBlockParsers() []util.PrioritizedValue {
|
|
return []util.PrioritizedValue{
|
|
util.Prioritized(NewSetextHeadingParser(), 100),
|
|
util.Prioritized(NewThematicBreakParser(), 200),
|
|
util.Prioritized(NewListParser(), 300),
|
|
util.Prioritized(NewListItemParser(), 400),
|
|
util.Prioritized(NewCodeBlockParser(), 500),
|
|
util.Prioritized(NewATXHeadingParser(), 600),
|
|
util.Prioritized(NewFencedCodeBlockParser(), 700),
|
|
util.Prioritized(NewBlockquoteParser(), 800),
|
|
util.Prioritized(NewHTMLBlockParser(), 900),
|
|
util.Prioritized(NewParagraphParser(), 1000),
|
|
}
|
|
}
|
|
|
|
// DefaultInlineParsers returns a new list of default InlineParsers.
|
|
// Priorities of default InlineParsers are:
|
|
//
|
|
// CodeSpanParser, 100
|
|
// LinkParser, 200
|
|
// AutoLinkParser, 300
|
|
// RawHTMLParser, 400
|
|
// EmphasisParser, 500
|
|
func DefaultInlineParsers() []util.PrioritizedValue {
|
|
return []util.PrioritizedValue{
|
|
util.Prioritized(NewCodeSpanParser(), 100),
|
|
util.Prioritized(NewLinkParser(), 200),
|
|
util.Prioritized(NewAutoLinkParser(), 300),
|
|
util.Prioritized(NewRawHTMLParser(), 400),
|
|
util.Prioritized(NewEmphasisParser(), 500),
|
|
}
|
|
}
|
|
|
|
// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
|
|
// Priorities of default ParagraphTransformers are:
|
|
//
|
|
// LinkReferenceParagraphTransformer, 100
|
|
func DefaultParagraphTransformers() []util.PrioritizedValue {
|
|
return []util.PrioritizedValue{
|
|
util.Prioritized(LinkReferenceParagraphTransformer, 100),
|
|
}
|
|
}
|
|
|
|
// A Block struct holds a node and correspond parser pair.
|
|
type Block struct {
|
|
// Node is a BlockNode.
|
|
Node ast.Node
|
|
// Parser is a BlockParser.
|
|
Parser BlockParser
|
|
}
|
|
|
|
type parser struct {
|
|
options map[OptionName]interface{}
|
|
blockParsers [256][]BlockParser
|
|
freeBlockParsers []BlockParser
|
|
inlineParsers [256][]InlineParser
|
|
closeBlockers []CloseBlocker
|
|
paragraphTransformers []ParagraphTransformer
|
|
astTransformers []ASTTransformer
|
|
escapedSpace bool
|
|
config *Config
|
|
initSync sync.Once
|
|
}
|
|
|
|
type withBlockParsers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withBlockParsers) SetParserOption(c *Config) {
|
|
c.BlockParsers = append(c.BlockParsers, o.value...)
|
|
}
|
|
|
|
// WithBlockParsers is a functional option that allow you to add
|
|
// BlockParsers to the parser.
|
|
func WithBlockParsers(bs ...util.PrioritizedValue) Option {
|
|
return &withBlockParsers{bs}
|
|
}
|
|
|
|
type withInlineParsers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withInlineParsers) SetParserOption(c *Config) {
|
|
c.InlineParsers = append(c.InlineParsers, o.value...)
|
|
}
|
|
|
|
// WithInlineParsers is a functional option that allow you to add
|
|
// InlineParsers to the parser.
|
|
func WithInlineParsers(bs ...util.PrioritizedValue) Option {
|
|
return &withInlineParsers{bs}
|
|
}
|
|
|
|
type withParagraphTransformers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withParagraphTransformers) SetParserOption(c *Config) {
|
|
c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
|
|
}
|
|
|
|
// WithParagraphTransformers is a functional option that allow you to add
|
|
// ParagraphTransformers to the parser.
|
|
func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
|
|
return &withParagraphTransformers{ps}
|
|
}
|
|
|
|
type withASTTransformers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withASTTransformers) SetParserOption(c *Config) {
|
|
c.ASTTransformers = append(c.ASTTransformers, o.value...)
|
|
}
|
|
|
|
// WithASTTransformers is a functional option that allow you to add
|
|
// ASTTransformers to the parser.
|
|
func WithASTTransformers(ps ...util.PrioritizedValue) Option {
|
|
return &withASTTransformers{ps}
|
|
}
|
|
|
|
type withEscapedSpace struct {
|
|
}
|
|
|
|
func (o *withEscapedSpace) SetParserOption(c *Config) {
|
|
c.EscapedSpace = true
|
|
}
|
|
|
|
// WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
|
|
func WithEscapedSpace() Option {
|
|
return &withEscapedSpace{}
|
|
}
|
|
|
|
type withOption struct {
|
|
name OptionName
|
|
value interface{}
|
|
}
|
|
|
|
func (o *withOption) SetParserOption(c *Config) {
|
|
c.Options[o.name] = o.value
|
|
}
|
|
|
|
// WithOption is a functional option that allow you to set
|
|
// an arbitrary option to the parser.
|
|
func WithOption(name OptionName, value interface{}) Option {
|
|
return &withOption{name, value}
|
|
}
|
|
|
|
// NewParser returns a new Parser with given options.
|
|
func NewParser(options ...Option) Parser {
|
|
config := NewConfig()
|
|
for _, opt := range options {
|
|
opt.SetParserOption(config)
|
|
}
|
|
|
|
p := &parser{
|
|
options: map[OptionName]interface{}{},
|
|
config: config,
|
|
}
|
|
|
|
return p
|
|
}
|
|
|
|
func (p *parser) AddOptions(opts ...Option) {
|
|
for _, opt := range opts {
|
|
opt.SetParserOption(p.config)
|
|
}
|
|
}
|
|
|
|
func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
bp, ok := v.Value.(BlockParser)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
|
|
}
|
|
tcs := bp.Trigger()
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
if tcs == nil {
|
|
p.freeBlockParsers = append(p.freeBlockParsers, bp)
|
|
} else {
|
|
for _, tc := range tcs {
|
|
if p.blockParsers[tc] == nil {
|
|
p.blockParsers[tc] = []BlockParser{}
|
|
}
|
|
p.blockParsers[tc] = append(p.blockParsers[tc], bp)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
ip, ok := v.Value.(InlineParser)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
|
|
}
|
|
tcs := ip.Trigger()
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
if cb, ok := ip.(CloseBlocker); ok {
|
|
p.closeBlockers = append(p.closeBlockers, cb)
|
|
}
|
|
for _, tc := range tcs {
|
|
if p.inlineParsers[tc] == nil {
|
|
p.inlineParsers[tc] = []InlineParser{}
|
|
}
|
|
p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
|
|
}
|
|
}
|
|
|
|
func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
pt, ok := v.Value.(ParagraphTransformer)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
|
|
}
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
p.paragraphTransformers = append(p.paragraphTransformers, pt)
|
|
}
|
|
|
|
func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
at, ok := v.Value.(ASTTransformer)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
|
|
}
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
p.astTransformers = append(p.astTransformers, at)
|
|
}
|
|
|
|
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
|
|
type ParseConfig struct {
|
|
Context Context
|
|
}
|
|
|
|
// A ParseOption is a functional option type for the Parser.Parse.
|
|
type ParseOption func(c *ParseConfig)
|
|
|
|
// WithContext is a functional option that allow you to override
|
|
// a default context.
|
|
func WithContext(context Context) ParseOption {
|
|
return func(c *ParseConfig) {
|
|
c.Context = context
|
|
}
|
|
}
|
|
|
|
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
|
|
p.initSync.Do(func() {
|
|
p.config.BlockParsers.Sort()
|
|
for _, v := range p.config.BlockParsers {
|
|
p.addBlockParser(v, p.config.Options)
|
|
}
|
|
for i := range p.blockParsers {
|
|
if p.blockParsers[i] != nil {
|
|
p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
|
|
}
|
|
}
|
|
|
|
p.config.InlineParsers.Sort()
|
|
for _, v := range p.config.InlineParsers {
|
|
p.addInlineParser(v, p.config.Options)
|
|
}
|
|
p.config.ParagraphTransformers.Sort()
|
|
for _, v := range p.config.ParagraphTransformers {
|
|
p.addParagraphTransformer(v, p.config.Options)
|
|
}
|
|
p.config.ASTTransformers.Sort()
|
|
for _, v := range p.config.ASTTransformers {
|
|
p.addASTTransformer(v, p.config.Options)
|
|
}
|
|
p.escapedSpace = p.config.EscapedSpace
|
|
p.config = nil
|
|
})
|
|
c := &ParseConfig{}
|
|
for _, opt := range opts {
|
|
opt(c)
|
|
}
|
|
if c.Context == nil {
|
|
c.Context = NewContext()
|
|
}
|
|
pc := c.Context
|
|
root := ast.NewDocument()
|
|
p.parseBlocks(root, reader, pc)
|
|
|
|
blockReader := text.NewBlockReader(reader.Source(), nil)
|
|
p.walkBlock(root, func(node ast.Node) {
|
|
p.parseBlock(blockReader, node, pc)
|
|
})
|
|
for _, at := range p.astTransformers {
|
|
at.Transform(root, reader, pc)
|
|
}
|
|
// root.Dump(reader.Source(), 0)
|
|
return root
|
|
}
|
|
|
|
func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
|
|
for _, pt := range p.paragraphTransformers {
|
|
pt.Transform(node, reader, pc)
|
|
if node.Parent() == nil {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
|
|
blocks := pc.OpenedBlocks()
|
|
for i := from; i >= to; i-- {
|
|
node := blocks[i].Node
|
|
paragraph, ok := node.(*ast.Paragraph)
|
|
if ok && node.Parent() != nil {
|
|
p.transformParagraph(paragraph, reader, pc)
|
|
}
|
|
if node.Parent() != nil { // closes only if node has not been transformed
|
|
blocks[i].Parser.Close(blocks[i].Node, reader, pc)
|
|
}
|
|
}
|
|
if from == len(blocks)-1 {
|
|
blocks = blocks[0:to]
|
|
} else {
|
|
blocks = append(blocks[0:to], blocks[from+1:]...)
|
|
}
|
|
pc.SetOpenedBlocks(blocks)
|
|
}
|
|
|
|
type blockOpenResult int
|
|
|
|
const (
|
|
paragraphContinuation blockOpenResult = iota + 1
|
|
newBlocksOpened
|
|
noBlocksOpened
|
|
)
|
|
|
|
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
|
|
result := blockOpenResult(noBlocksOpened)
|
|
continuable := false
|
|
lastBlock := pc.LastOpenedBlock()
|
|
if lastBlock.Node != nil {
|
|
continuable = ast.IsParagraph(lastBlock.Node)
|
|
}
|
|
retry:
|
|
var bps []BlockParser
|
|
line, _ := reader.PeekLine()
|
|
w, pos := util.IndentWidth(line, reader.LineOffset())
|
|
if w >= len(line) {
|
|
pc.SetBlockOffset(-1)
|
|
pc.SetBlockIndent(-1)
|
|
} else {
|
|
pc.SetBlockOffset(pos)
|
|
pc.SetBlockIndent(w)
|
|
}
|
|
if line == nil || line[0] == '\n' {
|
|
goto continuable
|
|
}
|
|
bps = p.freeBlockParsers
|
|
if pos < len(line) {
|
|
bps = p.blockParsers[line[pos]]
|
|
if bps == nil {
|
|
bps = p.freeBlockParsers
|
|
}
|
|
}
|
|
if bps == nil {
|
|
goto continuable
|
|
}
|
|
|
|
for _, bp := range bps {
|
|
if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
|
|
continue
|
|
}
|
|
if w > 3 && !bp.CanAcceptIndentedLine() {
|
|
continue
|
|
}
|
|
lastBlock = pc.LastOpenedBlock()
|
|
last := lastBlock.Node
|
|
node, state := bp.Open(parent, reader, pc)
|
|
if node != nil {
|
|
// Parser requires last node to be a paragraph.
|
|
// With table extension:
|
|
//
|
|
// 0
|
|
// -:
|
|
// -
|
|
//
|
|
// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
|
|
// are being paragraph when the Settext heading parser tries to parse the 3rd
|
|
// line.
|
|
// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
|
|
// by a paragraph transformer. So this text should be converted to a table and
|
|
// an empty list.
|
|
if state&RequireParagraph != 0 {
|
|
if last == parent.LastChild() {
|
|
// Opened paragraph may be transformed by ParagraphTransformers in
|
|
// closeBlocks().
|
|
lastBlock.Parser.Close(last, reader, pc)
|
|
blocks := pc.OpenedBlocks()
|
|
pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
|
|
if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
|
|
// Paragraph has been transformed.
|
|
// So this parser is considered as failing.
|
|
continuable = false
|
|
goto retry
|
|
}
|
|
}
|
|
}
|
|
node.SetBlankPreviousLines(blankLine)
|
|
if last != nil && last.Parent() == nil {
|
|
lastPos := len(pc.OpenedBlocks()) - 1
|
|
p.closeBlocks(lastPos, lastPos, reader, pc)
|
|
}
|
|
parent.AppendChild(parent, node)
|
|
result = newBlocksOpened
|
|
be := Block{node, bp}
|
|
pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
|
|
if state&HasChildren != 0 {
|
|
parent = node
|
|
goto retry // try child block
|
|
}
|
|
break // no children, can not open more blocks on this line
|
|
}
|
|
}
|
|
|
|
continuable:
|
|
if result == noBlocksOpened && continuable {
|
|
state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
|
|
if state&Continue != 0 {
|
|
result = paragraphContinuation
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
type lineStat struct {
|
|
lineNum int
|
|
level int
|
|
isBlank bool
|
|
}
|
|
|
|
func isBlankLine(lineNum, level int, stats []lineStat) bool {
|
|
ret := true
|
|
for i := len(stats) - 1 - level; i >= 0; i-- {
|
|
ret = false
|
|
s := stats[i]
|
|
if s.lineNum == lineNum {
|
|
if s.level < level && s.isBlank {
|
|
return true
|
|
} else if s.level == level {
|
|
return s.isBlank
|
|
}
|
|
}
|
|
if s.lineNum < lineNum {
|
|
return ret
|
|
}
|
|
}
|
|
return ret
|
|
}
|
|
|
|
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
|
|
pc.SetOpenedBlocks([]Block{})
|
|
blankLines := make([]lineStat, 0, 128)
|
|
var isBlank bool
|
|
for { // process blocks separated by blank lines
|
|
_, lines, ok := reader.SkipBlankLines()
|
|
if !ok {
|
|
return
|
|
}
|
|
lineNum, _ := reader.Position()
|
|
if lines != 0 {
|
|
blankLines = blankLines[0:0]
|
|
l := len(pc.OpenedBlocks())
|
|
for i := 0; i < l; i++ {
|
|
blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
|
|
}
|
|
}
|
|
isBlank = isBlankLine(lineNum-1, 0, blankLines)
|
|
// first, we try to open blocks
|
|
if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
|
|
return
|
|
}
|
|
reader.AdvanceLine()
|
|
for { // process opened blocks line by line
|
|
openedBlocks := pc.OpenedBlocks()
|
|
l := len(openedBlocks)
|
|
if l == 0 {
|
|
break
|
|
}
|
|
lastIndex := l - 1
|
|
for i := 0; i < l; i++ {
|
|
be := openedBlocks[i]
|
|
line, _ := reader.PeekLine()
|
|
if line == nil {
|
|
p.closeBlocks(lastIndex, 0, reader, pc)
|
|
reader.AdvanceLine()
|
|
return
|
|
}
|
|
lineNum, _ := reader.Position()
|
|
blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
|
|
// If node is a paragraph, p.openBlocks determines whether it is continuable.
|
|
// So we do not process paragraphs here.
|
|
if !ast.IsParagraph(be.Node) {
|
|
state := be.Parser.Continue(be.Node, reader, pc)
|
|
if state&Continue != 0 {
|
|
// When current node is a container block and has no children,
|
|
// we try to open new child nodes
|
|
if state&HasChildren != 0 && i == lastIndex {
|
|
isBlank = isBlankLine(lineNum-1, i, blankLines)
|
|
p.openBlocks(be.Node, isBlank, reader, pc)
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
// current node may be closed or lazy continuation
|
|
isBlank = isBlankLine(lineNum-1, i, blankLines)
|
|
thisParent := parent
|
|
if i != 0 {
|
|
thisParent = openedBlocks[i-1].Node
|
|
}
|
|
lastNode := openedBlocks[lastIndex].Node
|
|
result := p.openBlocks(thisParent, isBlank, reader, pc)
|
|
if result != paragraphContinuation {
|
|
// lastNode is a paragraph and was transformed by the paragraph
|
|
// transformers.
|
|
if openedBlocks[lastIndex].Node != lastNode {
|
|
lastIndex--
|
|
}
|
|
p.closeBlocks(lastIndex, i, reader, pc)
|
|
}
|
|
break
|
|
}
|
|
|
|
reader.AdvanceLine()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
|
|
for c := block.FirstChild(); c != nil; c = c.NextSibling() {
|
|
p.walkBlock(c, cb)
|
|
}
|
|
cb(block)
|
|
}
|
|
|
|
const (
|
|
lineBreakHard uint8 = 1 << iota
|
|
lineBreakSoft
|
|
lineBreakVisible
|
|
)
|
|
|
|
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
|
|
if parent.IsRaw() {
|
|
return
|
|
}
|
|
escaped := false
|
|
source := block.Source()
|
|
block.Reset(parent.Lines())
|
|
for {
|
|
retry:
|
|
line, _ := block.PeekLine()
|
|
if line == nil {
|
|
break
|
|
}
|
|
lineLength := len(line)
|
|
var lineBreakFlags uint8
|
|
hasNewLine := line[lineLength-1] == '\n'
|
|
if ((lineLength >= 3 && line[lineLength-2] == '\\' &&
|
|
line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
|
|
lineLength -= 2
|
|
lineBreakFlags |= lineBreakHard | lineBreakVisible
|
|
} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
|
|
line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) &&
|
|
hasNewLine { // ends with \\r\n
|
|
lineLength -= 3
|
|
lineBreakFlags |= lineBreakHard | lineBreakVisible
|
|
} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' &&
|
|
hasNewLine { // ends with [space][space]\n
|
|
lineLength -= 3
|
|
lineBreakFlags |= lineBreakHard
|
|
} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' &&
|
|
line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
|
|
lineLength -= 4
|
|
lineBreakFlags |= lineBreakHard
|
|
} else if hasNewLine {
|
|
// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
|
|
// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
|
|
// See https://spec.commonmark.org/0.30/#soft-line-breaks
|
|
lineBreakFlags |= lineBreakSoft
|
|
}
|
|
|
|
l, startPosition := block.Position()
|
|
n := 0
|
|
for i := 0; i < lineLength; i++ {
|
|
c := line[i]
|
|
if c == '\n' {
|
|
break
|
|
}
|
|
isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
|
|
isPunct := util.IsPunct(c)
|
|
if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
|
|
parserChar := c
|
|
if isSpace || (i == 0 && !isPunct) {
|
|
parserChar = ' '
|
|
}
|
|
ips := p.inlineParsers[parserChar]
|
|
if ips != nil {
|
|
block.Advance(n)
|
|
n = 0
|
|
savedLine, savedPosition := block.Position()
|
|
if i != 0 {
|
|
_, currentPosition := block.Position()
|
|
ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
|
|
_, startPosition = block.Position()
|
|
}
|
|
var inlineNode ast.Node
|
|
for _, ip := range ips {
|
|
inlineNode = ip.Parse(parent, block, pc)
|
|
if inlineNode != nil {
|
|
break
|
|
}
|
|
block.SetPosition(savedLine, savedPosition)
|
|
}
|
|
if inlineNode != nil {
|
|
parent.AppendChild(parent, inlineNode)
|
|
goto retry
|
|
}
|
|
}
|
|
}
|
|
if escaped {
|
|
escaped = false
|
|
n++
|
|
continue
|
|
}
|
|
|
|
if c == '\\' {
|
|
escaped = true
|
|
n++
|
|
continue
|
|
}
|
|
|
|
escaped = false
|
|
n++
|
|
}
|
|
if n != 0 {
|
|
block.Advance(n)
|
|
}
|
|
currentL, currentPosition := block.Position()
|
|
if l != currentL {
|
|
continue
|
|
}
|
|
diff := startPosition.Between(currentPosition)
|
|
var text *ast.Text
|
|
if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
|
|
text = ast.NewTextSegment(diff)
|
|
} else {
|
|
text = ast.NewTextSegment(diff.TrimRightSpace(source))
|
|
}
|
|
text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
|
|
text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
|
|
parent.AppendChild(parent, text)
|
|
block.AdvanceLine()
|
|
}
|
|
|
|
ProcessDelimiters(nil, pc)
|
|
for _, ip := range p.closeBlockers {
|
|
ip.CloseBlock(parent, block, pc)
|
|
}
|
|
}
|