gotosocial/vendor/github.com/yuin/goldmark/parser/parser.go

1260 lines
33 KiB
Go
Raw Normal View History

// Package parser contains the types and functions used to parse Markdown text.
package parser
import (
"fmt"
"strings"
"sync"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// A Reference interface represents a link reference in Markdown text.
type Reference interface {
	// String implements Stringer.
	String() string

	// Label returns a label of the reference.
	Label() []byte

	// Destination returns a destination(URL) of the reference.
	Destination() []byte

	// Title returns a title of the reference.
	Title() []byte
}

// reference is the Reference implementation returned by NewReference.
// It stores the three byte slices it was given without copying them.
type reference struct {
	label       []byte
	destination []byte
	title       []byte
}

// NewReference returns a new Reference holding the given label,
// destination and title.
func NewReference(label, destination, title []byte) Reference {
	ref := new(reference)
	ref.label = label
	ref.destination = destination
	ref.title = title
	return ref
}

// Label returns the label of the reference.
func (r *reference) Label() []byte { return r.label }

// Destination returns the destination (URL) of the reference.
func (r *reference) Destination() []byte { return r.destination }

// Title returns the title of the reference.
func (r *reference) Title() []byte { return r.title }

// String renders the reference in a human-readable debugging form.
func (r *reference) String() string {
	const layout = "Reference{Label:%s, Destination:%s, Title:%s}"
	return fmt.Sprintf(layout, r.Label(), r.Destination(), r.Title())
}
// An IDs interface is a collection of element ids.
type IDs interface {
// Generate generates a new element id from the given value for a node
// of the given kind.
Generate(value []byte, kind ast.NodeKind) []byte
// Put puts the given element id into the table of used ids.
Put(value []byte)
}
// ids is the default IDs implementation. The values map has one entry for
// every id that has already been generated or registered through Put.
type ids struct {
	values map[string]bool
}

// newIDs returns an empty default IDs collection.
func newIDs() IDs {
	used := make(map[string]bool)
	return &ids{values: used}
}
// Generate derives an element id from value and records it as used.
//
// The value is trimmed, then scanned one UTF-8 sequence at a time. Multi-byte
// sequences are skipped, ASCII letters and digits are kept (upper case is
// lowered), and whitespace, '-' and '_' each become '-'. Every other
// single-byte character is dropped. If nothing remains, the id falls back to
// "heading" for heading nodes and "id" for everything else.
//
// When the resulting id is already taken, "-1", "-2", ... is appended until
// an unused id is found.
func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
	trimmed := util.TrimRightSpace(util.TrimLeftSpace(value))

	slug := []byte{}
	pos := 0
	for pos < len(trimmed) {
		b := trimmed[pos]
		width := util.UTF8Len(b)
		pos += int(width)
		if width != 1 {
			// Not a single-byte character: contributes nothing to the id.
			continue
		}
		switch {
		case util.IsAlphaNumeric(b):
			if 'A' <= b && b <= 'Z' {
				b += 'a' - 'A'
			}
			slug = append(slug, b)
		case util.IsSpace(b) || b == '-' || b == '_':
			slug = append(slug, '-')
		}
	}

	if len(slug) == 0 {
		fallback := "id"
		if kind == ast.KindHeading {
			fallback = "heading"
		}
		slug = []byte(fallback)
	}

	key := util.BytesToReadOnlyString(slug)
	if _, taken := s.values[key]; !taken {
		s.values[key] = true
		return slug
	}

	// The plain id is in use; probe numbered suffixes until one is free.
	for n := 1; ; n++ {
		candidate := fmt.Sprintf("%s-%d", slug, n)
		if _, taken := s.values[candidate]; taken {
			continue
		}
		s.values[candidate] = true
		return []byte(candidate)
	}
}
// Put marks value as a used id so that Generate will not hand it out again.
func (s *ids) Put(value []byte) {
	key := util.BytesToReadOnlyString(value)
	s.values[key] = true
}
// ContextKey is a key that is used to set arbitrary values to the context.
type ContextKey int

// ContextKeyMax is a maximum value of the ContextKey.
var ContextKeyMax ContextKey

// NewContextKey returns a new ContextKey value.
//
// Each call bumps ContextKeyMax by one and returns the new maximum. The
// counter is a plain package variable with no synchronization.
func NewContextKey() ContextKey {
	next := ContextKeyMax + 1
	ContextKeyMax = next
	return next
}
// A Context interface holds the information that is necessary to parse
// Markdown text.
type Context interface {
// String implements Stringer.
String() string
// Get returns a value associated with the given key.
Get(ContextKey) interface{}
// ComputeIfAbsent computes a value if a value associated with the given key is absent and returns the value.
ComputeIfAbsent(ContextKey, func() interface{}) interface{}
// Set sets the given value to the context.
Set(ContextKey, interface{})
// AddReference adds the given reference to this context.
AddReference(Reference)
// Reference returns (a reference, true) if a reference associated with
// the given label exists, otherwise (nil, false).
Reference(label string) (Reference, bool)
// References returns a list of references.
References() []Reference
// IDs returns a collection of the element ids.
IDs() IDs
// BlockOffset returns the position of the first non-space character on
// the current line.
// This value is valid only for BlockParser.Open.
// BlockOffset returns -1 if current line is blank.
BlockOffset() int
// SetBlockOffset sets the position of the first non-space character on
// the current line.
// This value is valid only for BlockParser.Open.
SetBlockOffset(int)
// BlockIndent returns an indent width on current line.
// This value is valid only for BlockParser.Open.
// BlockIndent returns -1 if current line is blank.
BlockIndent() int
// SetBlockIndent sets an indent width on current line.
// This value is valid only for BlockParser.Open.
SetBlockIndent(int)
// FirstDelimiter returns a first delimiter of the current delimiter list.
FirstDelimiter() *Delimiter
// LastDelimiter returns a last delimiter of the current delimiter list.
LastDelimiter() *Delimiter
// PushDelimiter appends the given delimiter to the tail of the current
// delimiter list.
PushDelimiter(delimiter *Delimiter)
// RemoveDelimiter removes the given delimiter from the current delimiter list.
RemoveDelimiter(d *Delimiter)
// ClearDelimiters clears the current delimiter list.
ClearDelimiters(bottom ast.Node)
// OpenedBlocks returns a list of nodes that are currently being parsed.
OpenedBlocks() []Block
// SetOpenedBlocks sets a list of nodes that are currently being parsed.
SetOpenedBlocks([]Block)
// LastOpenedBlock returns the last node that is currently being parsed.
LastOpenedBlock() Block
// IsInLinkLabel returns true if current position seems to be in link label.
IsInLinkLabel() bool
}
// A ContextConfig struct is a data structure that holds configuration of the Context.
type ContextConfig struct {
	// IDs is the element id collection the Context will expose.
	IDs IDs
}

// A ContextOption is a functional option type for the Context.
type ContextOption func(*ContextConfig)

// WithIDs is a functional option for the Context that replaces the element
// id collection with ids.
func WithIDs(ids IDs) ContextOption {
	return func(cfg *ContextConfig) {
		cfg.IDs = ids
	}
}
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
// store holds arbitrary values, indexed directly by ContextKey.
store []interface{}
// ids is the element id collection returned by IDs().
ids IDs
// refs maps a label normalized by util.ToLinkReference to its reference.
refs map[string]Reference
// blockOffset and blockIndent describe the current line; NewContext
// initializes both to -1.
blockOffset int
blockIndent int
// delimiters is the head and lastDelimiter the tail of the doubly linked
// delimiter list.
delimiters *Delimiter
lastDelimiter *Delimiter
// openedBlocks lists the blocks that are currently being parsed.
openedBlocks []Block
}
// NewContext returns a new Context.
//
// The default element id collection can be replaced with WithIDs. The value
// store is sized from ContextKeyMax at the time of the call.
// NOTE(review): a ContextKey created after the context exists would index
// past the end of the store in Get/Set/ComputeIfAbsent - confirm that all
// keys are registered before contexts are created.
func NewContext(options ...ContextOption) Context {
	cfg := ContextConfig{IDs: newIDs()}
	for i := range options {
		options[i](&cfg)
	}

	pc := new(parseContext)
	pc.store = make([]interface{}, ContextKeyMax+1)
	pc.refs = make(map[string]Reference)
	pc.ids = cfg.IDs
	pc.blockOffset = -1
	pc.blockIndent = -1
	pc.openedBlocks = []Block{}
	return pc
}
// Get returns the value stored under key, or nil if none has been stored.
// The key is used directly as an index into the store slice.
func (p *parseContext) Get(key ContextKey) interface{} {
return p.store[key]
}
// ComputeIfAbsent returns the value stored under key. When the slot holds
// nil, f is called, its result is stored under key, and that result is
// returned.
func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
	if existing := p.store[key]; existing != nil {
		return existing
	}
	computed := f()
	p.store[key] = computed
	return computed
}
// Set stores value under key, replacing any previous value.
// The key is used directly as an index into the store slice.
func (p *parseContext) Set(key ContextKey, value interface{}) {
p.store[key] = value
}
// IDs returns the element id collection held by this context.
func (p *parseContext) IDs() IDs {
return p.ids
}
// BlockOffset returns the value last passed to SetBlockOffset
// (-1 on a freshly created context).
func (p *parseContext) BlockOffset() int {
return p.blockOffset
}
// SetBlockOffset records v as the block offset of the current line.
func (p *parseContext) SetBlockOffset(v int) {
p.blockOffset = v
}
// BlockIndent returns the value last passed to SetBlockIndent
// (-1 on a freshly created context).
func (p *parseContext) BlockIndent() int {
return p.blockIndent
}
// SetBlockIndent records v as the indent width of the current line.
func (p *parseContext) SetBlockIndent(v int) {
p.blockIndent = v
}
// LastDelimiter returns the tail of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
return p.lastDelimiter
}
// FirstDelimiter returns the head of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
return p.delimiters
}
// PushDelimiter appends d to the tail of the delimiter list. When the list
// is empty, d becomes both its head and its tail.
func (p *parseContext) PushDelimiter(d *Delimiter) {
	if p.delimiters == nil {
		p.delimiters, p.lastDelimiter = d, d
		return
	}
	tail := p.lastDelimiter
	p.lastDelimiter = d
	tail.NextDelimiter = d
	d.PreviousDelimiter = tail
}
// RemoveDelimiter unlinks d from the delimiter list and then removes or
// replaces the delimiter node in the AST.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
// Unlink d from its neighbours, moving the head when d was the first entry.
if d.PreviousDelimiter == nil {
p.delimiters = d.NextDelimiter
} else {
d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
if d.NextDelimiter != nil {
d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
}
}
// Move the tail when d was the last entry.
if d.NextDelimiter == nil {
p.lastDelimiter = d.PreviousDelimiter
}
// Make sure the new head and tail do not point outside the list.
if p.delimiters != nil {
p.delimiters.PreviousDelimiter = nil
}
if p.lastDelimiter != nil {
p.lastDelimiter.NextDelimiter = nil
}
// Detach d itself from the list.
d.NextDelimiter = nil
d.PreviousDelimiter = nil
// A delimiter with remaining length is handed to
// ast.MergeOrReplaceTextSegment together with its segment (presumably so
// the leftover characters stay in the tree as text - confirm); a delimiter
// with no remaining length is removed from its parent.
if d.Length != 0 {
ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
} else {
d.Parent().RemoveChild(d.Parent(), d)
}
}
// ClearDelimiters walks backwards through the sibling nodes, starting at the
// last delimiter and stopping at bottom or at the first sibling. Every
// *Delimiter met on the way is passed to RemoveDelimiter.
func (p *parseContext) ClearDelimiters(bottom ast.Node) {
	if p.lastDelimiter == nil {
		return
	}
	var node ast.Node = p.lastDelimiter
	for node != nil && node != bottom {
		// Fetch the previous sibling first: removing the delimiter may
		// detach or replace the current node.
		previous := node.PreviousSibling()
		if delim, isDelimiter := node.(*Delimiter); isDelimiter {
			p.RemoveDelimiter(delim)
		}
		node = previous
	}
}
// AddReference registers ref under its label normalized by
// util.ToLinkReference. If a reference with the same normalized label is
// already registered, the existing one is kept and ref is ignored.
func (p *parseContext) AddReference(ref Reference) {
	key := util.ToLinkReference(ref.Label())
	if _, exists := p.refs[key]; exists {
		return
	}
	p.refs[key] = ref
}
// Reference looks label up in the reference table and reports whether it
// was found. The label is used as the map key as-is, whereas AddReference
// stores entries under util.ToLinkReference(label).
func (p *parseContext) Reference(label string) (Reference, bool) {
v, ok := p.refs[label]
return v, ok
}
// References returns all registered references as a new slice. The order
// follows map iteration and is therefore unspecified.
func (p *parseContext) References() []Reference {
	list := make([]Reference, len(p.refs))
	next := 0
	for _, ref := range p.refs {
		list[next] = ref
		next++
	}
	return list
}
// String renders the value store and the registered references for
// debugging. References are listed in map iteration order.
func (p *parseContext) String() string {
	parts := make([]string, 0, len(p.refs))
	for _, ref := range p.refs {
		parts = append(parts, ref.String())
	}
	joined := strings.Join(parts, ",")
	return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, joined)
}
// OpenedBlocks returns the list of blocks that are currently being parsed.
// The internal slice is returned without copying.
func (p *parseContext) OpenedBlocks() []Block {
return p.openedBlocks
}
// SetOpenedBlocks replaces the list of blocks that are currently being
// parsed. The given slice is stored without copying.
func (p *parseContext) SetOpenedBlocks(v []Block) {
p.openedBlocks = v
}
// LastOpenedBlock returns the most recently opened block, or a zero Block
// (nil Node and nil Parser) when no block is open.
func (p *parseContext) LastOpenedBlock() Block {
	count := len(p.openedBlocks)
	if count == 0 {
		return Block{}
	}
	return p.openedBlocks[count-1]
}
// IsInLinkLabel reports whether a value is stored under linkLabelStateKey.
func (p *parseContext) IsInLinkLabel() bool {
	return p.Get(linkLabelStateKey) != nil
}
// State represents parser's state.
// State is designed to use as a bit flag.
type State int
const (
// None is a default value of the [State].
// Its numeric value is 1 (1 << 0), not the zero value of the type.
None State = 1 << iota
// Continue indicates parser can continue parsing.
Continue
// Close indicates parser cannot parse anymore.
Close
// HasChildren indicates parser may have child blocks.
HasChildren
// NoChildren indicates parser does not have child blocks.
NoChildren
// RequireParagraph indicates parser requires that the last node
// must be a paragraph and is not converted to other nodes by
// ParagraphTransformers.
RequireParagraph
)
// A Config struct is a data structure that holds configuration of the Parser.
type Config struct {
	// Options holds named options; they are offered to every registered
	// parser and transformer that implements SetOptioner.
	Options               map[OptionName]interface{}
	BlockParsers          util.PrioritizedSlice /*<BlockParser>*/
	InlineParsers         util.PrioritizedSlice /*<InlineParser>*/
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	ASTTransformers       util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is set by WithEscapedSpace.
	EscapedSpace bool
}

// NewConfig returns a new Config with an empty option map and empty
// parser and transformer lists.
func NewConfig() *Config {
	cfg := new(Config)
	cfg.Options = make(map[OptionName]interface{})
	cfg.BlockParsers = util.PrioritizedSlice{}
	cfg.InlineParsers = util.PrioritizedSlice{}
	cfg.ParagraphTransformers = util.PrioritizedSlice{}
	cfg.ASTTransformers = util.PrioritizedSlice{}
	return cfg
}
// An Option interface is a functional option type for the Parser.
type Option interface {
// SetParserOption applies this option to the given parser configuration.
SetParserOption(*Config)
}
// OptionName is a name of parser options.
type OptionName string

// optAttribute is an option name that specifies attributes of elements.
const optAttribute OptionName = "Attribute"

// withAttribute is the Option returned by WithAttribute.
type withAttribute struct{}

// SetParserOption stores true under optAttribute in the option map.
func (o *withAttribute) SetParserOption(cfg *Config) {
	cfg.Options[optAttribute] = true
}

// WithAttribute is a functional option that enables custom attributes.
func WithAttribute() Option {
	return new(withAttribute)
}
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
// Parse parses the given Markdown text into AST nodes.
Parse(reader text.Reader, opts ...ParseOption) ast.Node
// AddOptions adds the given options to this parser.
AddOptions(...Option)
}
// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
// SetOption sets the given option to the object.
// Options the object does not understand may be passed, so
// implementations must ignore them.
SetOption(name OptionName, value interface{})
}
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
// Trigger returns a list of characters that trigger the Open method of
// this parser.
// If Trigger returns nil, Open will be called with any line.
Trigger() []byte
// Open parses the current line and returns a result of parsing.
//
// Open must not parse beyond the current line.
// If Open has been able to parse the current line, Open must advance a reader
// position by consumed byte length.
//
// If Open has not been able to parse the current line, Open should return
// (nil, NoChildren). If Open has been able to parse the current line, Open
// should return a new Block node together with HasChildren or NoChildren.
Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)
// Continue parses the current line and returns a result of parsing.
//
// Continue must not parse beyond the current line.
// If Continue has been able to parse the current line, Continue must advance
// a reader position by consumed byte length.
//
// If Continue has not been able to parse the current line, Continue should
// return Close. If Continue has been able to parse the current line,
// Continue should return (Continue | NoChildren) or
// (Continue | HasChildren).
Continue(node ast.Node, reader text.Reader, pc Context) State
// Close will be called when the parser returns Close.
Close(node ast.Node, reader text.Reader, pc Context)
// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
// otherwise false.
CanInterruptParagraph() bool
// CanAcceptIndentedLine returns true if the parser can open a new node when
// the given line is indented by more than 3 spaces.
CanAcceptIndentedLine() bool
}
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
// Trigger returns a list of characters that trigger the Parse method of
// this parser.
// Trigger characters must be a punctuation or a halfspace.
// A halfspace triggers this parser when the character is any space
// character or the head of a line.
Trigger() []byte
// Parse parses the given block into an inline node.
//
// Parse can parse beyond the current line.
// If Parse has been able to parse the current line, it must advance a reader
// position by consumed byte length.
Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
// A CloseBlocker interface is a callback that will be called when a block
// is closed during inline parsing.
type CloseBlocker interface {
// CloseBlock will be called when a block is closed.
CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched in parsed Paragraphs.
type ParagraphTransformer interface {
// Transform transforms the given paragraph.
Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
// ASTTransformer transforms the AST of an entire Markdown document.
type ASTTransformer interface {
// Transform transforms the given AST tree.
Transform(node *ast.Document, reader text.Reader, pc Context)
}
// DefaultBlockParsers returns a new list of default BlockParsers.
// Priorities of default BlockParsers are:
//
//	SetextHeadingParser, 100
//	ThematicBreakParser, 200
//	ListParser, 300
//	ListItemParser, 400
//	CodeBlockParser, 500
//	ATXHeadingParser, 600
//	FencedCodeBlockParser, 700
//	BlockquoteParser, 800
//	HTMLBlockParser, 900
//	ParagraphParser, 1000
func DefaultBlockParsers() []util.PrioritizedValue {
	defaults := make([]util.PrioritizedValue, 0, 10)
	defaults = append(defaults, util.Prioritized(NewSetextHeadingParser(), 100))
	defaults = append(defaults, util.Prioritized(NewThematicBreakParser(), 200))
	defaults = append(defaults, util.Prioritized(NewListParser(), 300))
	defaults = append(defaults, util.Prioritized(NewListItemParser(), 400))
	defaults = append(defaults, util.Prioritized(NewCodeBlockParser(), 500))
	defaults = append(defaults, util.Prioritized(NewATXHeadingParser(), 600))
	defaults = append(defaults, util.Prioritized(NewFencedCodeBlockParser(), 700))
	defaults = append(defaults, util.Prioritized(NewBlockquoteParser(), 800))
	defaults = append(defaults, util.Prioritized(NewHTMLBlockParser(), 900))
	defaults = append(defaults, util.Prioritized(NewParagraphParser(), 1000))
	return defaults
}
// DefaultInlineParsers returns a new list of default InlineParsers.
// Priorities of default InlineParsers are:
//
//	CodeSpanParser, 100
//	LinkParser, 200
//	AutoLinkParser, 300
//	RawHTMLParser, 400
//	EmphasisParser, 500
func DefaultInlineParsers() []util.PrioritizedValue {
	defaults := make([]util.PrioritizedValue, 0, 5)
	defaults = append(defaults, util.Prioritized(NewCodeSpanParser(), 100))
	defaults = append(defaults, util.Prioritized(NewLinkParser(), 200))
	defaults = append(defaults, util.Prioritized(NewAutoLinkParser(), 300))
	defaults = append(defaults, util.Prioritized(NewRawHTMLParser(), 400))
	defaults = append(defaults, util.Prioritized(NewEmphasisParser(), 500))
	return defaults
}
// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
// Priorities of default ParagraphTransformers are:
//
//	LinkReferenceParagraphTransformer, 100
func DefaultParagraphTransformers() []util.PrioritizedValue {
	linkReferences := util.Prioritized(LinkReferenceParagraphTransformer, 100)
	return []util.PrioritizedValue{linkReferences}
}
// A Block struct pairs a node with the parser that opened it.
type Block struct {
// Node is a BlockNode.
Node ast.Node
// Parser is a BlockParser.
Parser BlockParser
}
// parser is the Parser implementation returned by NewParser.
// Its lookup tables are filled lazily on the first call to Parse.
type parser struct {
// options is initialized to an empty map by NewParser.
options map[OptionName]interface{}
// blockParsers is indexed by trigger byte.
blockParsers [256][]BlockParser
// freeBlockParsers are the block parsers whose Trigger returns nil.
freeBlockParsers []BlockParser
// inlineParsers is indexed by trigger byte.
inlineParsers [256][]InlineParser
// closeBlockers are the inline parsers that also implement CloseBlocker.
closeBlockers []CloseBlocker
paragraphTransformers []ParagraphTransformer
astTransformers []ASTTransformer
// escapedSpace is copied from Config.EscapedSpace during initialization.
escapedSpace bool
// config collects options until the first Parse, which sets it to nil.
config *Config
// initSync guards the one-time initialization performed in Parse.
initSync sync.Once
}
// withBlockParsers is the Option returned by WithBlockParsers.
type withBlockParsers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the carried block parsers to the configuration.
func (o *withBlockParsers) SetParserOption(cfg *Config) {
	cfg.BlockParsers = append(cfg.BlockParsers, o.value...)
}

// WithBlockParsers is a functional option that allows you to add
// BlockParsers to the parser.
func WithBlockParsers(bs ...util.PrioritizedValue) Option {
	return &withBlockParsers{value: bs}
}
// withInlineParsers is the Option returned by WithInlineParsers.
type withInlineParsers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the carried inline parsers to the configuration.
func (o *withInlineParsers) SetParserOption(cfg *Config) {
	cfg.InlineParsers = append(cfg.InlineParsers, o.value...)
}

// WithInlineParsers is a functional option that allows you to add
// InlineParsers to the parser.
func WithInlineParsers(bs ...util.PrioritizedValue) Option {
	return &withInlineParsers{value: bs}
}
// withParagraphTransformers is the Option returned by
// WithParagraphTransformers.
type withParagraphTransformers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the carried paragraph transformers to the
// configuration.
func (o *withParagraphTransformers) SetParserOption(cfg *Config) {
	cfg.ParagraphTransformers = append(cfg.ParagraphTransformers, o.value...)
}

// WithParagraphTransformers is a functional option that allows you to add
// ParagraphTransformers to the parser.
func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
	return &withParagraphTransformers{value: ps}
}
// withASTTransformers is the Option returned by WithASTTransformers.
type withASTTransformers struct {
	value []util.PrioritizedValue
}

// SetParserOption appends the carried AST transformers to the configuration.
func (o *withASTTransformers) SetParserOption(cfg *Config) {
	cfg.ASTTransformers = append(cfg.ASTTransformers, o.value...)
}

// WithASTTransformers is a functional option that allows you to add
// ASTTransformers to the parser.
func WithASTTransformers(ps ...util.PrioritizedValue) Option {
	return &withASTTransformers{value: ps}
}
// withEscapedSpace is the Option returned by WithEscapedSpace.
type withEscapedSpace struct{}

// SetParserOption turns the EscapedSpace flag on.
func (o *withEscapedSpace) SetParserOption(cfg *Config) {
	cfg.EscapedSpace = true
}

// WithEscapedSpace is a functional option indicating that a '\' escaped
// half-space (0x20) should not trigger parsers.
func WithEscapedSpace() Option {
	return new(withEscapedSpace)
}
// withOption is the Option returned by WithOption.
type withOption struct {
	name  OptionName
	value interface{}
}

// SetParserOption stores the carried value under the carried name in the
// option map.
func (o *withOption) SetParserOption(cfg *Config) {
	cfg.Options[o.name] = o.value
}

// WithOption is a functional option that allows you to set
// an arbitrary option to the parser.
func WithOption(name OptionName, value interface{}) Option {
	return &withOption{name: name, value: value}
}
// NewParser returns a new Parser with given options.
//
// The options are applied to a fresh Config right away. The parser's lookup
// tables are built from that Config on the first call to Parse.
func NewParser(options ...Option) Parser {
	cfg := NewConfig()
	for i := range options {
		options[i].SetParserOption(cfg)
	}
	return &parser{
		config:  cfg,
		options: make(map[OptionName]interface{}),
	}
}
// AddOptions applies the given options to the parser's pending configuration.
//
// NOTE(review): Parse sets p.config to nil after its one-time
// initialization, so options added after the first Parse receive a nil
// *Config. Call AddOptions before the first Parse.
func (p *parser) AddOptions(opts ...Option) {
	for i := range opts {
		opts[i].SetParserOption(p.config)
	}
}
// addBlockParser registers the block parser wrapped in v.
//
// It panics when v.Value is not a BlockParser. If the value also implements
// SetOptioner, every entry of options is passed to it. A parser whose
// Trigger returns nil is added to the free block parsers; otherwise it is
// added to the bucket of each of its trigger bytes.
func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
	bp, isBlockParser := v.Value.(BlockParser)
	if !isBlockParser {
		panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
	}
	triggers := bp.Trigger()
	if so, canSet := v.Value.(SetOptioner); canSet {
		for name, value := range options {
			so.SetOption(name, value)
		}
	}
	if triggers == nil {
		p.freeBlockParsers = append(p.freeBlockParsers, bp)
		return
	}
	for _, ch := range triggers {
		// append allocates the bucket on first use, leaving it non-nil.
		p.blockParsers[ch] = append(p.blockParsers[ch], bp)
	}
}
// addInlineParser registers the inline parser wrapped in v.
//
// It panics when v.Value is not an InlineParser. If the value also
// implements SetOptioner, every entry of options is passed to it. Parsers
// that implement CloseBlocker are also recorded as close blockers. The
// parser is then added to the bucket of each of its trigger bytes.
func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
	ip, isInlineParser := v.Value.(InlineParser)
	if !isInlineParser {
		panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
	}
	triggers := ip.Trigger()
	if so, canSet := v.Value.(SetOptioner); canSet {
		for name, value := range options {
			so.SetOption(name, value)
		}
	}
	if blocker, isBlocker := ip.(CloseBlocker); isBlocker {
		p.closeBlockers = append(p.closeBlockers, blocker)
	}
	for _, ch := range triggers {
		p.inlineParsers[ch] = append(p.inlineParsers[ch], ip)
	}
}
// addParagraphTransformer registers the paragraph transformer wrapped in v.
//
// It panics when v.Value is not a ParagraphTransformer. If the value also
// implements SetOptioner, every entry of options is passed to it.
func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
	pt, isTransformer := v.Value.(ParagraphTransformer)
	if !isTransformer {
		panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
	}
	if so, canSet := v.Value.(SetOptioner); canSet {
		for name, value := range options {
			so.SetOption(name, value)
		}
	}
	p.paragraphTransformers = append(p.paragraphTransformers, pt)
}
// addASTTransformer registers the AST transformer wrapped in v.
//
// It panics when v.Value is not an ASTTransformer. If the value also
// implements SetOptioner, every entry of options is passed to it.
func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
	at, isTransformer := v.Value.(ASTTransformer)
	if !isTransformer {
		panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
	}
	if so, canSet := v.Value.(SetOptioner); canSet {
		for name, value := range options {
			so.SetOption(name, value)
		}
	}
	p.astTransformers = append(p.astTransformers, at)
}
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
type ParseConfig struct {
	// Context is the parsing context to use; Parse creates a new one
	// when it is left nil.
	Context Context
}

// A ParseOption is a functional option type for the Parser.Parse.
type ParseOption func(c *ParseConfig)

// WithContext is a functional option that allows you to override
// the default context.
func WithContext(context Context) ParseOption {
	return func(cfg *ParseConfig) {
		cfg.Context = context
	}
}
// Parse parses the Markdown text provided by reader and returns the root
// document node.
//
// The first call builds the parser's lookup tables from the collected
// configuration; the configuration is discarded afterwards. Parsing runs in
// three stages: block structure, inline content of every block, and finally
// the AST transformers.
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
// One-time initialization: sort each configured list and register its
// entries in sorted order.
p.initSync.Do(func() {
p.config.BlockParsers.Sort()
for _, v := range p.config.BlockParsers {
p.addBlockParser(v, p.config.Options)
}
// Every trigger bucket that has at least one parser also gets the
// trigger-less ("free") block parsers appended.
for i := range p.blockParsers {
if p.blockParsers[i] != nil {
p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
}
}
p.config.InlineParsers.Sort()
for _, v := range p.config.InlineParsers {
p.addInlineParser(v, p.config.Options)
}
p.config.ParagraphTransformers.Sort()
for _, v := range p.config.ParagraphTransformers {
p.addParagraphTransformer(v, p.config.Options)
}
p.config.ASTTransformers.Sort()
for _, v := range p.config.ASTTransformers {
p.addASTTransformer(v, p.config.Options)
}
p.escapedSpace = p.config.EscapedSpace
// The configuration is no longer needed once the tables are built.
p.config = nil
})
c := &ParseConfig{}
for _, opt := range opts {
opt(c)
}
// Fall back to a fresh context when the caller did not supply one.
if c.Context == nil {
c.Context = NewContext()
}
pc := c.Context
root := ast.NewDocument()
// Stage 1: build the block structure.
p.parseBlocks(root, reader, pc)
// Stage 2: parse the inline content of every block (children first).
blockReader := text.NewBlockReader(reader.Source(), nil)
p.walkBlock(root, func(node ast.Node) {
p.parseBlock(blockReader, node, pc)
})
// Stage 3: run the document-level transformers.
for _, at := range p.astTransformers {
at.Transform(root, reader, pc)
}
// root.Dump(reader.Source(), 0)
return root
}
// transformParagraph runs the paragraph transformers on node in order.
// It stops and returns true as soon as a transformer leaves the paragraph
// without a parent. It returns false when the paragraph is still attached
// after all transformers have run.
func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
	for i := range p.paragraphTransformers {
		p.paragraphTransformers[i].Transform(node, reader, pc)
		if detached := node.Parent() == nil; detached {
			return true
		}
	}
	return false
}
// closeBlocks closes the opened blocks with indices from 'from' down to 'to'
// (both inclusive, highest index first) and removes them from the context's
// list of opened blocks.
func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
blocks := pc.OpenedBlocks()
for i := from; i >= to; i-- {
node := blocks[i].Node
paragraph, ok := node.(*ast.Paragraph)
// Paragraphs that are still attached get a chance to be transformed.
if ok && node.Parent() != nil {
p.transformParagraph(paragraph, reader, pc)
}
if node.Parent() != nil { // closes only if node has not been transformed
blocks[i].Parser.Close(blocks[i].Node, reader, pc)
}
}
// Drop the closed range [to, from] from the list.
if from == len(blocks)-1 {
blocks = blocks[0:to]
} else {
blocks = append(blocks[0:to], blocks[from+1:]...)
}
pc.SetOpenedBlocks(blocks)
}
// blockOpenResult is the outcome reported by openBlocks.
type blockOpenResult int
const (
// paragraphContinuation: no block was opened, but the last opened
// paragraph accepted the line as a continuation.
paragraphContinuation blockOpenResult = iota + 1
// newBlocksOpened: at least one new block was opened on the line.
newBlocksOpened
// noBlocksOpened: nothing was opened and the line did not continue a
// paragraph.
noBlocksOpened
)
// openBlocks tries to open new blocks under parent on the current line.
//
// blankLine is recorded on every opened node through SetBlankPreviousLines.
// The result is newBlocksOpened when at least one block was opened. It is
// paragraphContinuation when nothing was opened but the last opened
// paragraph's parser accepted the line. Otherwise it is noBlocksOpened.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
result := blockOpenResult(noBlocksOpened)
// continuable is true while the last opened block is a paragraph that this
// line might continue.
continuable := false
lastBlock := pc.LastOpenedBlock()
if lastBlock.Node != nil {
continuable = ast.IsParagraph(lastBlock.Node)
}
retry:
var bps []BlockParser
line, _ := reader.PeekLine()
w, pos := util.IndentWidth(line, reader.LineOffset())
// Publish the line's offset and indent to the context (-1 when the indent
// spans the whole line).
if w >= len(line) {
pc.SetBlockOffset(-1)
pc.SetBlockIndent(-1)
} else {
pc.SetBlockOffset(pos)
pc.SetBlockIndent(w)
}
if line == nil || line[0] == '\n' {
goto continuable
}
// Pick the candidate parsers: those triggered by the first non-space byte,
// or the free parsers when that byte has no bucket.
bps = p.freeBlockParsers
if pos < len(line) {
bps = p.blockParsers[line[pos]]
if bps == nil {
bps = p.freeBlockParsers
}
}
if bps == nil {
goto continuable
}
for _, bp := range bps {
// While a paragraph may still be continued, only parsers that are
// allowed to interrupt paragraphs are tried.
if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
continue
}
if w > 3 && !bp.CanAcceptIndentedLine() {
continue
}
lastBlock = pc.LastOpenedBlock()
last := lastBlock.Node
node, state := bp.Open(parent, reader, pc)
if node != nil {
// Parser requires last node to be a paragraph.
// With table extension:
//
// 0
// -:
// -
//
// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
// are being paragraph when the Setext heading parser tries to parse the 3rd
// line.
// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
// by a paragraph transformer. So this text should be converted to a table and
// an empty list.
if state&RequireParagraph != 0 {
if last == parent.LastChild() {
// Opened paragraph may be transformed by ParagraphTransformers in
// closeBlocks().
lastBlock.Parser.Close(last, reader, pc)
blocks := pc.OpenedBlocks()
pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
// Paragraph has been transformed.
// So this parser is considered as failing.
continuable = false
goto retry
}
}
}
node.SetBlankPreviousLines(blankLine)
// The previously opened node was detached from the tree: close it
// before the new node is appended.
if last != nil && last.Parent() == nil {
lastPos := len(pc.OpenedBlocks()) - 1
p.closeBlocks(lastPos, lastPos, reader, pc)
}
parent.AppendChild(parent, node)
result = newBlocksOpened
be := Block{node, bp}
pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
if state&HasChildren != 0 {
parent = node
goto retry // try child block
}
break // no children, can not open more blocks on this line
}
}
continuable:
// Nothing was opened: let the paragraph's parser decide whether the line
// continues it.
if result == noBlocksOpened && continuable {
state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
if state&Continue != 0 {
result = paragraphContinuation
}
}
return result
}
// lineStat records, for one line and one nesting level of opened blocks,
// whether the line was blank.
type lineStat struct {
	lineNum int
	level   int
	isBlank bool
}

// isBlankLine reports whether line lineNum counts as blank at the given
// nesting level, based on the recorded stats.
//
// The stats are scanned backwards, starting level entries before the end.
// If there is nothing to scan the result is true. An entry for the same
// line decides the result when it is blank at a shallower level (true) or
// sits at exactly this level (its own flag). An entry for an earlier line
// ends the scan with false.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	start := len(stats) - 1 - level
	if start < 0 {
		return true
	}
	for i := start; i >= 0; i-- {
		stat := stats[i]
		switch {
		case stat.lineNum < lineNum:
			return false
		case stat.lineNum > lineNum:
			continue
		case stat.level < level && stat.isBlank:
			return true
		case stat.level == level:
			return stat.isBlank
		}
	}
	return false
}
// parseBlocks builds the block structure under parent by reading the source
// line by line. The outer loop skips blank lines and opens blocks on the
// next line. The inner loop feeds each following line to the opened blocks,
// outermost first, until all of them are closed or the input ends.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
pc.SetOpenedBlocks([]Block{})
// blankLines collects per-line, per-level blank flags consumed by
// isBlankLine.
blankLines := make([]lineStat, 0, 128)
var isBlank bool
for { // process blocks separated by blank lines
_, lines, ok := reader.SkipBlankLines()
if !ok {
return
}
lineNum, _ := reader.Position()
// Blank lines were skipped: restart the stats and mark the previous line
// as blank for every opened level.
if lines != 0 {
blankLines = blankLines[0:0]
l := len(pc.OpenedBlocks())
for i := 0; i < l; i++ {
blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
}
}
isBlank = isBlankLine(lineNum-1, 0, blankLines)
// first, we try to open blocks
if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
return
}
reader.AdvanceLine()
for { // process opened blocks line by line
openedBlocks := pc.OpenedBlocks()
l := len(openedBlocks)
if l == 0 {
break
}
lastIndex := l - 1
for i := 0; i < l; i++ {
be := openedBlocks[i]
line, _ := reader.PeekLine()
// End of input: close everything that is still open.
if line == nil {
p.closeBlocks(lastIndex, 0, reader, pc)
reader.AdvanceLine()
return
}
lineNum, _ := reader.Position()
blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
// If node is a paragraph, p.openBlocks determines whether it is continuable.
// So we do not process paragraphs here.
if !ast.IsParagraph(be.Node) {
state := be.Parser.Continue(be.Node, reader, pc)
if state&Continue != 0 {
// When current node is a container block and has no children,
// we try to open new child nodes
if state&HasChildren != 0 && i == lastIndex {
isBlank = isBlankLine(lineNum-1, i, blankLines)
p.openBlocks(be.Node, isBlank, reader, pc)
break
}
continue
}
}
// current node may be closed or lazy continuation
isBlank = isBlankLine(lineNum-1, i, blankLines)
thisParent := parent
if i != 0 {
thisParent = openedBlocks[i-1].Node
}
lastNode := openedBlocks[lastIndex].Node
result := p.openBlocks(thisParent, isBlank, reader, pc)
if result != paragraphContinuation {
// lastNode is a paragraph and was transformed by the paragraph
// transformers.
if openedBlocks[lastIndex].Node != lastNode {
lastIndex--
}
p.closeBlocks(lastIndex, i, reader, pc)
}
break
}
reader.AdvanceLine()
}
}
}
// walkBlock visits block and all of its descendants depth-first, calling cb
// on the children of a node before the node itself.
func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
	child := block.FirstChild()
	for child != nil {
		p.walkBlock(child, cb)
		// Read the sibling only after the subtree has been visited.
		child = child.NextSibling()
	}
	cb(block)
}
// Bit flags describing how a line ends; parseBlock combines them.
const (
// lineBreakHard marks a hard line break.
lineBreakHard uint8 = 1 << iota
// lineBreakSoft marks a soft line break.
lineBreakSoft
// lineBreakVisible is set together with lineBreakHard when the break is
// written as a backslash before the newline.
lineBreakVisible
)
// parseBlock parses the inline content of parent, using block as a reader
// over parent's lines. Raw nodes are left untouched.
//
// Each line is scanned byte by byte. At trigger positions the registered
// inline parsers are tried; text they do not consume is appended as text
// segments carrying soft/hard line break flags.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
if parent.IsRaw() {
return
}
// escaped is true when the previous byte was an unescaped backslash.
escaped := false
source := block.Source()
block.Reset(parent.Lines())
for {
retry:
line, _ := block.PeekLine()
if line == nil {
break
}
lineLength := len(line)
var lineBreakFlags uint8
hasNewLine := line[lineLength-1] == '\n'
// Classify the line ending; lineLength is shortened so that the scan
// below stops before a hard-break marker.
if ((lineLength >= 3 && line[lineLength-2] == '\\' &&
line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
lineLength -= 2
lineBreakFlags |= lineBreakHard | lineBreakVisible
} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) &&
hasNewLine { // ends with \\r\n
lineLength -= 3
lineBreakFlags |= lineBreakHard | lineBreakVisible
} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' &&
hasNewLine { // ends with [space][space]\n
lineLength -= 3
lineBreakFlags |= lineBreakHard
} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' &&
line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
lineLength -= 4
lineBreakFlags |= lineBreakHard
} else if hasNewLine {
// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
// See https://spec.commonmark.org/0.30/#soft-line-breaks
lineBreakFlags |= lineBreakSoft
}
l, startPosition := block.Position()
// n counts scanned bytes the reader has not yet been advanced past.
n := 0
for i := 0; i < lineLength; i++ {
c := line[i]
if c == '\n' {
break
}
isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
isPunct := util.IsPunct(c)
// Trigger positions: unescaped punctuation, spaces (unless escaped and
// escapedSpace is on), and the first byte of the line.
if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
parserChar := c
// Spaces and a non-punctuation line head both map to the ' ' bucket.
if isSpace || (i == 0 && !isPunct) {
parserChar = ' '
}
ips := p.inlineParsers[parserChar]
if ips != nil {
block.Advance(n)
n = 0
savedLine, savedPosition := block.Position()
// Flush the plain text scanned so far before trying the parsers.
if i != 0 {
_, currentPosition := block.Position()
ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
_, startPosition = block.Position()
}
var inlineNode ast.Node
for _, ip := range ips {
inlineNode = ip.Parse(parent, block, pc)
if inlineNode != nil {
break
}
// The parser did not match: restore the reader position.
block.SetPosition(savedLine, savedPosition)
}
if inlineNode != nil {
parent.AppendChild(parent, inlineNode)
goto retry
}
}
}
if escaped {
escaped = false
n++
continue
}
if c == '\\' {
escaped = true
n++
continue
}
escaped = false
n++
}
if n != 0 {
block.Advance(n)
}
currentL, currentPosition := block.Position()
// The reader is already on another line: start over with that line.
if l != currentL {
continue
}
diff := startPosition.Between(currentPosition)
var text *ast.Text
// Only a backslash hard break keeps the segment untrimmed; otherwise
// trailing spaces are trimmed from the segment.
if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
text = ast.NewTextSegment(diff)
} else {
text = ast.NewTextSegment(diff.TrimRightSpace(source))
}
text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
parent.AppendChild(parent, text)
block.AdvanceLine()
}
// Resolve the delimiters collected for this block, then notify the inline
// parsers that implement CloseBlocker.
ProcessDelimiters(nil, pc)
for _, ip := range p.closeBlockers {
ip.CloseBlock(parent, block, pc)
}
}