// GoToSocial // Copyright (C) GoToSocial Authors admin@gotosocial.org // SPDX-License-Identifier: AGPL-3.0-or-later // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU Affero General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Affero General Public License for more details. // // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . package text import ( "bytes" "context" "regexp" "strings" "codeberg.org/gruf/go-byteutil" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/regexes" "github.com/yuin/goldmark" "github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/renderer/html" ) // FromMarkdown fulfils FormatFunc by parsing // the given markdown input into a FormatResult. // // Inline (aka unsafe) HTML elements are allowed, // as they should be sanitized afterwards anyway. func (f *Formatter) FromMarkdown( ctx context.Context, parseMention gtsmodel.ParseMentionFunc, authorID string, statusID string, input string, ) *FormatResult { return f.fromMarkdown( ctx, false, // basic = false parseMention, authorID, statusID, input, ) } // FromMarkdownBasic fulfils FormatFunc by parsing // the given markdown input into a FormatResult. // // Unlike FromMarkdown, it will only parse emojis with // the custom renderer, leaving aside mentions and tags. // // Inline (aka unsafe) HTML elements are not allowed. // // If the result is a single paragraph, // it will not be wrapped in

tags. func (f *Formatter) FromMarkdownBasic( ctx context.Context, parseMention gtsmodel.ParseMentionFunc, authorID string, statusID string, input string, ) *FormatResult { res := f.fromMarkdown( ctx, true, // basic = true parseMention, authorID, statusID, input, ) res.HTML = unwrapParagraph(res.HTML) return res } // fromMarkdown parses the given input text either // with or without emojis, and returns the result. func (f *Formatter) fromMarkdown( ctx context.Context, basic bool, parseMention gtsmodel.ParseMentionFunc, authorID string, statusID string, input string, ) *FormatResult { var ( result = new(FormatResult) opts []renderer.Option ) if basic { // Don't allow raw HTML tags, // markdown syntax only. opts = []renderer.Option{ html.WithXHTML(), html.WithHardWraps(), } } else { opts = []renderer.Option{ html.WithXHTML(), html.WithHardWraps(), // Allow raw HTML tags, we // sanitize at the end anyway. html.WithUnsafe(), } } // Instantiate goldmark parser for // markdown, using custom renderer // to add hashtag/mention links. md := goldmark.New( goldmark.WithRendererOptions( opts..., ), goldmark.WithExtensions( &customRenderer{ ctx, f.db, parseMention, authorID, statusID, // If basic, pass // emojiOnly = true. basic, result, }, // Turns URLs into links. extension.NewLinkify( extension.WithLinkifyURLRegexp(regexes.LinkScheme), ), extension.Strikethrough, ), ) // Convert input string to bytes // without performing any allocs. bInput := byteutil.S2B(input) // Parse input into HTML. var htmlBytes bytes.Buffer if err := md.Convert( bInput, &htmlBytes, ); err != nil { log.Errorf(ctx, "error formatting markdown input to HTML: %s", err) } // Clean and shrink HTML. result.HTML = byteutil.B2S(htmlBytes.Bytes()) result.HTML = SanitizeHTML(result.HTML) result.HTML = MinifyHTML(result.HTML) return result } var parasRegexp = regexp.MustCompile(``) // unwrapParagraph removes opening and closing paragraph tags // of input HTML, if input html is a single paragraph only. 
func unwrapParagraph(html string) string {
	// Returns the inner content of html when html is exactly
	// one "<p>...</p>" element; otherwise returns html unchanged.
	// Matching is purely textual: only the bare tags below are
	// recognised, so "<p class=...>" is never treated as a wrapper.
	const (
		openTag  = "<p>"
		closeTag = "</p>"
	)

	// Not wrapped in a paragraph at
	// all, so nothing to strip.
	if !strings.HasPrefix(html, openTag) ||
		!strings.HasSuffix(html, closeTag) {
		return html
	}

	// Make a substring excluding the opening and closing
	// paragraph tags. The two tags cannot overlap, so a string
	// with both the prefix and the suffix is always long enough
	// for this slice to be in range.
	inner := html[len(openTag) : len(html)-len(closeTag)]

	// If there are still other paragraph tags left inside
	// the substring, the input held more than one paragraph;
	// return it unchanged rather than producing broken markup.
	if strings.Contains(inner, openTag) ||
		strings.Contains(inner, closeTag) {
		return html
	}

	return inner
}