mirror of
https://github.com/superseriousbusiness/gotosocial.git
synced 2025-03-10 15:48:52 +01:00
* [feature] Parse content warning as HTML, serialize via API to plaintext * tidy up some cruft * whoops * oops * i'm da joker baybee * clemency muy lorde * rename some of the text functions for clarity * jiggle the opts * fiddle de deee * hopefully the last test fix i ever have to do in my beautiful life
195 lines
4.6 KiB
Go
195 lines
4.6 KiB
Go
// GoToSocial
|
|
// Copyright (C) GoToSocial Authors admin@gotosocial.org
|
|
// SPDX-License-Identifier: AGPL-3.0-or-later
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package text
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"codeberg.org/gruf/go-byteutil"
|
|
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
|
|
"github.com/superseriousbusiness/gotosocial/internal/log"
|
|
"github.com/superseriousbusiness/gotosocial/internal/regexes"
|
|
"github.com/yuin/goldmark"
|
|
"github.com/yuin/goldmark/extension"
|
|
"github.com/yuin/goldmark/renderer"
|
|
"github.com/yuin/goldmark/renderer/html"
|
|
)
|
|
|
|
// FromMarkdown fulfils FormatFunc by parsing
|
|
// the given markdown input into a FormatResult.
|
|
//
|
|
// Inline (aka unsafe) HTML elements are allowed,
|
|
// as they should be sanitized afterwards anyway.
|
|
func (f *Formatter) FromMarkdown(
|
|
ctx context.Context,
|
|
parseMention gtsmodel.ParseMentionFunc,
|
|
authorID string,
|
|
statusID string,
|
|
input string,
|
|
) *FormatResult {
|
|
return f.fromMarkdown(
|
|
ctx,
|
|
false, // basic = false
|
|
parseMention,
|
|
authorID,
|
|
statusID,
|
|
input,
|
|
)
|
|
}
|
|
|
|
// FromMarkdownBasic fulfils FormatFunc by parsing
|
|
// the given markdown input into a FormatResult.
|
|
//
|
|
// Unlike FromMarkdown, it will only parse emojis with
|
|
// the custom renderer, leaving aside mentions and tags.
|
|
//
|
|
// Inline (aka unsafe) HTML elements are not allowed.
|
|
//
|
|
// If the result is a single paragraph,
|
|
// it will not be wrapped in <p> tags.
|
|
func (f *Formatter) FromMarkdownBasic(
|
|
ctx context.Context,
|
|
parseMention gtsmodel.ParseMentionFunc,
|
|
authorID string,
|
|
statusID string,
|
|
input string,
|
|
) *FormatResult {
|
|
res := f.fromMarkdown(
|
|
ctx,
|
|
true, // basic = true
|
|
parseMention,
|
|
authorID,
|
|
statusID,
|
|
input,
|
|
)
|
|
|
|
res.HTML = unwrapParagraph(res.HTML)
|
|
return res
|
|
}
|
|
|
|
// fromMarkdown parses the given input text either
|
|
// with or without emojis, and returns the result.
|
|
func (f *Formatter) fromMarkdown(
|
|
ctx context.Context,
|
|
basic bool,
|
|
parseMention gtsmodel.ParseMentionFunc,
|
|
authorID string,
|
|
statusID string,
|
|
input string,
|
|
) *FormatResult {
|
|
var (
|
|
result = new(FormatResult)
|
|
opts []renderer.Option
|
|
)
|
|
|
|
if basic {
|
|
// Don't allow raw HTML tags,
|
|
// markdown syntax only.
|
|
opts = []renderer.Option{
|
|
html.WithXHTML(),
|
|
html.WithHardWraps(),
|
|
}
|
|
} else {
|
|
opts = []renderer.Option{
|
|
html.WithXHTML(),
|
|
html.WithHardWraps(),
|
|
|
|
// Allow raw HTML tags, we
|
|
// sanitize at the end anyway.
|
|
html.WithUnsafe(),
|
|
}
|
|
}
|
|
|
|
// Instantiate goldmark parser for
|
|
// markdown, using custom renderer
|
|
// to add hashtag/mention links.
|
|
md := goldmark.New(
|
|
goldmark.WithRendererOptions(
|
|
opts...,
|
|
),
|
|
goldmark.WithExtensions(
|
|
&customRenderer{
|
|
ctx,
|
|
f.db,
|
|
parseMention,
|
|
authorID,
|
|
statusID,
|
|
// If basic, pass
|
|
// emojiOnly = true.
|
|
basic,
|
|
result,
|
|
},
|
|
// Turns URLs into links.
|
|
extension.NewLinkify(
|
|
extension.WithLinkifyURLRegexp(regexes.LinkScheme),
|
|
),
|
|
extension.Strikethrough,
|
|
),
|
|
)
|
|
|
|
// Convert input string to bytes
|
|
// without performing any allocs.
|
|
bInput := byteutil.S2B(input)
|
|
|
|
// Parse input into HTML.
|
|
var htmlBytes bytes.Buffer
|
|
if err := md.Convert(
|
|
bInput,
|
|
&htmlBytes,
|
|
); err != nil {
|
|
log.Errorf(ctx, "error formatting markdown input to HTML: %s", err)
|
|
}
|
|
|
|
// Clean and shrink HTML.
|
|
result.HTML = byteutil.B2S(htmlBytes.Bytes())
|
|
result.HTML = SanitizeHTML(result.HTML)
|
|
result.HTML = MinifyHTML(result.HTML)
|
|
|
|
return result
|
|
}
|
|
|
|
// parasRegexp matches a bare (attribute-less)
// opening or closing paragraph tag: <p> or </p>.
var parasRegexp = regexp.MustCompile(`</?p>`)

// unwrapParagraph removes the opening and closing paragraph
// tags of the input HTML, if the input is a single paragraph.
//
// The input is returned unchanged when it does not start with
// "<p>", does not end with "</p>", or when the text between
// those two tags still contains another <p> or </p> tag.
//
// The parameter is deliberately not named "html", to avoid
// shadowing the imported goldmark html package in this file.
func unwrapParagraph(htmlStr string) string {
	const (
		openTag  = "<p>"
		closeTag = "</p>"
	)

	// Only text fully wrapped in one <p>...</p> pair qualifies.
	if !strings.HasPrefix(htmlStr, openTag) ||
		!strings.HasSuffix(htmlStr, closeTag) {
		return htmlStr
	}

	// Strip the outer tags. A string cannot both start with
	// "<p>" and end with "</p>" using overlapping bytes, so
	// trimming one never eats into the other.
	inner := strings.TrimPrefix(htmlStr, openTag)
	inner = strings.TrimSuffix(inner, closeTag)

	// If there are still other paragraph tags left inside,
	// the input was more than one paragraph: leave it alone.
	if parasRegexp.MatchString(inner) {
		return htmlStr
	}

	return inner
}
|