From 1c0f8b0147516968397d09360375dcb3e3934b65 Mon Sep 17 00:00:00 2001
From: Daniel Roe
Date: Sun, 4 Dec 2022 22:10:10 +0000
Subject: [PATCH] perf: replace `parse5` with `ultrahtml` (#336)
---
components/status/StatusActionsMore.vue | 6 +-
composables/content.ts | 152 ++++++++----------
composables/statusDrafts.ts | 4 +-
package.json | 2 +-
pnpm-lock.yaml | 10 +-
tests/__snapshots__/content-rich.test.ts.snap | 34 +---
tests/__snapshots__/html-parse.test.ts.snap | 23 +--
tests/html-parse.test.ts | 6 +-
8 files changed, 94 insertions(+), 143 deletions(-)
diff --git a/components/status/StatusActionsMore.vue b/components/status/StatusActionsMore.vue
index 7cf740ba..21be6eb9 100644
--- a/components/status/StatusActionsMore.vue
+++ b/components/status/StatusActionsMore.vue
@@ -68,7 +68,7 @@ const deleteAndRedraft = async () => {
}
const { text } = await useMasto().statuses.remove(status.id)
- openPublishDialog('dialog', getDraftFromStatus(status, text), true)
+ openPublishDialog('dialog', await getDraftFromStatus(status, text), true)
}
const reply = () => {
@@ -81,9 +81,9 @@ const reply = () => {
}
}
-function editStatus() {
+async function editStatus() {
openPublishDialog(`edit-${status.id}`, {
- ...getDraftFromStatus(status),
+ ...await getDraftFromStatus(status),
editingStatus: status,
})
}
diff --git a/composables/content.ts b/composables/content.ts
index 4d44ad59..b8b81ed3 100644
--- a/composables/content.ts
+++ b/composables/content.ts
@@ -1,48 +1,45 @@
import type { Emoji } from 'masto'
-import type { DefaultTreeAdapterMap } from 'parse5'
-import { parseFragment, serialize } from 'parse5'
+import type { Node } from 'ultrahtml'
+import { TEXT_NODE, parse, render, walkSync } from 'ultrahtml'
import type { VNode } from 'vue'
import { Fragment, h, isVNode } from 'vue'
import { RouterLink } from 'vue-router'
import ContentCode from '~/components/content/ContentCode.vue'
import AccountHoverWrapper from '~/components/account/AccountHoverWrapper.vue'
-type Node = DefaultTreeAdapterMap['childNode']
-type Element = DefaultTreeAdapterMap['element']
-
-function handleMention(el: Element) {
+function handleMention(el: Node) {
// Redirect mentions to the user page
- if (el.tagName === 'a' && el.attrs.find(i => i.name === 'class' && i.value.includes('mention'))) {
- const href = el.attrs.find(i => i.name === 'href')
+ if (el.name === 'a' && el.attributes.class?.includes('mention')) {
+ const href = el.attributes.href
if (href) {
- const matchUser = href.value.match(UserLinkRE)
+ const matchUser = href.match(UserLinkRE)
if (matchUser) {
const [, server, username] = matchUser
const handle = `@${username}@${server.replace(/(.+\.)(.+\..+)/, '$2')}`
- href.value = `/${server}/@${username}`
+ el.attributes.href = `/${server}/@${username}`
return h(AccountHoverWrapper, { handle, class: 'inline-block' }, () => nodeToVNode(el))
}
- const matchTag = href.value.match(TagLinkRE)
+ const matchTag = href.match(TagLinkRE)
if (matchTag) {
const [, , name] = matchTag
- href.value = `/${currentServer.value}/tags/${name}`
+ el.attributes.href = `/${currentServer.value}/tags/${name}`
}
}
}
return undefined
}
-function handleCodeBlock(el: Element) {
- if (el.tagName === 'pre' && el.childNodes[0]?.nodeName === 'code') {
- const codeEl = el.childNodes[0] as Element
- const classes = codeEl.attrs.find(i => i.name === 'class')?.value
+function handleCodeBlock(el: Node) {
+ if (el.name === 'pre' && el.children[0]?.name === 'code') {
+ const codeEl = el.children[0] as Node
+ const classes = codeEl.attributes.class as string
const lang = classes?.split(/\s/g).find(i => i.startsWith('language-'))?.replace('language-', '')
- const code = codeEl.childNodes[0] ? treeToText(codeEl.childNodes[0]) : ''
+ const code = codeEl.children[0] ? treeToText(codeEl.children[0]) : ''
return h(ContentCode, { lang, code: encodeURIComponent(code) })
}
}
-function handleNode(el: Element) {
+function handleNode(el: Node) {
return handleCodeBlock(el) || handleMention(el) || el
}
@@ -51,7 +48,7 @@ function handleNode(el: Element) {
* with interop of custom emojis and inline Markdown syntax
*/
export function parseMastodonHTML(html: string, customEmojis: Record = {}) {
- const processed = html
+ let processed = html
// custom emojis
.replace(/:([\w-]+?):/g, (_, name) => {
const emoji = customEmojis[name]
@@ -66,36 +63,36 @@ export function parseMastodonHTML(html: string, customEmojis: Record${code}
`
})
- const tree = parseFragment(processed)
-
- function walk(node: Node) {
- if ('childNodes' in node)
- node.childNodes = node.childNodes.flatMap(n => walk(n))
-
- if (node.nodeName === '#text') {
- // @ts-expect-error casing
- const text = node.value as string
- const converted = text
- .replace(/\*\*\*(.*?)\*\*\*/g, '$1')
- .replace(/\*\*(.*?)\*\*/g, '$1')
- .replace(/\*(.*?)\*/g, '$1')
- .replace(/~~(.*?)~~/g, '$1')
- .replace(/`([^`]+?)`/g, '$1
')
-
- if (converted !== text)
- return parseFragment(converted).childNodes
+ walkSync(parse(processed), (node) => {
+ if (node.type !== TEXT_NODE)
+ return
+ const replacements = [
+ [/\*\*\*(.*?)\*\*\*/g, '$1'],
+ [/\*\*(.*?)\*\*/g, '$1'],
+ [/\*(.*?)\*/g, '$1'],
+ [/~~(.*?)~~/g, '$1'],
+ [/`([^`]+?)`/g, '$1
'],
+ ] as const
+ for (const [re, replacement] of replacements) {
+ for (const match of node.value.matchAll(re)) {
+ if (node.loc) {
+ const start = match.index! + node.loc[0].start
+ const end = start + match[0].length // exclusive end of the match; `start` already includes the node offset, so it must not be added again
+ processed = processed.slice(0, start) + match[0].replace(re, replacement) + processed.slice(end)
+ }
+ else {
+ processed = processed.replace(match[0], match[0].replace(re, replacement))
+ }
+ }
}
- return [node]
- }
+ })
- tree.childNodes = tree.childNodes.flatMap(n => walk(n))
-
- return tree
+ return parse(processed)
}
-export function convertMastodonHTML(html: string, customEmojis: Record = {}) {
+export async function convertMastodonHTML(html: string, customEmojis: Record = {}) {
const tree = parseMastodonHTML(html, customEmojis)
- return serialize(tree)
+ return await render(tree)
}
/**
@@ -106,31 +103,28 @@ export function contentToVNode(
customEmojis: Record = {},
): VNode {
const tree = parseMastodonHTML(content, customEmojis)
- return h(Fragment, tree.childNodes.map(n => treeToVNode(n)))
+ return h(Fragment, (tree.children as Node[]).map(n => treeToVNode(n)))
}
function nodeToVNode(node: Node): VNode | string | null {
- if (node.nodeName === '#text') {
- // @ts-expect-error casing
- return input.value as string
- }
+ if (node.type === TEXT_NODE)
+ return node.value
- if ('childNodes' in node) {
- const attrs = Object.fromEntries(node.attrs.map(i => [i.name, i.value]))
- if (node.nodeName === 'a' && (attrs.href?.startsWith('/') || attrs.href?.startsWith('.'))) {
- attrs.to = attrs.href
- delete attrs.href
- delete attrs.target
+ if ('children' in node) {
+ if (node.name === 'a' && (node.attributes.href?.startsWith('/') || node.attributes.href?.startsWith('.'))) {
+ // Build the RouterLink props on a copy instead of deleting from the parsed node: handleMention calls
+ // nodeToVNode lazily from a slot function, so the same node can be visited again and must keep its `href`.
+ const { href, target: _target, ...attrs } = node.attributes
return h(
RouterLink as any,
- attrs,
- () => node.childNodes.map(treeToVNode),
+ { ...attrs, to: href },
+ () => node.children.map(treeToVNode),
)
}
return h(
- node.nodeName,
- attrs,
- node.childNodes.map(treeToVNode),
+ node.name,
+ node.attributes,
+ node.children.map(treeToVNode),
)
}
return null
@@ -139,12 +133,10 @@ function nodeToVNode(node: Node): VNode | string | null {
function treeToVNode(
input: Node,
): VNode | string | null {
- if (input.nodeName === '#text') {
- // @ts-expect-error casing
+ if (input.type === TEXT_NODE)
return input.value as string
- }
- if ('childNodes' in input) {
+ if ('children' in input) {
const node = handleNode(input)
if (node == null)
return null
@@ -156,8 +148,8 @@ function treeToVNode(
}
export function htmlToText(html: string) {
- const tree = parseFragment(html)
- return tree.childNodes.map(n => treeToText(n)).join('').trim()
+ const tree = parse(html)
+ return (tree.children as Node[]).map(n => treeToText(n)).join('').trim()
}
export function treeToText(input: Node): string {
@@ -165,20 +157,18 @@ export function treeToText(input: Node): string {
let body = ''
let post = ''
- if (input.nodeName === '#text')
- // @ts-expect-error casing
+ if (input.type === TEXT_NODE)
return input.value
- if (input.nodeName === 'br')
+ if (input.name === 'br')
return '\n'
- if (['p', 'pre'].includes(input.nodeName))
+ if (['p', 'pre'].includes(input.name))
pre = '\n'
- if (input.nodeName === 'code') {
- if (input.parentNode?.nodeName === 'pre') {
- const clz = input.attrs.find(attr => attr.name === 'class')
- const lang = clz?.value.replace('language-', '')
+ if (input.name === 'code') {
+ if (input.parent?.name === 'pre') {
+ const lang = input.attributes.class?.replace('language-', '')
pre = `\`\`\`${lang || ''}\n`
post = '\n```'
@@ -188,24 +178,24 @@ export function treeToText(input: Node): string {
post = '`'
}
}
- else if (input.nodeName === 'b' || input.nodeName === 'strong') {
+ else if (input.name === 'b' || input.name === 'strong') {
pre = '**'
post = '**'
}
- else if (input.nodeName === 'i' || input.nodeName === 'em') {
+ else if (input.name === 'i' || input.name === 'em') {
pre = '*'
post = '*'
}
- else if (input.nodeName === 'del') {
+ else if (input.name === 'del') {
pre = '~~'
post = '~~'
}
- if ('childNodes' in input)
- body = input.childNodes.map(n => treeToText(n)).join('')
+ if ('children' in input)
+ body = (input.children as Node[]).map(n => treeToText(n)).join('')
- if (input.nodeName === 'img' && input.attrs.some(attr => attr.name === 'class' && attr.value.includes('custom-emoji')))
- return `:${input.attrs.find(attr => attr.name === 'data-emoji-id')?.value}:`
+ if (input.name === 'img' && input.attributes.class?.includes('custom-emoji'))
+ return `:${input.attributes['data-emoji-id']}:`
return pre + body + post
}
diff --git a/composables/statusDrafts.ts b/composables/statusDrafts.ts
index 1aa85393..fa59e181 100644
--- a/composables/statusDrafts.ts
+++ b/composables/statusDrafts.ts
@@ -40,9 +40,9 @@ export function getDefaultDraft(options: Partial {
return getDefaultDraft({
- status: text || convertMastodonHTML(status.content),
+ status: text || await convertMastodonHTML(status.content),
mediaIds: status.mediaAttachments.map(att => att.id),
visibility: status.visibility,
attachments: status.mediaAttachments,
diff --git a/package.json b/package.json
index edbed7fa..26d07ab4 100644
--- a/package.json
+++ b/package.json
@@ -61,7 +61,6 @@
"lru-cache": "^7.14.1",
"masto": "^4.7.5",
"nuxt": "^3.0.0",
- "parse5": "^7.1.2",
"pinia": "^2.0.27",
"postcss-nested": "^6.0.0",
"prettier": "^2.8.0",
@@ -74,6 +73,7 @@
"tippy.js": "^6.3.7",
"typescript": "^4.9.3",
"ufo": "^1.0.1",
+ "ultrahtml": "^1.0.4",
"unplugin-auto-import": "^0.12.0",
"vite-plugin-inspect": "^0.7.9",
"vitest": "^0.25.3",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 0cc8b531..0b5a7a41 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -44,7 +44,6 @@ specifiers:
lru-cache: ^7.14.1
masto: ^4.7.5
nuxt: ^3.0.0
- parse5: ^7.1.2
pinia: ^2.0.27
postcss-nested: ^6.0.0
prettier: ^2.8.0
@@ -57,6 +56,7 @@ specifiers:
tippy.js: ^6.3.7
typescript: ^4.9.3
ufo: ^1.0.1
+ ultrahtml: ^1.0.4
unplugin-auto-import: ^0.12.0
vite-plugin-inspect: ^0.7.9
vitest: ^0.25.3
@@ -108,7 +108,6 @@ devDependencies:
lru-cache: 7.14.1
masto: 4.7.5
nuxt: 3.0.0_s5ps7njkmjlaqajutnox5ntcla
- parse5: 7.1.2
pinia: 2.0.27_typescript@4.9.3
postcss-nested: 6.0.0
prettier: 2.8.0
@@ -121,6 +120,7 @@ devDependencies:
tippy.js: 6.3.7
typescript: 4.9.3
ufo: 1.0.1
+ ultrahtml: 1.0.4
unplugin-auto-import: 0.12.0
vite-plugin-inspect: 0.7.9
vitest: 0.25.3
@@ -6367,12 +6367,6 @@ packages:
parse-path: 7.0.0
dev: true
- /parse5/7.1.2:
- resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==}
- dependencies:
- entities: 4.4.0
- dev: true
-
/parseurl/1.3.3:
resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
engines: {node: '>= 0.8'}
diff --git a/tests/__snapshots__/content-rich.test.ts.snap b/tests/__snapshots__/content-rich.test.ts.snap
index 6e129390..bbf017ab 100644
--- a/tests/__snapshots__/content-rich.test.ts.snap
+++ b/tests/__snapshots__/content-rich.test.ts.snap
@@ -1,45 +1,25 @@
// Vitest Snapshot v1
exports[`content-rich > code frame 1`] = `
-"Testing code block
-
-
-import { useMouse, usePreferredDark } from '@vueuse/core'
-
+"Testing code block
import { useMouse, usePreferredDark } from '@vueuse/core'
// tracks mouse position
const { x, y } = useMouse()
// is the user prefers dark theme
-const isDark = usePreferredDark()
-
-"
+const isDark = usePreferredDark()
"
`;
exports[`content-rich > code frame 2 1`] = `
"
-
+
Testing
+
const a = hello
-const a = hello
-
"
`;
-exports[`content-rich > code frame empty 1`] = `
-"
-
-
-
-"
-`;
+exports[`content-rich > code frame empty 1`] = `"
"`;
-exports[`content-rich > code frame no lang 1`] = `
-"
-hello world
-
no lang
-
-"
-`;
+exports[`content-rich > code frame no lang 1`] = `"hello world
no lang"`;
exports[`content-rich > custom emoji 1`] = `
"Daniel Roe
@@ -61,7 +41,7 @@ exports[`content-rich > link + mention 1`] = `
>
(migrated from chai+mocha)
diff --git a/tests/__snapshots__/html-parse.test.ts.snap b/tests/__snapshots__/html-parse.test.ts.snap
index da4a8a19..ffcd8d65 100644
--- a/tests/__snapshots__/html-parse.test.ts.snap
+++ b/tests/__snapshots__/html-parse.test.ts.snap
@@ -1,24 +1,18 @@
// Vitest Snapshot v1
exports[`html-parse > code frame > html 1`] = `
-"Testing code block
-
-import { useMouse, usePreferredDark } from '@vueuse/core'
-
+"Testing code block
import { useMouse, usePreferredDark } from '@vueuse/core'
// tracks mouse position
const { x, y } = useMouse()
// is the user prefers dark theme
-const isDark = usePreferredDark()
-
-"
+const isDark = usePreferredDark()
"
`;
exports[`html-parse > code frame > text 1`] = `
"Testing code block
\`\`\`ts
-import { useMouse, usePreferredDark } from '@vueuse/core'
-
+import { useMouse, usePreferredDark } from '@vueuse/core'
// tracks mouse position
const { x, y } = useMouse()
// is the user prefers dark theme
@@ -34,9 +28,8 @@ exports[`html-parse > code frame 2 > html 1`] = `
>
Testing
+ const a = hello
-const a = hello
-
"
`;
@@ -65,13 +58,7 @@ exports[`html-parse > empty > html 1`] = `""`;
exports[`html-parse > empty > text 1`] = `""`;
-exports[`html-parse > inline markdown > html 1`] = `
-"text code
bold italic del
-
-code block
-
-"
-`;
+exports[`html-parse > inline markdown > html 1`] = `"text code
bold italic del
code block
"`;
exports[`html-parse > inline markdown > text 1`] = `
"text \`code\` **bold** *italic* ~~del~~
diff --git a/tests/html-parse.test.ts b/tests/html-parse.test.ts
index e4703919..6308a94c 100644
--- a/tests/html-parse.test.ts
+++ b/tests/html-parse.test.ts
@@ -1,7 +1,7 @@
import type { Emoji } from 'masto'
import { describe, expect, it } from 'vitest'
import { format } from 'prettier'
-import { serialize } from 'parse5'
+import { render as renderTree } from 'ultrahtml'
import { parseMastodonHTML, treeToText } from '~/composables/content'
describe('html-parse', () => {
@@ -53,9 +53,9 @@ describe('html-parse', () => {
async function render(input: string, emojis?: Record) {
const tree = parseMastodonHTML(input, emojis)
- const html = serialize(tree)
+ const html = await renderTree(tree)
let formatted = ''
- const serializedText = tree.childNodes.map(n => treeToText(n)).join('').trim()
+ const serializedText = tree.children.map(n => treeToText(n)).join('').trim()
try {
formatted = format(html, {