mirror of
https://github.com/cheeaun/phanpy.git
synced 2025-02-24 16:58:47 +01:00
Further improve lang detection perf
This commit is contained in:
parent
f9a73777e7
commit
7546b42c7c
3 changed files with 53 additions and 5 deletions
|
@ -1866,7 +1866,16 @@ const Textarea = forwardRef((props, ref) => {
|
||||||
// Newline to prevent multiple line breaks at the end from being collapsed, no idea why
|
// Newline to prevent multiple line breaks at the end from being collapsed, no idea why
|
||||||
}, 500);
|
}, 500);
|
||||||
|
|
||||||
const debouncedAutoDetectLanguage = useDebouncedCallback((text) => {
|
const debouncedAutoDetectLanguage = useDebouncedCallback(() => {
|
||||||
|
// Make use of the highlightRef to get the DOM
|
||||||
|
// Clone the dom
|
||||||
|
const dom = composeHighlightRef.current?.cloneNode(true);
|
||||||
|
if (!dom) return;
|
||||||
|
// Remove mark
|
||||||
|
dom.querySelectorAll('mark').forEach((mark) => {
|
||||||
|
mark.remove();
|
||||||
|
});
|
||||||
|
const text = dom.innerText?.trim();
|
||||||
if (!text) return;
|
if (!text) return;
|
||||||
const langs = detectLangs(text);
|
const langs = detectLangs(text);
|
||||||
if (langs?.length) {
|
if (langs?.length) {
|
||||||
|
@ -1875,7 +1884,7 @@ const Textarea = forwardRef((props, ref) => {
|
||||||
languages: langs,
|
languages: langs,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}, 1000);
|
}, 2000);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<text-expander
|
<text-expander
|
||||||
|
@ -1944,7 +1953,7 @@ const Textarea = forwardRef((props, ref) => {
|
||||||
autoResizeTextarea(target);
|
autoResizeTextarea(target);
|
||||||
props.onInput?.(e);
|
props.onInput?.(e);
|
||||||
throttleHighlightText(text);
|
throttleHighlightText(text);
|
||||||
debouncedAutoDetectLanguage(text);
|
debouncedAutoDetectLanguage();
|
||||||
}}
|
}}
|
||||||
style={{
|
style={{
|
||||||
width: '100%',
|
width: '100%',
|
||||||
|
|
|
@ -161,6 +161,8 @@ const SIZE_CLASS = {
|
||||||
};
|
};
|
||||||
|
|
||||||
const detectLang = mem((text) => {
|
const detectLang = mem((text) => {
|
||||||
|
text = text?.trim();
|
||||||
|
|
||||||
// Ref: https://github.com/komodojp/tinyld/blob/develop/docs/benchmark.md
|
// Ref: https://github.com/komodojp/tinyld/blob/develop/docs/benchmark.md
|
||||||
// 500 should be enough for now, also the default max chars for Mastodon
|
// 500 should be enough for now, also the default max chars for Mastodon
|
||||||
if (text?.length > 500) {
|
if (text?.length > 500) {
|
||||||
|
@ -284,7 +286,40 @@ function Status({
|
||||||
emojiReactions,
|
emojiReactions,
|
||||||
} = status;
|
} = status;
|
||||||
|
|
||||||
let languageAutoDetected = content && detectLang(getHTMLText(content));
|
const [languageAutoDetected, setLanguageAutoDetected] = useState(null);
|
||||||
|
useEffect(() => {
|
||||||
|
if (!content) return;
|
||||||
|
if (_language) return;
|
||||||
|
let timer;
|
||||||
|
timer = setTimeout(() => {
|
||||||
|
let detected = detectLang(
|
||||||
|
getHTMLText(content, {
|
||||||
|
preProcess: (dom) => {
|
||||||
|
// Remove anything that can skew the language detection
|
||||||
|
|
||||||
|
// Remove .mention, .hashtag, pre, code, a:has(.invisible)
|
||||||
|
dom
|
||||||
|
.querySelectorAll(
|
||||||
|
'.mention, .hashtag, pre, code, a:has(.invisible)',
|
||||||
|
)
|
||||||
|
.forEach((a) => {
|
||||||
|
a.remove();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Remove links that contains text that starts with https?://
|
||||||
|
dom.querySelectorAll('a').forEach((a) => {
|
||||||
|
const text = a.innerText.trim();
|
||||||
|
if (text.startsWith('https://') || text.startsWith('http://')) {
|
||||||
|
a.remove();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
setLanguageAutoDetected(detected);
|
||||||
|
}, 1000);
|
||||||
|
return () => clearTimeout(timer);
|
||||||
|
}, [content, _language]);
|
||||||
const language = _language || languageAutoDetected;
|
const language = _language || languageAutoDetected;
|
||||||
|
|
||||||
// if (!mediaAttachments?.length) mediaFirst = false;
|
// if (!mediaAttachments?.length) mediaFirst = false;
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import mem from './mem';
|
import mem from './mem';
|
||||||
|
|
||||||
const div = document.createElement('div');
|
const div = document.createElement('div');
|
||||||
function getHTMLText(html) {
|
function getHTMLText(html, opts) {
|
||||||
if (!html) return '';
|
if (!html) return '';
|
||||||
|
const { preProcess } = opts || {};
|
||||||
|
|
||||||
div.innerHTML = html
|
div.innerHTML = html
|
||||||
.replace(/<\/p>/g, '</p>\n\n')
|
.replace(/<\/p>/g, '</p>\n\n')
|
||||||
.replace(/<\/li>/g, '</li>\n');
|
.replace(/<\/li>/g, '</li>\n');
|
||||||
|
@ -10,6 +12,8 @@ function getHTMLText(html) {
|
||||||
br.replaceWith('\n');
|
br.replaceWith('\n');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
preProcess?.(div);
|
||||||
|
|
||||||
// MASTODON-SPECIFIC classes
|
// MASTODON-SPECIFIC classes
|
||||||
// Remove .invisible
|
// Remove .invisible
|
||||||
div.querySelectorAll('.invisible').forEach((el) => {
|
div.querySelectorAll('.invisible').forEach((el) => {
|
||||||
|
|
Loading…
Reference in a new issue