From 77dca707929d680381298258025a06a02dc86342 Mon Sep 17 00:00:00 2001 From: jj Date: Fri, 7 Feb 2025 22:47:36 +0000 Subject: [PATCH 01/21] api/instagram: yet another attempt at resurrection --- api/src/processing/services/instagram.js | 130 ++++++++++++++++++----- 1 file changed, 104 insertions(+), 26 deletions(-) diff --git a/api/src/processing/services/instagram.js b/api/src/processing/services/instagram.js index d9a646aa..25f5e099 100644 --- a/api/src/processing/services/instagram.js +++ b/api/src/processing/services/instagram.js @@ -1,6 +1,7 @@ import { genericUserAgent } from "../../config.js"; import { createStream } from "../../stream/manage.js"; import { getCookie, updateCookie } from "../cookie/manager.js"; +import { randomBytes } from "node:crypto"; const commonHeaders = { "user-agent": genericUserAgent, @@ -8,6 +9,7 @@ const commonHeaders = { "sec-fetch-site": "same-origin", "x-ig-app-id": "936619743392459" } + const mobileHeaders = { "x-ig-app-locale": "en_US", "x-ig-device-locale": "en_US", @@ -19,6 +21,7 @@ const mobileHeaders = { "x-fb-server-cluster": "True", "content-length": "0", } + const embedHeaders = { "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "Accept-Language": "en-GB,en;q=0.9", @@ -41,6 +44,16 @@ const cachedDtsg = { expiry: 0 } +const getNumberFromQuery = (name, data) => { + const s = data?.match(new RegExp(name + '=(\\d+)'))?.[1]; + if (+s) return +s; +} + +const getObjectFromEntries = (name, data) => { + const obj = data?.match(new RegExp('\\["' + name + '",.*?,({.*?}),\\d+\\]'))?.[1]; + return obj && JSON.parse(obj); +} + export default function(obj) { const dispatcher = obj.dispatcher; @@ -91,6 +104,7 @@ export default function(obj) { updateCookie(cookie, data.headers); return data.json(); } + async function getMediaId(id, { cookie, token } = {}) { const oembedURL = new URL('https://i.instagram.com/api/v1/oembed/'); oembedURL.searchParams.set('url', `https://www.instagram.com/p/${id}/`); @@ -119,6 +133,7 @@ export default function(obj) { return mediaInfo?.items?.[0]; } + async function requestHTML(id, cookie) { const data = await fetch(`https://www.instagram.com/p/${id}/embed/captioned/`, { headers: { @@ -136,35 +151,98 @@ export default function(obj) { return embedData; } - async function requestGQL(id, cookie) { - let dtsgId; - if (cookie) { - dtsgId = await findDtsgId(cookie); - } - const url = new URL('https://www.instagram.com/api/graphql/'); + async function getGQLParams(id, cookie) { + const req = await fetch(`https://www.instagram.com/p/${id}/`, { + headers: { + ...embedHeaders, + cookie + } + }); - const requestData = { - jazoest: '26406', - variables: JSON.stringify({ - shortcode: id, - __relay_internal__pv__PolarisShareMenurelayprovider: false - }), - doc_id: '7153618348081770' + const html = await req.text(); + const siteData = getObjectFromEntries('SiteData', html); + const polarisSiteData = getObjectFromEntries('PolarisSiteData', html); + const webConfig = getObjectFromEntries('DGWWebConfig', html); + const pushInfo = getObjectFromEntries('InstagramWebPushInfo', html); + const lsd = getObjectFromEntries('LSD', html)?.token || randomBytes(8).toString('base64url'); + const csrf = getObjectFromEntries('InstagramSecurityConfig', html)?.csrf_token; + + const anon_cookie = [ + csrf && "csrftoken=" + csrf, + polarisSiteData?.device_id && "ig_did=" + polarisSiteData?.device_id, + "wd=1280x720", + "dpr=2", + polarisSiteData?.machine_id && "mid=" + polarisSiteData.machine_id, + "ig_nrcb=1" + ].filter(a => a).join('; '); + + return { + headers: { + 'X-IG-App-Id': webConfig?.appId || '936619743392459', + 'X-FB-LSD': lsd, + 'X-CSRFToken': csrf, + 'X-Bloks-Version-Id': getObjectFromEntries('WebBloksVersioningID', html)?.versioningID, + 'x-asbd-id': 129477, + cookie: anon_cookie + }, + body: { + __hs: siteData?.haste_session || '20126.HYP:instagram_web_pkg.2.1...0', + __rev: pushInfo?.rollout_hash || '1019933358', + __s: '::' + Math.random().toString(36).substring(2).replace(/\d/g, '').slice(0, 6), + __hsi: siteData?.hsi || '7436540909012459023', + __dyn: randomBytes(154).toString('base64url'), + __csr: randomBytes(154).toString('base64url'), + __comet_req: getNumberFromQuery('__comet_req', html) || '7', + lsd, + jazoest: getNumberFromQuery('jazoest', html) || Math.floor(Math.random() * 10000), + __spin_r: siteData?.__spin_r || '1019933358', + __spin_b: siteData?.__spin_b || 'trunk', + __spin_t: siteData?.__spin_t || Math.floor(new Date().getTime() / 1000), + } }; - if (dtsgId) { - requestData.fb_dtsg = dtsgId; - } + } - return (await request(url, cookie, 'POST', requestData)) - .data - ?.xdt_api__v1__media__shortcode__web_info - ?.items - ?.[0]; + async function requestGQL(id, cookie) { + const { headers, body } = await getGQLParams(id, cookie); + + const req = await fetch('https://www.instagram.com/graphql/query', { + method: 'POST', + headers: { + ...embedHeaders, + ...headers, + cookie, + 'content-type': 'application/x-www-form-urlencoded', + 'X-FB-Friendly-Name': 'PolarisPostActionLoadPostQueryQuery', + }, + body: new URLSearchParams({ + av: '0', + __d: 'www', + __user: '0', + __a: '1', + __req: 'b', + dpr: '2', + __ccg: 'EXCELLENT', + ...body, + fb_api_caller_class: 'RelayModern', + fb_api_req_friendly_name: 'PolarisPostActionLoadPostQueryQuery', + variables: JSON.stringify({ + shortcode: id, + fetch_tagged_user_count: null, + hoisted_comment_id: null, + hoisted_reply_id: null + }), + server_timestamps: true, + doc_id: '8845758582119845' + }).toString() + }); + + return { gql_data: (await req.json()).data } } function extractOldPost(data, id, alwaysProxy) { - const sidecar = data?.gql_data?.shortcode_media?.edge_sidecar_to_children; + const shortcodeMedia = data?.gql_data?.shortcode_media || data?.gql_data?.xdt_shortcode_media; + const sidecar = shortcodeMedia?.edge_sidecar_to_children; if (sidecar) { const picker = sidecar.edges.filter(e => e.node?.display_url) .map((e, i) => { @@ -196,15 +274,15 @@ export default function(obj) { }); if (picker.length) return { picker } - } else if (data?.gql_data?.shortcode_media?.video_url) { + } else if (shortcodeMedia?.video_url) { return { - urls: data.gql_data.shortcode_media.video_url, + urls: shortcodeMedia.video_url, filename: `instagram_${id}.mp4`, audioFilename: `instagram_${id}_audio` } - } else if (data?.gql_data?.shortcode_media?.display_url) { + } else if (shortcodeMedia?.display_url) { return { - urls: data.gql_data?.shortcode_media.display_url, + urls: shortcodeMedia.display_url, isPhoto: true } } From 6e8b4f30c14c7210ba18d1e5d603c051ed448a59 Mon Sep 17 00:00:00 2001 From: jj Date: Sat, 8 Feb 2025 13:53:29 +0000 Subject: [PATCH 02/21] api/url: add function for resolving shortlinks motivation: we frequently need to resolve shortlinks to full URLs let's have a common standard function for doing this safely instead of reinventing the wheel in every single service module --- api/src/misc/utils.js | 15 ++++++++------- api/src/processing/url.js | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/api/src/misc/utils.js b/api/src/misc/utils.js index 331528d4..ffa75433 100644 --- a/api/src/misc/utils.js +++ b/api/src/misc/utils.js @@ -1,12 +1,13 @@ +import { request } from 'undici'; const redirectStatuses = new Set([301, 302, 303, 307, 308]); -export async function getRedirectingURL(url, dispatcher) { - const location = await fetch(url, { - redirect: 'manual', - dispatcher, - }).then((r) => { - if (redirectStatuses.has(r.status) && r.headers.has('location')) { - return r.headers.get('location'); +export async function getRedirectingURL(url, dispatcher, userAgent) { + const location = await request(url, { + dispatcher, method: 'HEAD', + headers: { 'user-agent': userAgent } + }).then(r => { + if (redirectStatuses.has(r.statusCode) && r.headers['location']) { + return r.headers['location']; } }).catch(() => null); diff --git a/api/src/processing/url.js b/api/src/processing/url.js index cfbbecc0..5c4035eb 100644 --- a/api/src/processing/url.js +++ b/api/src/processing/url.js @@ -4,6 +4,7 @@ import { strict as assert } from "node:assert"; import { env } from "../config.js"; import { services } from "./service-config.js"; import { friendlyServiceName } from "./service-alias.js"; +import { getRedirectingURL } from "../misc/utils.js"; function aliasURL(url) { assert(url instanceof URL); @@ -221,3 +222,17 @@ export function extract(url) { return { host, patternMatch }; } + +export async function resolveRedirectingURL(url, dispatcher, userAgent) { + const originalService = getHostIfValid(normalizeURL(url)); + if (!originalService) return; + + const canonicalURL = await getRedirectingURL(url, dispatcher, userAgent); + if (!canonicalURL) return; + + const { host, patternMatch } = extract(normalizeURL(canonicalURL)); + + if (host === originalService) { + return patternMatch; + } +} From 9e6582b76c32ed08c55459e3ad0025887a358622 Mon Sep 17 00:00:00 2001 From: jj Date: Sat, 8 Feb 2025 16:05:51 +0000 Subject: [PATCH 03/21] api/xiaohongshu: use shortlink resolver --- api/src/processing/services/xiaohongshu.js | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/api/src/processing/services/xiaohongshu.js b/api/src/processing/services/xiaohongshu.js index bbb53ab1..06de21aa 100644 --- a/api/src/processing/services/xiaohongshu.js +++ b/api/src/processing/services/xiaohongshu.js @@ -1,7 +1,6 @@ -import { extract, normalizeURL } from "../url.js"; +import { resolveRedirectingURL } from "../url.js"; import { genericUserAgent } from "../../config.js"; import { createStream } from "../../stream/manage.js"; -import { getRedirectingURL } from "../../misc/utils.js"; const https = (url) => { return url.replace(/^http:/i, 'https:'); @@ -12,19 +11,13 @@ export default async function ({ id, token, shareId, h265, isAudioOnly, dispatch let xsecToken = token; if (!noteId) { - const extractedURL = await getRedirectingURL( + const patternMatch = await resolveRedirectingURL( `https://xhslink.com/a/${shareId}`, dispatcher ); - if (extractedURL) { - const { patternMatch } = extract(normalizeURL(extractedURL)); - - if (patternMatch) { - noteId = patternMatch.id; - xsecToken = patternMatch.token; - } - } + noteId = patternMatch?.id; + xsecToken = patternMatch?.token; } if (!noteId || !xsecToken) return { error: "fetch.short_link" }; From a758b1dbc64994e0670ee795f6b8817e3a8c9e74 Mon Sep 17 00:00:00 2001 From: jj Date: Sat, 8 Feb 2025 16:06:36 +0000 Subject: [PATCH 04/21] api/snapchat: use shortlink resolver --- api/src/processing/services/snapchat.js | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/api/src/processing/services/snapchat.js b/api/src/processing/services/snapchat.js index 4c62a5ff..10359a03 100644 --- a/api/src/processing/services/snapchat.js +++ b/api/src/processing/services/snapchat.js @@ -1,7 +1,6 @@ -import { extract, normalizeURL } from "../url.js"; +import { resolveRedirectingURL } from "../url.js"; import { genericUserAgent } from "../../config.js"; import { createStream } from "../../stream/manage.js"; -import { getRedirectingURL } from "../../misc/utils.js"; const SPOTLIGHT_VIDEO_REGEX = //; const NEXT_DATA_REGEX = /