mirror of
https://github.com/nova-r/fediplug.git
synced 2025-03-10 15:49:00 +01:00
slight link extraction refactor
This commit is contained in:
parent
33e7da196f
commit
8e451e9d62
5 changed files with 41 additions and 4 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -3,5 +3,6 @@
|
||||||
*.mp4
|
*.mp4
|
||||||
*.pyc
|
*.pyc
|
||||||
*.secret
|
*.secret
|
||||||
|
.cache
|
||||||
.vscode
|
.vscode
|
||||||
__pycache__
|
__pycache__
|
||||||
|
|
1
Pipfile
1
Pipfile
|
@ -16,3 +16,4 @@ youtube-dl = "==2017.11.26"
|
||||||
[dev-packages]
|
[dev-packages]
|
||||||
|
|
||||||
"e1839a8" = {path = ".", editable = true}
|
"e1839a8" = {path = ".", editable = true}
|
||||||
|
pytest = "*"
|
||||||
|
|
29
Pipfile.lock
generated
29
Pipfile.lock
generated
|
@ -1,7 +1,7 @@
|
||||||
{
|
{
|
||||||
"_meta": {
|
"_meta": {
|
||||||
"hash": {
|
"hash": {
|
||||||
"sha256": "fc88391d1ef7a5701ec70beb432898e855e039105cb787bf9dd91ba3aa35c604"
|
"sha256": "971f24d8efade8b46ac288d607d6200a9ecab3a67a81b3f717d38baf720be05d"
|
||||||
},
|
},
|
||||||
"host-environment-markers": {
|
"host-environment-markers": {
|
||||||
"implementation_name": "cpython",
|
"implementation_name": "cpython",
|
||||||
|
@ -153,6 +153,13 @@
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"develop": {
|
"develop": {
|
||||||
|
"attrs": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:a17a9573a6f475c99b551c0e0a812707ddda1ec9653bed04c13841404ed6f450",
|
||||||
|
"sha256:1c7960ccfd6a005cd9f7ba884e6316b5e430a3f1a6c37c5f87d8b43f83b54ec9"
|
||||||
|
],
|
||||||
|
"version": "==17.4.0"
|
||||||
|
},
|
||||||
"certifi": {
|
"certifi": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
|
"sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
|
||||||
|
@ -232,6 +239,26 @@
|
||||||
],
|
],
|
||||||
"version": "==1.2.1"
|
"version": "==1.2.1"
|
||||||
},
|
},
|
||||||
|
"pluggy": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:7f8ae7f5bdf75671a718d2daf0a64b7885f74510bcd98b1a0bb420eb9a9d0cff"
|
||||||
|
],
|
||||||
|
"version": "==0.6.0"
|
||||||
|
},
|
||||||
|
"py": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:8cca5c229d225f8c1e3085be4fcf306090b00850fefad892f9d96c7b6e2f310f",
|
||||||
|
"sha256:ca18943e28235417756316bfada6cd96b23ce60dd532642690dcfdaba988a76d"
|
||||||
|
],
|
||||||
|
"version": "==1.5.2"
|
||||||
|
},
|
||||||
|
"pytest": {
|
||||||
|
"hashes": [
|
||||||
|
"sha256:b84878865558194630c6147f44bdaef27222a9f153bbd4a08908b16bf285e0b1",
|
||||||
|
"sha256:53548280ede7818f4dc2ad96608b9f08ae2cc2ca3874f2ceb6f97e3583f25bc4"
|
||||||
|
],
|
||||||
|
"version": "==3.3.2"
|
||||||
|
},
|
||||||
"python-dateutil": {
|
"python-dateutil": {
|
||||||
"hashes": [
|
"hashes": [
|
||||||
"sha256:95511bae634d69bc7329ba55e646499a842bc4ec342ad54a8cdb65645a0aad3c",
|
"sha256:95511bae634d69bc7329ba55e646499a842bc4ec342ad54a8cdb65645a0aad3c",
|
||||||
|
|
|
@ -85,8 +85,8 @@ def stream(api_base_url):
|
||||||
def extract_tags(toot):
|
def extract_tags(toot):
|
||||||
return [tag['name'] for tag in toot['tags']]
|
return [tag['name'] for tag in toot['tags']]
|
||||||
|
|
||||||
def has_external_link_class(class_string):
|
def link_is_internal(link):
|
||||||
classes = class_string.split(' ')
|
classes = link.attrib.get('class', '').split(' ')
|
||||||
if classes:
|
if classes:
|
||||||
return 'mention' in classes
|
return 'mention' in classes
|
||||||
|
|
||||||
|
@ -95,7 +95,7 @@ def has_external_link_class(class_string):
|
||||||
def extract_links(toot):
|
def extract_links(toot):
|
||||||
html = HTML(toot['content'])
|
html = HTML(toot['content'])
|
||||||
all_links = html.cssselect('a')
|
all_links = html.cssselect('a')
|
||||||
return [link.attrib['href'] for link in all_links if not has_external_link_class(link.attrib.get('class', ''))]
|
return [link.attrib['href'] for link in all_links if not link_is_internal(link)]
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
from getpass import getpass
|
from getpass import getpass
|
||||||
|
|
8
test_fediplay.py
Normal file
8
test_fediplay.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
import fediplay
|
||||||
|
|
||||||
|
def test_extract_links():
|
||||||
|
toot = {
|
||||||
|
'content': "<p><a href=\"https://cybre.space/tags/nowplaying\" class=\"mention hashtag\" rel=\"tag\">#<span>nowplaying</span></a> <a href=\"https://cybre.space/tags/fediplay\" class=\"mention hashtag\" rel=\"tag\">#<span>fediplay</span></a> Grimes ft. Janelle Mon\u00e1e - Venus Fly <a href=\"https://www.youtube.com/watch?v=eTLTXDHrgtw\" rel=\"nofollow noopener\" target=\"_blank\"><span class=\"invisible\">https://www.</span><span class=\"ellipsis\">youtube.com/watch?v=eTLTXDHrgt</span><span class=\"invisible\">w</span></a></p>"
|
||||||
|
}
|
||||||
|
urls = fediplay.extract_links(toot)
|
||||||
|
assert urls == ['https://www.youtube.com/watch?v=eTLTXDHrgtw']
|
Loading…
Reference in a new issue