1+ import json
12import math
3+ import re
4+
25import aiohttp
6+ from bs4 import BeautifulSoup
37
48from loading_sdk .settings import (
59 API_URL ,
610 API_VERSION ,
11+ BASE_URL ,
712 EDITORIAL_POST_TYPES ,
813 EDITORIAL_SORT ,
914 USER_AGENT ,
@@ -17,6 +22,68 @@ async def async_loading_api_client(email=None, password=None):
1722 return client
1823
1924
class AboutPageExtractor:
    """Scrapes the "about" page data out of the site's JavaScript bundles.

    The about page content (people and moderators) is not exposed through
    the web API; it lives inside a webpack chunk. This extractor downloads
    the about page, locates the main script, resolves the chunk that holds
    the about data, and parses it back into Python objects.
    """

    async def extract_about_data(self):
        """Fetch and return the about page data.

        :rtype: dict with "people" and "moderators" keys, or None when the
            data could not be located in the chunk source.
        """
        about_page_source = await self._get_source(f"{BASE_URL}/om")
        main_script_url = self._extract_main_script_url(about_page_source)
        main_script_source = await self._get_source(f"{BASE_URL}/{main_script_url}")
        about_script_url = self._get_about_script_url(main_script_source)
        about_script_source = await self._get_source(about_script_url)

        return self._get_about_data(about_script_source)

    async def _get_source(self, url):
        """Return the response body of *url* as text."""
        headers = {"User-Agent": USER_AGENT}

        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                return await response.text()

    def _get_about_script_url(self, source_code):
        """Return the full URL of the webpack chunk holding the about data.

        Parses the chunk-id-to-hash mapping embedded in the main script and
        builds the chunk URLs; the about data lives in the last chunk.

        :raises IndexError: if the chunk mapping is not found in the source.
        """
        chunk_urls = []

        # Extracts the code with the javascript chunks.
        # Raw string: "\{" is an invalid escape in a normal string literal.
        p = re.compile(r"(static/js/).+?(?=\{)(.+?(?=\[)).+(.chunk.js)")
        m = p.search(source_code)

        if m:
            # Transform the code into valid JSON (quote the numeric keys) so
            # the chunk ids can be stored in a python dict.
            s = re.sub(r"([0-9]+?(?=:))", r'"\1"', m.group(2))
            chunk_ids = json.loads(s)

            for k, v in chunk_ids.items():
                chunk_url = f"{BASE_URL}/{m.group(1)}{k}.{v}{m.group(3)}"
                chunk_urls.append(chunk_url)

        return chunk_urls[-1]

    def _get_about_data(self, source_code):
        """Extract the people and moderators lists from the chunk source.

        The lists are JS object literals; they are massaged into valid JSON
        before parsing.

        :rtype: dict with "people" and "moderators" keys, or None when the
            expected variables are not present in *source_code*.
        """
        # Raw string avoids the invalid "\." escape in the pattern.
        m = re.search(r"var.e=(.+?)(?=\.map).+a=(.+?)(?=\.map)", source_code)

        if m:
            # Quote the bare object keys so the JS literal becomes valid JSON.
            people = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(1))
            # Swap the outermost single-quote pair for double quotes.
            people = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', people)
            # These two texts legitimately contain double quotes; turn them
            # into single quotes so they don't break the JSON parsing.
            people = people.replace('slags "vuxen p', "slags 'vuxen p")
            people = people.replace('riktigt"-framtid', "riktigt'-framtid")
            people = people.replace("\\n", "")
            people = people.encode("utf-8").decode("unicode_escape")

            moderators = re.sub(r"(\{|\,)([a-z]+)(\:)", r'\1"\2"\3', m.group(2))
            moderators = re.sub(r"(.+)(')(.+)(')(.+)", r'\1"\3"\5', moderators)
            moderators = moderators.replace("\\n", "")
            moderators = moderators.encode("utf-8").decode("unicode_escape")

            about = {"people": json.loads(people), "moderators": json.loads(moderators)}

            return about

    def _extract_main_script_url(self, html):
        """Return the site-relative path of the main webpack bundle in *html*."""
        soup = BeautifulSoup(html, "html.parser")
        # Raw string avoids the invalid "\." escape warning.
        main_script = soup.find(src=re.compile(r"/static/js/main\.[0-9a-zA-Z]+\.js"))

        # Drop the leading "/" so the path can be appended to BASE_URL.
        return main_script["src"][1:]
85+
86+
2087class AsyncLoadingApiClient :
2188 """
2289 An async client that allows python apps to easily communicate with the loading forums web api.
@@ -490,3 +557,13 @@ async def edit_thread(self, thread_id, message):
490557 thread_data ["message" ] = "Thread updated"
491558
492559 return thread_data
560+
561+ async def get_about (self ):
562+ """Get about page data
563+
564+ :rtype dict
565+ """
566+ about_page = AboutPageExtractor ()
567+ about_data = await about_page .extract_about_data ()
568+
569+ return about_data
0 commit comments