Skip to content

Commit b638d71

Browse files
authored
Merge pull request #50 from AnswerDotAI/enhance-url2note
enhance url2note
2 parents c696ffd + eaca8a8 commit b638d71

File tree

2 files changed

+82
-32
lines changed

2 files changed

+82
-32
lines changed

dialoghelper/core.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -272,12 +272,16 @@ def run_msg(
272272
def url2note(
273273
url:str, # URL to read
274274
extract_section:bool=True, # If url has an anchor, return only that section
275-
selector:str=None # Select section(s) using BeautifulSoup.select (overrides extract_section)
275+
selector:str=None, # Select section(s) using BeautifulSoup.select (overrides extract_section)
276+
ai_img:bool=True, # Make images visible to the AI
277+
split_re:str=r'(?=^#{1,6} .+)' # Regex to split content into multiple notes, set to False for single note
276278
):
277-
"Read URL as markdown, and add a note below current message with the result"
278-
res = read_url(url, as_md=True, extract_section=extract_section, selector=selector)
279+
"Read URL as markdown, and add note(s) below current message with the result"
280+
res = read_url(url, as_md=True, extract_section=extract_section, selector=selector, ai_img=ai_img)
281+
if split_re: return [add_msg(s) for s in re.split(split_re, res, flags=re.MULTILINE) if s.strip()]
279282
return add_msg(res)
280283

284+
281285
# %% ../nbs/00_core.ipynb
282286
def ast_py(code:str):
283287
"Get an SgRoot root node for python `code`"

nbs/00_core.ipynb

Lines changed: 75 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -645,7 +645,7 @@
645645
"output_type": "stream",
646646
"text": [
647647
"_9c544573\n",
648-
"\n"
648+
"_9558b075\n"
649649
]
650650
}
651651
],
@@ -855,6 +855,16 @@
855855
"_edit_id = add_msg('This message should be found.\\n\\nThis is a multiline message.')"
856856
]
857857
},
858+
{
859+
"cell_type": "markdown",
860+
"id": "57797a25",
861+
"metadata": {},
862+
"source": [
863+
"This message should be found.\n",
864+
"\n",
865+
"This is a multiline message."
866+
]
867+
},
858868
{
859869
"cell_type": "code",
860870
"execution_count": null,
@@ -936,6 +946,16 @@
936946
"id": "6e354677",
937947
"metadata": {},
938948
"outputs": [
949+
{
950+
"data": {
951+
"text/plain": [
952+
"2"
953+
]
954+
},
955+
"execution_count": null,
956+
"metadata": {},
957+
"output_type": "execute_result"
958+
},
939959
{
940960
"data": {
941961
"text/plain": [
@@ -993,11 +1013,14 @@
9931013
"def url2note(\n",
9941014
" url:str, # URL to read\n",
9951015
" extract_section:bool=True, # If url has an anchor, return only that section\n",
996-
" selector:str=None # Select section(s) using BeautifulSoup.select (overrides extract_section)\n",
1016+
" selector:str=None, # Select section(s) using BeautifulSoup.select (overrides extract_section)\n",
1017+
" ai_img:bool=True, # Make images visible to the AI\n",
1018+
" split_re:str=r'(?=^#{1,6} .+)' # Regex to split content into multiple notes, set to False for single note\n",
9971019
"):\n",
998-
" \"Read URL as markdown, and add a note below current message with the result\"\n",
999-
" res = read_url(url, as_md=True, extract_section=extract_section, selector=selector)\n",
1000-
" return add_msg(res)"
1020+
" \"Read URL as markdown, and add note(s) below current message with the result\"\n",
1021+
" res = read_url(url, as_md=True, extract_section=extract_section, selector=selector, ai_img=ai_img)\n",
1022+
" if split_re: return [add_msg(s) for s in re.split(split_re, res, flags=re.MULTILINE) if s.strip()]\n",
1023+
" return add_msg(res)\n"
10011024
]
10021025
},
10031026
{
@@ -1020,6 +1043,26 @@
10201043
"del_msg(_id)"
10211044
]
10221045
},
1046+
{
1047+
"cell_type": "code",
1048+
"execution_count": null,
1049+
"id": "43554bd9",
1050+
"metadata": {},
1051+
"outputs": [],
1052+
"source": [
1053+
"_ids = url2note('https://www.answer.ai/posts/2025-10-01-cachy.html')"
1054+
]
1055+
},
1056+
{
1057+
"cell_type": "code",
1058+
"execution_count": null,
1059+
"id": "b02115e6",
1060+
"metadata": {},
1061+
"outputs": [],
1062+
"source": [
1063+
"_ = [del_msg(i) for i in _ids]"
1064+
]
1065+
},
10231066
{
10241067
"cell_type": "code",
10251068
"execution_count": null,
@@ -1089,25 +1132,25 @@
10891132
{
10901133
"data": {
10911134
"text/plain": [
1092-
"[(\"xpost('http://localhost:5001/pop_data_blocking_', data={'data_id': idx})\",\n",
1093-
" {'B': {'text': \"{'data_id': idx}\",\n",
1094-
" 'range': {'byteOffset': {'start': 1185, 'end': 1201},\n",
1095-
" 'start': {'line': 38, 'column': 72},\n",
1096-
" 'end': {'line': 38, 'column': 88}}},\n",
1097-
" 'A': {'text': \"'http://localhost:5001/pop_data_blocking_'\",\n",
1098-
" 'range': {'byteOffset': {'start': 1136, 'end': 1178},\n",
1099-
" 'start': {'line': 38, 'column': 23},\n",
1100-
" 'end': {'line': 38, 'column': 65}}}},\n",
1101-
" 'dialoghelper/experimental.py'),\n",
1102-
" ('xpost(f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\', data=data)',\n",
1103-
" {'A': {'text': 'f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\'',\n",
1104-
" 'range': {'byteOffset': {'start': 2624, 'end': 2672},\n",
1105-
" 'start': {'line': 70, 'column': 16},\n",
1106-
" 'end': {'line': 70, 'column': 64}}},\n",
1107-
" 'B': {'text': 'data',\n",
1108-
" 'range': {'byteOffset': {'start': 2679, 'end': 2683},\n",
1135+
"[('xpost(f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\', data=data)',\n",
1136+
" {'B': {'text': 'data',\n",
1137+
" 'range': {'byteOffset': {'start': 2753, 'end': 2757},\n",
11091138
" 'start': {'line': 70, 'column': 71},\n",
1110-
" 'end': {'line': 70, 'column': 75}}}},\n",
1139+
" 'end': {'line': 70, 'column': 75}}},\n",
1140+
" 'A': {'text': 'f\\'http://localhost:{dh_settings[\"port\"]}/{path}\\'',\n",
1141+
" 'range': {'byteOffset': {'start': 2698, 'end': 2746},\n",
1142+
" 'start': {'line': 70, 'column': 16},\n",
1143+
" 'end': {'line': 70, 'column': 64}}}},\n",
1144+
" 'dialoghelper/core.py'),\n",
1145+
" (\"xpost(url, data={'data_id': idx, 'timeout': timeout})\",\n",
1146+
" {'B': {'text': \"{'data_id': idx, 'timeout': timeout}\",\n",
1147+
" 'range': {'byteOffset': {'start': 4450, 'end': 4486},\n",
1148+
" 'start': {'line': 121, 'column': 36},\n",
1149+
" 'end': {'line': 121, 'column': 72}}},\n",
1150+
" 'A': {'text': 'url',\n",
1151+
" 'range': {'byteOffset': {'start': 4440, 'end': 4443},\n",
1152+
" 'start': {'line': 121, 'column': 26},\n",
1153+
" 'end': {'line': 121, 'column': 29}}}},\n",
11111154
" 'dialoghelper/core.py')]"
11121155
]
11131156
},
@@ -1156,7 +1199,7 @@
11561199
{
11571200
"data": {
11581201
"text/plain": [
1159-
"{'success': 'Inserted text after line 5 in message _c3581eea'}"
1202+
"{'success': 'Inserted text after line 5 in message _f813f590'}"
11601203
]
11611204
},
11621205
"execution_count": null,
@@ -1220,7 +1263,7 @@
12201263
{
12211264
"data": {
12221265
"text/plain": [
1223-
"{'success': 'Replaced text in message _c3581eea'}"
1266+
"{'success': 'Replaced text in message _f813f590'}"
12241267
]
12251268
},
12261269
"execution_count": null,
@@ -1282,7 +1325,7 @@
12821325
{
12831326
"data": {
12841327
"text/plain": [
1285-
"{'success': 'Successfully replaced all the strings in message _c3581eea'}"
1328+
"{'success': 'Successfully replaced all the strings in message _f813f590'}"
12861329
]
12871330
},
12881331
"execution_count": null,
@@ -1345,7 +1388,7 @@
13451388
{
13461389
"data": {
13471390
"text/plain": [
1348-
"{'success': 'Replaced lines 2 to 4 in message _c3581eea'}"
1391+
"{'success': 'Replaced lines 2 to 4 in message _f813f590'}"
13491392
]
13501393
},
13511394
"execution_count": null,
@@ -1818,9 +1861,12 @@
18181861
"- &`find_var`: Search for var in all frames of the call stack\n",
18191862
"- &`set_var`: Set var to val after finding it in all frames of the call stack\n",
18201863
"- &`find_dname`: Get the message id by searching the call stack for __dialog_id.\n",
1821-
"- &`find_msg_id`: Get the message id by searching the call stack for __dialog_id.\n",
1864+
"- &`find_msg_id`: Get the message id by searching the call stack for __msg_id.\n",
18221865
"- &`curr_dialog`: Get the current dialog info.\n",
18231866
"- &`msg_idx`: Get absolute index of message in dialog.\n",
1867+
"- &`add_scr`: Swap a script element to the end of the js-script element\n",
1868+
"- &`iife`: Wrap javascript code string in an IIFE and execute it via `add_html`\n",
1869+
"- &`event_get`: Call `fire_event` and then `pop_data` to get a response\n",
18241870
"- &`find_msgs`: Find `list[dict]` of messages in current specific dialog that contain the given information. To refer to a message found later, use its `id` field.\n",
18251871
"- &`add_html`: Send HTML to the browser to be swapped into the DOM\n",
18261872
"- &`read_msg`: Get the message indexed in the current dialog.\n",
@@ -1833,7 +1879,7 @@
18331879
" - Use `content` param to update contents.\n",
18341880
" - Only include parameters to update--missing ones will be left unchanged.\n",
18351881
"- &`run_msg`: Adds a message to the run queue. Use read_msg to see the output once it runs.\n",
1836-
"- &`url2note`: Read URL as markdown, and add a note below current message with the result\n",
1882+
"- &`url2note`: Read URL as markdown, and add note(s) below current message with the result\n",
18371883
"- &`ast_py`: Get an SgRoot root node for python `code`\n",
18381884
"- &`ast_grep`: Use the `ast-grep` command to find `pattern` in `path`\n",
18391885
"- &`msg_insert_line`: Insert text at a specific line number in a message\n",

0 commit comments

Comments
 (0)