Skip to content

Commit 8a23f73

Browse files
committed
Merge branch 'main' of https://github.com/open-sciencelab/GraphGen into feature/protein-qa
2 parents 28e5795 + 37f1002 commit 8a23f73

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+637
-670
lines changed

.env.example

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ TRAINEE_MODEL=gpt-4o-mini
1414
TRAINEE_BASE_URL=
1515
TRAINEE_API_KEY=
1616

17+
# azure_openai_api
18+
# SYNTHESIZER_BACKEND=azure_openai_api
19+
# SYNTHESIZER_MODEL below must be set to your "Deployment name" in Azure
20+
# SYNTHESIZER_MODEL=<your-deployment-name>
21+
# SYNTHESIZER_BASE_URL=https://<your-resource-name>.openai.azure.com/openai/deployments/<your-deployment-name>/chat/completions
22+
# SYNTHESIZER_API_KEY=
23+
# SYNTHESIZER_API_VERSION=<api-version>
24+
1725
# # ollama_api
1826
# SYNTHESIZER_BACKEND=ollama_api
1927
# SYNTHESIZER_MODEL=gemma3

.pylintrc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ source-roots=
100100

101101
# When enabled, pylint would attempt to guess common misconfiguration and emit
102102
# user-friendly hints instead of false-positive error messages.
103-
suggestion-mode=yes
103+
# suggestion-mode=yes
104104

105105
# Allow loading of arbitrary C extensions. Extensions are imported into the
106106
# active Python interpreter and may run arbitrary code.

README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
[![Hugging Face](https://img.shields.io/badge/Demo-on%20HF-blue?logo=huggingface&logoColor=yellow)](https://huggingface.co/spaces/chenzihong/GraphGen)
1818
[![Model Scope](https://img.shields.io/badge/%F0%9F%A4%96%20Demo-on%20MS-green)](https://modelscope.cn/studios/chenzihong/GraphGen)
19-
[![OpenXLab](https://img.shields.io/badge/Demo-on%20OpenXLab-blue?logo=openxlab&logoColor=yellow)](https://g-app-center-120612-6433-jpdvmvp.openxlab.space)
2019

2120

2221
GraphGen: Enhancing Supervised Fine-Tuning for LLMs with Knowledge-Driven Synthetic Data Generation
@@ -107,7 +106,7 @@ Users can flexibly configure according to the needs of synthetic data.
107106

108107
## 🚀 Quick Start
109108

110-
Experience GraphGen through [Web](https://g-app-center-120612-6433-jpdvmvp.openxlab.space) or [Backup Web Entrance](https://openxlab.org.cn/apps/detail/chenzihonga/GraphGen)
109+
Try the GraphGen demo on [Hugging Face](https://huggingface.co/spaces/chenzihong/GraphGen) or [ModelScope](https://modelscope.cn/studios/chenzihong/GraphGen).
111110

112111
For any questions, please check [FAQ](https://github.com/open-sciencelab/GraphGen/issues/10), open new [issue](https://github.com/open-sciencelab/GraphGen/issues) or join our [wechat group](https://cdn.vansin.top/internlm/dou.jpg) and ask.
113112

README_zh.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
[![Hugging Face](https://img.shields.io/badge/Demo-on%20HF-blue?logo=huggingface&logoColor=yellow)](https://huggingface.co/spaces/chenzihong/GraphGen)
1818
[![Model Scope](https://img.shields.io/badge/%F0%9F%A4%96%20Demo-on%20MS-green)](https://modelscope.cn/studios/chenzihong/GraphGen)
19-
[![OpenXLab](https://img.shields.io/badge/Demo-on%20OpenXLab-blue?logo=openxlab&logoColor=yellow)](https://g-app-center-120612-6433-jpdvmvp.openxlab.space)
2019

2120
GraphGen: Enhancing Supervised Fine-Tuning for LLMs with Knowledge-Driven Synthetic Data Generation
2221

@@ -105,7 +104,7 @@ GraphGen 首先根据源文本构建细粒度的知识图谱,然后利用期
105104

106105
## 🚀 快速开始
107106

108-
通过 [Web](https://g-app-center-120612-6433-jpdvmvp.openxlab.space)[备用 Web 入口](https://openxlab.org.cn/apps/detail/chenzihonga/GraphGen) 体验 GraphGen。
107+
通过 [Hugging Face](https://huggingface.co/spaces/chenzihong/GraphGen) 或 [ModelScope](https://modelscope.cn/studios/chenzihong/GraphGen) 体验 GraphGen。
109108

110109
如有任何问题,请查看 [FAQ](https://github.com/open-sciencelab/GraphGen/issues/10)、提交新的 [issue](https://github.com/open-sciencelab/GraphGen/issues) 或加入我们的[微信群](https://cdn.vansin.top/internlm/dou.jpg)咨询。
111110

baselines/Genie/genie.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ async def process_chunk(content: str):
122122
load_dotenv()
123123

124124
llm_client = OpenAIClient(
125-
model_name=os.getenv("SYNTHESIZER_MODEL"),
125+
model=os.getenv("SYNTHESIZER_MODEL"),
126126
api_key=os.getenv("SYNTHESIZER_API_KEY"),
127127
base_url=os.getenv("SYNTHESIZER_BASE_URL"),
128128
)

baselines/LongForm/longform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ async def process_chunk(content: str):
8989
load_dotenv()
9090

9191
llm_client = OpenAIClient(
92-
model_name=os.getenv("SYNTHESIZER_MODEL"),
92+
model=os.getenv("SYNTHESIZER_MODEL"),
9393
api_key=os.getenv("SYNTHESIZER_API_KEY"),
9494
base_url=os.getenv("SYNTHESIZER_BASE_URL"),
9595
)

baselines/SELF-QA/self-qa.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ async def process_chunk(content: str):
156156
load_dotenv()
157157

158158
llm_client = OpenAIClient(
159-
model_name=os.getenv("SYNTHESIZER_MODEL"),
159+
model=os.getenv("SYNTHESIZER_MODEL"),
160160
api_key=os.getenv("SYNTHESIZER_API_KEY"),
161161
base_url=os.getenv("SYNTHESIZER_BASE_URL"),
162162
)

baselines/Wrap/wrap.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ async def process_chunk(content: str):
109109
load_dotenv()
110110

111111
llm_client = OpenAIClient(
112-
model_name=os.getenv("SYNTHESIZER_MODEL"),
112+
model=os.getenv("SYNTHESIZER_MODEL"),
113113
api_key=os.getenv("SYNTHESIZER_API_KEY"),
114114
base_url=os.getenv("SYNTHESIZER_BASE_URL"),
115115
)

graphgen/bases/base_partitioner.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,16 +39,16 @@ async def community2batch(
3939
edges = comm.edges
4040
nodes_data = []
4141
for node in nodes:
42-
node_data = await g.get_node(node)
42+
node_data = g.get_node(node)
4343
if node_data:
4444
nodes_data.append((node, node_data))
4545
edges_data = []
4646
for u, v in edges:
47-
edge_data = await g.get_edge(u, v)
47+
edge_data = g.get_edge(u, v)
4848
if edge_data:
4949
edges_data.append((u, v, edge_data))
5050
else:
51-
edge_data = await g.get_edge(v, u)
51+
edge_data = g.get_edge(v, u)
5252
if edge_data:
5353
edges_data.append((v, u, edge_data))
5454
batches.append((nodes_data, edges_data))

graphgen/bases/base_storage.py

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,103 +9,99 @@ class StorageNameSpace:
99
working_dir: str = None
1010
namespace: str = None
1111

12-
async def index_done_callback(self):
12+
def index_done_callback(self):
1313
"""commit the storage operations after indexing"""
1414

15-
async def query_done_callback(self):
15+
def query_done_callback(self):
1616
"""commit the storage operations after querying"""
1717

1818

1919
class BaseListStorage(Generic[T], StorageNameSpace):
20-
async def all_items(self) -> list[T]:
20+
def all_items(self) -> list[T]:
2121
raise NotImplementedError
2222

23-
async def get_by_index(self, index: int) -> Union[T, None]:
23+
def get_by_index(self, index: int) -> Union[T, None]:
2424
raise NotImplementedError
2525

26-
async def append(self, data: T):
26+
def append(self, data: T):
2727
raise NotImplementedError
2828

29-
async def upsert(self, data: list[T]):
29+
def upsert(self, data: list[T]):
3030
raise NotImplementedError
3131

32-
async def drop(self):
32+
def drop(self):
3333
raise NotImplementedError
3434

3535

3636
class BaseKVStorage(Generic[T], StorageNameSpace):
37-
async def all_keys(self) -> list[str]:
37+
def all_keys(self) -> list[str]:
3838
raise NotImplementedError
3939

40-
async def get_by_id(self, id: str) -> Union[T, None]:
40+
def get_by_id(self, id: str) -> Union[T, None]:
4141
raise NotImplementedError
4242

43-
async def get_by_ids(
43+
def get_by_ids(
4444
self, ids: list[str], fields: Union[set[str], None] = None
4545
) -> list[Union[T, None]]:
4646
raise NotImplementedError
4747

48-
async def get_all(self) -> dict[str, T]:
48+
def get_all(self) -> dict[str, T]:
4949
raise NotImplementedError
5050

51-
async def filter_keys(self, data: list[str]) -> set[str]:
51+
def filter_keys(self, data: list[str]) -> set[str]:
5252
"""return un-exist keys"""
5353
raise NotImplementedError
5454

55-
async def upsert(self, data: dict[str, T]):
55+
def upsert(self, data: dict[str, T]):
5656
raise NotImplementedError
5757

58-
async def drop(self):
58+
def drop(self):
5959
raise NotImplementedError
6060

6161

6262
class BaseGraphStorage(StorageNameSpace):
63-
async def has_node(self, node_id: str) -> bool:
63+
def has_node(self, node_id: str) -> bool:
6464
raise NotImplementedError
6565

66-
async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
66+
def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
6767
raise NotImplementedError
6868

69-
async def node_degree(self, node_id: str) -> int:
69+
def node_degree(self, node_id: str) -> int:
7070
raise NotImplementedError
7171

72-
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
72+
def edge_degree(self, src_id: str, tgt_id: str) -> int:
7373
raise NotImplementedError
7474

75-
async def get_node(self, node_id: str) -> Union[dict, None]:
75+
def get_node(self, node_id: str) -> Union[dict, None]:
7676
raise NotImplementedError
7777

78-
async def update_node(self, node_id: str, node_data: dict[str, str]):
78+
def update_node(self, node_id: str, node_data: dict[str, str]):
7979
raise NotImplementedError
8080

81-
async def get_all_nodes(self) -> Union[list[tuple[str, dict]], None]:
81+
def get_all_nodes(self) -> Union[list[tuple[str, dict]], None]:
8282
raise NotImplementedError
8383

84-
async def get_edge(
85-
self, source_node_id: str, target_node_id: str
86-
) -> Union[dict, None]:
84+
def get_edge(self, source_node_id: str, target_node_id: str) -> Union[dict, None]:
8785
raise NotImplementedError
8886

89-
async def update_edge(
87+
def update_edge(
9088
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
9189
):
9290
raise NotImplementedError
9391

94-
async def get_all_edges(self) -> Union[list[tuple[str, str, dict]], None]:
92+
def get_all_edges(self) -> Union[list[tuple[str, str, dict]], None]:
9593
raise NotImplementedError
9694

97-
async def get_node_edges(
98-
self, source_node_id: str
99-
) -> Union[list[tuple[str, str]], None]:
95+
def get_node_edges(self, source_node_id: str) -> Union[list[tuple[str, str]], None]:
10096
raise NotImplementedError
10197

102-
async def upsert_node(self, node_id: str, node_data: dict[str, str]):
98+
def upsert_node(self, node_id: str, node_data: dict[str, str]):
10399
raise NotImplementedError
104100

105-
async def upsert_edge(
101+
def upsert_edge(
106102
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
107103
):
108104
raise NotImplementedError
109105

110-
async def delete_node(self, node_id: str):
106+
def delete_node(self, node_id: str):
111107
raise NotImplementedError

0 commit comments

Comments
 (0)