Commit 23df63d · Parent: 45782c8

Add scripts.
Files changed:
- Fujitsu-LLM-KG.code-workspace +29 -0
- scripts/.gitignore +3 -0
- scripts/LICENSE.template +51 -0
- scripts/README.md +12 -0
- scripts/README.md.template +126 -0
- scripts/generate_documents.py +607 -0
- scripts/kgllm_utils.py +94 -0
- scripts/test_kgllm.ipynb +850 -0
Fujitsu-LLM-KG.code-workspace
ADDED
@@ -0,0 +1,29 @@
+{
+    "folders": [
+        {
+            "name": "README",
+            "path": "."
+        },
+        {
+            "name": "Fujitsu-LLM-KG-8x7B_cpt",
+            "path": "../Fujitsu-LLM-KG-8x7B_cpt"
+        },
+        {
+            "name": "Fujitsu-LLM-KG-8x7B_inst-infer_v1",
+            "path": "../Fujitsu-LLM-KG-8x7B_inst-infer_v1"
+        },
+        {
+            "name": "Fujitsu-LLM-KG-8x7B_inst-infer_v2",
+            "path": "../Fujitsu-LLM-KG-8x7B_inst-infer_v2"
+        },
+        {
+            "name": "Fujitsu-LLM-KG-8x7B_inst-gen_ja",
+            "path": "../Fujitsu-LLM-KG-8x7B_inst-gen_ja"
+        },
+        {
+            "name": "Fujitsu-LLM-KG-8x7B_inst-gen_en",
+            "path": "../Fujitsu-LLM-KG-8x7B_inst-gen_en"
+        }
+    ],
+    "settings": {}
+}
scripts/.gitignore
ADDED
@@ -0,0 +1,3 @@
+.venv*/
+*.pyc
+__pycache__/
scripts/LICENSE.template
ADDED
@@ -0,0 +1,51 @@
+Copyright 2025 Fujitsu Limited
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and limitations under the License.
+
+
+Additional Terms of Use
+
+In addition to the License, You shall use {model_name} upon agreeing to these Terms of Use (hereinafter referred to as the "TOU"). All capitalized terms used but not defined in this TOU have the meanings set forth in the License.
+
+Article 1 (License to Use)
+You must incorporate the TOU and the License into the license terms for redistribution of {model_name} or Derivative Works of {model_name}, or into the terms of use for services using {model_name} or Derivative Works. Those who violate the TOU and/or the License are not allowed to use {model_name}.
+
+Article 2 (Responsibility)
+You shall use {model_name} at Your own responsibility and discretion, shall handle any disputes arising with third parties in relation to the use of {model_name} at Your own responsibility and expense, and shall indemnify, defend, and hold harmless the Licensor against all damages and losses without causing any inconvenience to the Licensor. You shall address any damage caused by the use of {model_name} at Your own responsibility.
+
+Article 3 (Prohibited Actions)
+You shall not engage in the following actions when using {model_name}:
+(1) Actions that will or may infringe the intellectual property rights of the Licensor or third parties;
+(2) Actions that will or may infringe the property, privacy, or portrait rights of the Licensor or third parties;
+(3) Actions that discriminate against, defame, insult, or slander the Licensor or third parties, promote discrimination against others, or damage the reputation or credibility of others;
+(4) Actions that constitute unauthorized legal services and/or provide legal advice from anyone other than a qualified professional;
+(5) Actions that provide financial advice from anyone other than a qualified professional;
+(6) Medical actions, including providing health advice or suggesting treatment methods; and
+(7) Other actions that require permission or other forms of authorization under laws and regulations.
+
+Article 4 (Restrictions)
+1. You acknowledge that the results of processing using {model_name} (hereinafter referred to as "Processing Results") may contain falsehoods, biases, content that infringes the rights of others, or content that does not meet the effectiveness or usefulness You expect, and You agree to use {model_name} on the premise that inaccurate or inappropriate Processing Results may cause damage or infringement of rights to You or third parties and/or raise ethical concerns. You shall use the Processing Results only after confirming their accuracy, legality, and ethical validity Yourself. If Your use of {model_name}, including the Processing Results, infringes the rights of You or third parties, the Licensor shall not be responsible for any damages and losses, and You shall indemnify, defend, and hold harmless the Licensor against all damages and losses without causing any inconvenience to the Licensor.
+2. You shall use the Processing Results in compliance with the regulations, such as laws and regulations, of each country and region.
+3. You shall not use the Processing Results for the actions listed in Article 3 (Prohibited Actions).
+
+Article 5 (Ownership of Rights)
+You will acquire the rights newly arising from the creation of Derivative Works of {model_name}, but You shall use such Derivative Works in accordance with the above License and the TOU.
+
+Article 6 (Export Transactions)
+You shall obtain the necessary permissions Yourself when exporting {model_name} and the Processing Results in relation to Your use, where such export requires permission under the Foreign Exchange and Foreign Trade Act (including related cabinet orders and ministerial ordinances) or U.S. export control laws and regulations.
+
+Article 7 (Jurisdictional Court)
+The Tokyo District Court shall have exclusive jurisdiction in the first instance over any disputes arising in relation to the TOU.
+
+Article 8 (Governing Law)
+The TOU shall be governed by the laws of Japan.
+
+Article 9 (Other Provisions)
+Except for the terms of the License, the TOU sets forth the entire agreement on all matters concerning the use of {model_name} between You and the Licensor, and matters not provided for in the TOU shall be governed by the relevant laws and regulations.
scripts/README.md
ADDED
@@ -0,0 +1,12 @@
+# Scripts for the Fujitsu-LLM-KG models
+
+## Generate the documentation for each model
+
+```sh
+$ python3 -m venv .venv
+$ . .venv/bin/activate
+$ pip install -U pip
+$ pip install transformers torch accelerate
+
+$ python3 generate_documents.py
+```
scripts/README.md.template
ADDED
@@ -0,0 +1,126 @@
+---
+license: apache-2.0
+license_link: LICENSE
+language:
+- ja
+- en
+base_model:
+- {base_model_id}
+---
+# {model_name}
+
+This model is one of the large language models (LLMs) specialized in knowledge-graph generation and inference that Fujitsu Limited developed in its project ["Research and Development of Large Language Models Capable of Logical Reasoning"](https://pr.fujitsu.com/jp/news/2024/05/17.html), adopted under the New Energy and Industrial Technology Development Organization (NEDO) call ["Research and Development Project for Enhancing Post-5G Information and Communication Systems / (1) Development of Post-5G Information and Communication Systems"](https://www.nedo.go.jp/activities/ZZJP_100172.html) and the ["Generative AI Accelerator Challenge (GENIAC) project"](https://www.meti.go.jp/policy/mono_info_service/geniac/index.html) hosted by the Ministry of Economy, Trade and Industry.
+
+The models developed in this project are listed in the table below.
+For the evaluation results and development details of each model, see the [Fujitsu Research technical blog](https://blog.fltech.dev/entry/2024/10/15/Fujitsu-LLM-KG-ja).
+
+## Model Index
+
+|Model|Name|Description|
+|---|---|---|
+|[Fujitsu-LLM-KG-8x7B_cpt](https://huggingface.co/Fujitsu-LLM-KG/Fujitsu-LLM-KG-8x7B_cpt/)|Common continually pre-trained LLM|An LLM continually pre-trained on a knowledge-graph parallel corpus.|
+|[Fujitsu-LLM-KG-8x7B_inst-infer_v1](https://huggingface.co/Fujitsu-LLM-KG/Fujitsu-LLM-KG-8x7B_inst-infer_v1/)|Knowledge-graph inference LLM ver. 1|An LLM instruction-tuned on Japanese multi-hop QA task data.|
+|[Fujitsu-LLM-KG-8x7B_inst-infer_v2](https://huggingface.co/Fujitsu-LLM-KG/Fujitsu-LLM-KG-8x7B_inst-infer_v2/)|Knowledge-graph inference LLM ver. 2|An LLM instruction-tuned on English multi-hop QA task data.|
+|[Fujitsu-LLM-KG-8x7B_inst-gen_ja](https://huggingface.co/Fujitsu-LLM-KG/Fujitsu-LLM-KG-8x7B_inst-gen_ja/)|Knowledge-graph generation LLM (Japanese)|An LLM instruction-tuned on Japanese document-level relation extraction task data.|
+|[Fujitsu-LLM-KG-8x7B_inst-gen_en](https://huggingface.co/Fujitsu-LLM-KG/Fujitsu-LLM-KG-8x7B_inst-gen_en/)|Knowledge-graph generation LLM (English)|An LLM instruction-tuned on English document-level relation extraction task data.|
+
+## Model Details
+
+* **Developed by**: [Fujitsu-LLM](https://huggingface.co/Fujitsu-LLM/)
+* **Base Model**: [{base_model_id}](https://huggingface.co/{base_model_id})
+* **Language(s)**: Japanese, English
+* **Library**: [NVIDIA/NeMo](https://github.com/NVIDIA/NeMo)
+* **License**: [Apache-2.0](./LICENSE)
+
+## Model Performance
+
+* See the [Fujitsu Research technical blog](https://blog.fltech.dev/entry/2024/10/15/Fujitsu-LLM-KG-ja).
+
+## How to use
+
+### Preparation
+
+Install the required Python modules.
+```sh
+# Tested with the following versions: transformers==4.48.1, torch==2.5.1, and accelerate==1.3.0.
+$ pip install transformers torch accelerate
+```
+
+Define the utilities.
+```python
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+class Fujitsu_LLM_KG:
+    """The Fujitsu-LLM-KG-8x7B model.
+    """
+    def __init__(self, model_id: str, *, device_map: str = "auto") -> None:
+        """Initializes the model and tokenizer.
+        """
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map=device_map,
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True,
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+
+    def generate(self, prompt: str,
+                 *,
+                 max_new_tokens: int = 2048,
+                 num_beams: int = 1,
+                 ) -> str:
+        """Generates an answer.
+        """
+        tokenized = self.tokenizer(prompt, return_tensors="pt", padding=True)
+        with torch.no_grad():
+            outputs = self.model.generate(
+                tokenized["input_ids"].to("cuda"),
+                attention_mask=tokenized["attention_mask"].to("cuda"),
+                pad_token_id=self.tokenizer.eos_token_id,
+                max_new_tokens=max_new_tokens,
+                do_sample=False,
+                num_beams=num_beams,
+            )
+        answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)[len(prompt):]
+        return answer
+
+def extract_turtle(text: str, *, with_rationale: bool = False) -> str:
+    """Extracts the RDF Turtle part from the output text of the Fujitsu-LLM-KG-8x7B models.
+    """
+    TOKENS = ["<", "rel:", "rdf:", "]"]
+    if with_rationale:
+        TOKENS.append("#@")
+    turtle = ""
+    for line in text.splitlines():
+        line_ = line.strip()
+        if line == "" or any(line_.startswith(c) for c in TOKENS):
+            if turtle:
+                turtle += "\n"
+            turtle += line
+    return turtle
+```
+
+Load the model.
+```python
+kgllm = Fujitsu_LLM_KG("Fujitsu-LLM-KG/{model_name}")
+```
+
+{how_to_use}
+
+## Training Datasets
+
+{train_datasets}
+
+## License
+The Terms of Use for {model_name} are provided in the [LICENSE](./LICENSE) file.
+
+## Risks and Limitations
+Processing results obtained with {model_name} may contain falsehoods, biases, content that infringes the rights of others, or content that does not meet the effectiveness or usefulness users expect.
+
+## Acknowledgements
+The development of this model was supported by a subsidy from the Research and Development Project for Enhancing Post-5G Information and Communication Systems / (1) Development of Post-5G Information and Communication Systems, Grant No. 0508001.
+
+## Authors
+* Fujitsu Limited
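
The {model_name}, {base_model_id}, {train_datasets}, and {how_to_use} placeholders in this template are filled by a plain str.format call in generate_documents.py below. A minimal rendering sketch; the model_name and base_model_id values are the real ones for the cpt model, while the truncated "..." strings are stand-ins for the full template blocks:

```python
# Minimal sketch of the rendering step performed by generate_documents.py (below).
with open("README.md.template", "r") as f:
    template = f.read()

readme = template.format(
    model_name="Fujitsu-LLM-KG-8x7B_cpt",
    base_model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    train_datasets="### Continual Pre-Training\n...",  # stand-in for the full block
    how_to_use="### Generates Knowledge Graph from Text\n...",  # stand-in
)
print(readme.splitlines()[9])  # -> "# Fujitsu-LLM-KG-8x7B_cpt"
```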
scripts/generate_documents.py
ADDED
@@ -0,0 +1,607 @@
+"""Generator for the repositories of Fujitsu-LLM-KG-8x7B models.
+"""
+
+from typing import Literal
+from dataclasses import dataclass
+
+@dataclass(kw_only=True)
+class ModelContent:
+    company_name = "Fujitsu-LLM-KG"
+    model_name: str
+    base_model_id: str
+    train_datasets: str
+    how_to_use: str
+
+    @property
+    def model_id(self) -> str:
+        return f"{self.company_name}/{self.model_name}"
+
+
+cpt_model = ModelContent(
+    model_name="Fujitsu-LLM-KG-8x7B_cpt",
+    base_model_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    train_datasets="""
+### Continual Pre-Training
+
+* Knowledge-graph parallel corpus (Japanese/English)
+    * *cf. [our technical blog](https://blog.fltech.dev/entry/2024/10/15/Fujitsu-LLM-KG-ja)*
+* Knowledge-graph-related source code (Japanese/English)
+    * *e.g., RDF Turtle, SPARQL, PlantUML, etc.*
+* Knowledge-graph-related corpus (Japanese/English)
+    * *filtered by keywords such as "knowledge graph" and "knowledge triple"*
+* Web-crawled data (Japanese)
+* Wikipedia data (Japanese/English)
+* Paper corpus (Japanese)
+* Legal data (Japanese)
+* Court precedent corpus (Japanese)
+* English mathematics corpus (English)
+* Legal parallel corpus (Japanese-English)
+* Subtitle parallel corpus (Japanese-English)
+""",
+    how_to_use='''
+### Generates Knowledge Graph from Text
+
+Give the task instruction.
+````python
+prompt = """
+[INST]
+Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".
+
+## Source
+```txt
+宗像聡は富士通に2010年から勤めています。
+彼はFujitsu-LLM-KG-8x7B_cptを開発しました。
+```
+
+## Strategy
+Extract all verifiable facts in "Source" as knowledge triples.
+[/INST]
+""".strip()
+
+generated = kgllm.generate(prompt)
+print(generated)
+````
+
+Check the result.
+````md
+## Knowledge Graph
+```turtle
+#@rationale: 宗像聡は富士通に2010年から勤めています。
+<#宗像聡>
+    rel:employer [
+        rdf:object <#富士通>;
+        rel:start_time <#2010>
+    ].
+
+#@rationale: 彼はFujitsu-LLM-KG-8x7B_cptを開発しました。
+<#宗像聡>
+    rel:notable_work <#Fujitsu-LLM-KG-8x7B_cpt>.
+```
+````
+
+### Generates Text from Knowledge Graph
+
+Give the task instruction.
+````python
+prompt = """
+[INST]
+Generate "Text" to explain the given knowledge triples in "Source".
+
+## Source
+```turtle
+<#Satoshi Munakata>
+    rel:notable_work <#Fujitsu-LLM-KG-8x7B_cpt>;
+    rel:employer [
+        rdf:object <#Fujitsu>;
+        rel:start_time <#2010>
+    ].
+```
+
+## Strategy
+Explain the knowledge triples in "Source" without omission, but concisely and fluently.
+[/INST]
+""".strip()
+
+generated = kgllm.generate(prompt)
+print(generated)
+````
+
+Check the result.
+````md
+## Text
+```txt
+Satoshi Munakata, who started working for Fujitsu in 2010, is the creator of the notable work Fujitsu-LLM-KG-8x7B cpt.
+```
+````
+''',
+)
+
+model_contents = [
+    cpt_model,
+    ModelContent(
+        model_name="Fujitsu-LLM-KG-8x7B_inst-infer_v1",
+        base_model_id=cpt_model.model_id,
+        train_datasets="""
+### Instruction Tuning
+
+* Instruction-tuning data for knowledge-graph inference, ver. 1
+    * Task data synthesized from [JEMHopQA](https://github.com/aiishii/JEMHopQA)
+""",
+        how_to_use='''
+### Generates Graph Schema
+
+*(The following task example is adapted from [JEMHopQA](https://github.com/aiishii/JEMHopQA), so the [CC BY-SA 4.0 license](https://creativecommons.org/licenses/by-sa/4.0/deed.en) is inherited.)*
+
+Give the task instruction.
+````python
+question = "長岡京に遷都される前の都の所在地は現在の何県の何市にあたるでしょうか?"
+
+prompt_schema = f"""
+[INST]
+Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".
+
+## Source
+```txt
+{question}
+```
+
+## Strategy
+Extract graph schema needed to answer the question in above "Source" as knowledge triples without omission.
+[/INST]
+"""
+
+generated_schema = kgllm.generate(prompt_schema)
+print(generated_schema)
+````
+
+Check the result.
+````md
+## Knowledge Graph
+```turtle
+<#長岡京>
+    rel:遷都される前の都 <#?>.
+
+<#?>
+    rel:現在の県 <#?>;
+    rel:現在の市 <#?>.
+```
+````
+
+### Resolves to Knowledge Graph
+
+*(The following task example is adapted from [Wikipedia](https://ja.wikipedia.org/wiki/), so the [CC BY-SA 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/deed.en) is inherited.)*
+
+Give the task instruction.
+````python
+prompt_kg = f"""
+[INST]
+Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".
+
+## Source
+```txt
+平安京
+
+平安京(へいあんきょう/たいらのみやこ)または平安城(へいあんじょう)は、日本における古代最後の宮都。794年(延暦13年)から1869年(明治2年)までの日本の首都。
+桓武天皇により、長岡京に代わる都として山背国(山城国)愛宕・葛野の両郡にまたがる地が選ばれ、中国の洛陽城や長安城を模して793年(延暦12年)から建設された。翌794年(延暦13年)に遷都。北部中央に宮城・平安宮(大内裏)が建設され、以降歴代の皇居が置かれた。
+遷都以来、平清盛により断行された福原遷都(1180年)の期間を除いて、東京奠都まで1100年近くに亘って都として機能し、1869年(明治2年)まで続いた。今日の京都市街が形成されるに至る。
+----
+平城京
+
+平城京(へいじょうきょう/へいぜいきょう/ならのみやこ)は、奈良時代の日本の首都。710年に藤原京から遷都するにあたり、唐の都長安城を模倣して大和国に建造された都城。現在の奈良県奈良市、大和郡山市に存在する。
+中央北域に宮城・平城宮(大内裏)を置き、東西8坊 (約 4.3 km) の面積をもち、中央を南北に走る朱雀大路によって左京・右京に二分され、さらに南北・東西を大路・小路によって碁盤の目のように整然と区画され、全域が72坊に区画設定されていた。
+大阪湾や淡路島(八島のひとつ)にも近い奈良盆地(奈良県奈良市の西部の一部、中心部及び大和郡山市北部)には、5世紀中頃にはすでに天皇陵である佐紀盾列古墳群が作られ、またのちには大神神社、7世紀には興福寺も建立されているが、京となった8世紀には、東大寺や巨大な仏像である東大寺盧舎那仏像、法華寺などが建立された。本州の政治・文化の中心地となるに至って外京(げきょう)に位置した門前町が、今に続く奈良の町を形成する中心となった。
+----
+長岡京
+
+長岡京(ながおかきょう)は、山城国乙訓郡にあった奈良時代末期(または平安時代初期)の都城(現在の京都府向日市、長岡京市、京都市西京区)。宮域跡は向日市鶏冠井町(かいでちょう)に位置し、「長岡宮跡」として国の史跡に指定されている。
+延暦3年(784年)11月11日、第50代桓武天皇により平城京から遷都され、延暦13年(794年)10月22日に平安京に遷都されるまで機能した。
+```
+
+## Strategy
+Extract all verifiable facts in above "Source", that match the following graph schema, as knowledge triples without omission.
+```turtle
+{extract_turtle(generated_schema)}
+```
+The extracted facts are needed to answer the question "{question}".
+[/INST]
+"""
+
+generated_kg = kgllm.generate(prompt_kg)
+print(generated_kg)
+````
+
+Check the result.
+````md
+## Knowledge Graph
+```turtle
+<#長岡京>
+    rel:遷都される前の都 <#平城京>.
+
+<#平城京>
+    rel:現在の県 <#奈良県>;
+    rel:現在の市 <#奈良市>.
+```
+````
+
+### Generate Final Answer
+
+Give the task instruction.
+````python
+prompt_answer = f"""
+[INST]
+Explore "Knowledge Graph" entity-to-entity then finally answer "Question".
+
+## Knowledge Graph
+```turtle
+{extract_turtle(generated_kg)}
+```
+
+## Question
+{question}
+
+## Strategy
+Answer briefly in one line.
+[/INST]
+"""
+
+generated_answer = kgllm.generate(prompt_answer)
+print(generated_answer)
+````
+
+Check the result.
+````md
+## Explore Path
+```path
+長岡京 → rel:遷都される前の都 → 平城京
+平城京 → rel:現在の県 → 奈良県
+平城京 → rel:現在の市 → 奈良市
+```
+
+## Answer
+```txt
+奈良県奈良市
+```
+````
+''',
+    ),
+    ModelContent(
+        model_name="Fujitsu-LLM-KG-8x7B_inst-infer_v2",
+        base_model_id=cpt_model.model_id,
+        train_datasets="""
+### Instruction Tuning
+
+* Instruction-tuning data for knowledge-graph inference, ver. 2
+    * Task data synthesized from [HotpotQA](https://hotpotqa.github.io/)
+""",
+        how_to_use='''
+### Generates Graph Schema
+
+Give the task instruction.
+````python
+question = "お正月とお盆とでは、どちらがより長期間の休日ですか?"
+
+prompt_schema = f"""
+[INST]
+Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".
+
+## Source
+```txt
+{question}
+```
+
+## Strategy
+Extract graph schema needed to answer the question in above "Source" as knowledge triples without omission.
+[/INST]
+""".strip()
+
+generated_schema = kgllm.generate(prompt_schema)
+print(generated_schema)
+````
+
+Check the result.
+````md
+## Knowledge Graph
+```turtle
+<#お正月>
+    rel:has_duration <?duration_of_new_year>.
+<#お盆>
+    rel:has_duration <?duration_of_obon>.
+<#Answer>
+    rel:is <#お正月>;
+    rel:when [
+        <?duration_of_new_year>
+            rel:greater_than <?duration_of_obon>.
+    ];
+    rel:else <#お盆>.
+```
+````
+
+### Resolves to Knowledge Graph
+
+*(The following task example is excerpted and adapted from the [model work rules](https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/koyou_roudou/roudoukijun/zigyonushi/model/index.html) published by the Ministry of Health, Labour and Welfare.)*
+
+Give the task instruction.
+````python
+prompt_kg = f"""
+[INST]
+Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".
+
+## Source
+```txt
+Title: 第19条「労働時間及び休憩時間」
+
+1 労働者代表と1年単位の変形労働時間制に関する労使協定を締結した場合、当該協定の適用を受ける労働者について、1週間の所定労働時間は、対象期間を平均して1週間当たり40時間とする。
+2 1年単位の変形労働時間制を適用しない労働者について、1週間の所定労働時間は40時間、1日の所定労働時間は8時間とする。
+
+----
+Title: 第20条「休日」
+
+1 1年単位の変形労働時間制の適用を受ける労働者の休日については、1年単位の変形労働時間制に関する労使協定の定めるところにより、対象期間の初日を起算日とする1週間ごとに1日以上、1年間に125日以上となるように指定する。その場合、年間休日カレンダーに定め、対象期間の初日の30日前までに各労働者に通知する。
+2 1年単位の変形労働時間制を適用しない労働者の休日については、以下のとおり指定し、月間休日カレンダーに定め、対象期間の初日の30日前までに各労働者に通知する。
+①日曜日(前条第3号の特定期間を除く。)
+②国民の祝日(日曜日と重なったときは翌日)
+③年末年始(1月1日~1月3日)
+④夏季休日(8月13日~8月16日)
+⑤その他会社が指定する日
+
+----
+Title: 第21条「時間外及び休日労働等」
+
+1 業務の都合により、第19条の所定労働時間を超え、又は第20条の所定休日に労働させることがある。
+2 前項の場合、法定労働時間を超える労働又は法定休日における労働については、あらかじめ会社は労働者の過半数代表者と書面による労使協定を締結するとともに、これを所轄の労働基準監督署長に届け出るものとする。
+3 妊娠中の女性、産後1年を経過しない女性労働者(以下「妊産婦」という)であって請求した者及び18歳未満の者については、第2項による時間外労働又は休日若しくは深夜(午後10時から午前5時まで)労働に従事させない。
+4 災害その他避けることのできない事由によって臨時の必要がある場合には、第1項から前項までの制限を超えて、所定労働時間外又は休日に労働させることがある。ただし、この場合であっても、請求のあった妊産婦については、所定労働時間外労働又は休日労働に従事させない。
+
+----
+Title: 第26条「育児時間」
+
+1 1歳に満たない子を養育する女性労働者から請求があったときは、休憩時間のほか1日について2回、1回について30分の育児時間を与える。
+```
+
+## Strategy
+Extract all verifiable facts in above "Source", that match the following graph schema, as knowledge triples.
+```turtle
+{extract_turtle(generated_schema)}
+```
+The extracted facts are needed to answer the question "{question}".
+However, if no useful facts are found in the above "Source", do not output any triples.
+[/INST]
+""".strip()
+
+generated_kg = kgllm.generate(prompt_kg)
+print(generated_kg)
+````
+
+Check the result.
+````md
+## Knowledge Graph
+```turtle
+<#お正月>
+    #@rationale: Title: 第20条「休日」 - ③年末年始(1月1日~1月3日)
+    rel:has_duration <#3>;
+<#お盆>
+    #@rationale: Title: 第20条「休日」 - ④夏季休日(8月13日~8月16日)
+    rel:has_duration <#4>.
+<#Answer>
+    rel:is <#お正月>;
+    rel:when [
+        <#3>
+            rel:greater_than <#4>.
+    ];
+    rel:else <#お盆>.
+```
+````
+
+### Generate Final Answer
+
+Give the task instruction.
+````python
+prompt_answer = f"""
+[INST]
+Explore "Knowledge Graph" entity-to-entity then finally answer "Question".
+
+## Knowledge Graph
+```turtle
+{extract_turtle(generated_kg)}
+```
+
+## Question
+{question}
+
+## Strategy
+Answer briefly in one line.
+[/INST]
+""".strip()
+
+generated_answer = kgllm.generate(prompt_answer)
+print(generated_answer)
+````
+
+Check the result.
+````md
+## Explore Path
+```path
+お正月は3日間の休日です.
+お盆は4日間の休日です.
+Answer is 4 because 4 is greater than 3.
+```
+
+## Answer
+```txt
+お盆
+```
+````
+''',
+    ),
+    ModelContent(
+        model_name="Fujitsu-LLM-KG-8x7B_inst-gen_ja",
+        base_model_id=cpt_model.model_id,
+        train_datasets="""
+### Instruction Tuning
+
+* Instruction-tuning data for knowledge-graph generation, Japanese version
+    * Task data synthesized from [JacRED](https://github.com/YoumiMa/JacRED)
+""",
+        how_to_use='''
+### Generates Knowledge Graph from Sentences
+
+*(The following task example is adapted from [Wikipedia](https://ja.wikipedia.org/wiki/), so the [CC BY-SA 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/deed.en) is inherited.)*
+
+Give the task instruction.
+````python
+prompt = """
+[INST]
+Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".
+
+## Source
+```txt
+s0. <#マリー・テレーズ・ドートリッシュ:Person[0]>(<#1638 年 9 月 10 日:Date[6]>日-<#1683 年 7月 30 日:Date[7]>)は、<#フランス:Location[1]>王<#ルイ 14 世:Person[2]>の王妃。
+s1. 父は<#スペイン:Location[3]>王<#フェリペ 4 世:Person[4]>、母は<#フランス:Location[1]>王<#アンリ 4 世:Person[8]>と<#マリー・ド・メディシス:Person[9]>の娘<#イサベル・デ・ボルボン:Person[5]>。
+s2. <#スペイン:Location[3]>名は<#マリア・テレサ:Person[0]>。
+s3. <#ルイ 14 世:Person[2]>の父<#ルイ 13 世:Person[10]>は<#イサベル:Person[5]>の兄、母<#アンヌ・ドートリッシュ:Person[11]>は<#フェリペ 4 世:Person[4]>の姉であり、<#ルイ 14世:Person[2]>と<#マリー・テレーズ:Person[0]>とは父方・母方双方で従兄妹に当たる。
+s4. <#神聖ローマ:Location[12]>皇帝<#レオポルト 1 世:Person[13]>の皇后<#マルガリータ・テレサ:Person[14]>は異母妹、<#スペイン:Location[3]>・<#ハプスブルク家:Organization[15]>最後の王<#カルロス 2世:Person[16]>は異母弟である。
+```
+
+## Strategy
+Extract all verifiable facts about relation "CountryOfCitizenship" in "Source" as knowledge triples.
+* Don't speculate, extract only the facts as stated in "Source" **WITHOUT OMISSION**.
+The relation "CountryOfCitizenship" (P27) means "the object is a country that recognizes the subject as its citizen".
+* The existence of a relation is restricted by the type of subject and object. You must select subjects and objects of triples from the bracketed named-entity expressions, such as "<#name:type[id]>", without modification.
+[/INST]
+""".strip()
+
+generated = kgllm.generate(prompt)
+print(generated)
+````
+
+Check the result.
+````md
+## Knowledge Graph
+```turtle
+#@rationale: s0. <#マリー・テレーズ・ドートリッシュ:Person[0]>(<#1638 年 9 月 10 日:Date[6]>日-<#1683 年 7月 30 日:Date[7]>)は、<#フランス:Location[1]>王<#ルイ 14 世:Person[2]>の王妃。
+
+<#ルイ 14 世:Person[2]>
+    rel:CountryOfCitizenship <#フランス:Location[1]>.
+
+#@rationale: s1. 父は<#スペイン:Location[3]>王<#フェリペ 4 世:Person[4]>、母は<#フランス:Location[1]>王<#アンリ 4 世:Person[8]>と<#マリー・ド・メディシス:Person[9]>の娘<#イサベル・デ・ボルボン:Person[5]>。
+
+<#フェリペ 4 世:Person[4]>
+    rel:CountryOfCitizenship <#スペイン:Location[3]>.
+
+<#アンリ 4 世:Person[8]>
+    rel:CountryOfCitizenship <#フランス:Location[1]>.
+
+<#イサベル・デ・ボルボン:Person[5]>
+    rel:CountryOfCitizenship <#フランス:Location[1]>.
+
+#@rationale: s2. <#スペイン:Location[3]>名は<#マリア・テレサ:Person[0]>。
+
+#@rationale: s3. <#ルイ 14 世:Person[2]>の父<#ルイ 13 世:Person[10]>は<#イサベル:Person[5]>の兄、母<#アンヌ・ドートリッシュ:Person[11]>は<#フェリペ 4 世:Person[4]>の姉であり、<#ルイ 14世:Person[2]>と<#マリー・テレーズ:Person[0]>とは父方・母方双方で従兄妹に当たる。
+
+<#ルイ 13 世:Person[10]>
+    rel:CountryOfCitizenship <#フランス:Location[1]>.
+
+<#アンヌ・ドートリッシュ:Person[11]>
+    rel:CountryOfCitizenship <#フランス:Location[1]>.
+
+#@rationale: s4. <#神聖ローマ:Location[12]>皇帝<#レオポルト 1 世:Person[13]>の皇后<#マルガリータ・テレサ:Person[14]>は異母妹、<#スペイン:Location[3]>・<#ハプスブルク家:Organization[15]>最後の王<#カルロス 2世:Person[16]>は異母弟である。
+
+<#カルロス 2世:Person[16]>
+    rel:CountryOfCitizenship <#スペイン:Location[3]>.
+```
+````
+''',
+    ),
+    ModelContent(
+        model_name="Fujitsu-LLM-KG-8x7B_inst-gen_en",
+        base_model_id=cpt_model.model_id,
+        train_datasets="""
+### Instruction Tuning
+
+* Instruction-tuning data for knowledge-graph generation, English version
+    * Task data synthesized from [Re-DocRED](https://github.com/tonytan48/Re-DocRED)
+""",
+        how_to_use='''
+### Generates Knowledge Graph from Sentences
+
+*(The following task example is adapted from [Wikipedia](https://en.wikipedia.org/wiki/), so the [CC BY-SA 3.0 license](https://creativecommons.org/licenses/by-sa/3.0/deed.en) is inherited.)*
+
+Give the task instruction.
+````python
+prompt = """
+[INST]
+Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".
+
+## Source
+```txt
+s0. <#John Gregory Myre:Person[0]> is an <#American:Location[1]> journalist and <#NPR:Organization[2]>'s digital editor for international news.
+s1. Previously, he was a foreign correspondent for The <#Associated Press:Organization[3]> and The <#New York Times:Organization[4]> for <#20 years:Time[5]>.
+s2. He reported from <#Jerusalem:Location[6]> from <#2003:Time[7]> to <#2007:Time[8]> for The <#New York Times:Organization[4]>.
+s3. With the <#AP:Organization[3]>, <#Myre:Person[0]> was based in <#Jerusalem:Location[6]>, <#Moscow:Location[9]>, <#Nicosia:Location[10]>, <#Islamabad:Location[11]> and <#Johannesburg:Location[12]>.
+s4. He covered many major international stories, including <#Nelson Mandela:Person[13]>'s release from prison in <#1990:Time[14]>, the <#Gulf War:Miscellaneous[15]> of <#1991:Time[16]>, the rise of the <#Taliban:Organization[17]>, the early days of <#Vladimir Putin:Person[18]> as <#Russia:Location[19]>'s leader, the <#Israeli:Location[20]>-<#Palestinian:Location[21]> fighting that erupted in <#2000:Time[22]> and the <#U.S.:Location[23]> war in <#Afghanistan:Location[24]>.
+s5. He is also a scholar at the <#Middle East Institute:Organization[25]> in <#Washington:Location[26]>.
+```
+
+## Strategy
+Extract all verifiable facts about relation "country_of_citizenship" in "Source" as knowledge triples.
+* Don't speculate, extract only the facts as stated in "Source" **WITHOUT OMISSION**.
+The relation "country_of_citizenship" (P27) means "the object is a country that recognizes the subject as its citizen".
+* The existence of a relation is restricted by the type of subject and object.
+You must select subjects and objects of triples from the bracketed named-entity expressions, such as "<#name:type[id]>", without modification.
+[/INST]
+""".strip()
+
+generated = kgllm.generate(prompt)
+print(generated)
+````
+
+Check the result.
+````md
+## Knowledge Graph
+```turtle
+#@rationale: s0. <#John Gregory Myre:Person[0]> is an <#American:Location[1]> journalist and <#NPR:Organization[2]>'s digital editor for international news.
+
+<#John Gregory Myre:Person[0]>
+    rel:country_of_citizenship <#American:Location[1]>.
+
+#@rationale: s1. Previously, he was a foreign correspondent for The <#Associated Press:Organization[3]> and The <#New York Times:Organization[4]> for <#20 years:Time[5]>.
+
+#@rationale: s2. He reported from <#Jerusalem:Location[6]> from <#2003:Time[7]> to <#2007:Time[8]> for The <#New York Times:Organization[4]>.
+
+#@rationale: s3. With the <#AP:Organization[3]>, <#Myre:Person[0]> was based in <#Jerusalem:Location[6]>, <#Moscow:Location[9]>, <#Nicosia:Location[10]>, <#Islamabad:Location[11]> and <#Johannesburg:Location[12]>.
+
+#@rationale: s4. He covered many major international stories, including <#Nelson Mandela:Person[13]>'s release from prison in <#1990:Time[14]>, the <#Gulf War:Miscellaneous[15]> of <#1991:Time[16]>, the rise of the <#Taliban:Organization[17]>, the early days of <#Vladimir Putin:Person[18]> as <#Russia:Location[19]>'s leader, the <#Israeli:Location[20]>-<#Palestinian:Location[21]> fighting that erupted in <#2000:Time[22]> and the <#U.S.:Location[23]> war in <#Afghanistan:Location[24]>.
+
+<#Vladimir Putin:Person[18]>
+    rel:country_of_citizenship <#Russia:Location[19]>.
+
+#@rationale: s5. He is also a scholar at the <#Middle East Institute:Organization[25]> in <#Washington:Location[26]>.
+```
+````
+''',
+    ),
+]
+
+
+def generate_document(mc: ModelContent, *, kind: Literal["README.md", "LICENSE"]):
+    """Generate README.md or LICENSE.
+    """
+    with open(f"./{kind}.template", "r") as f:
+        template = f.read()
+
+    file_text = template.format(**dict(
+        model_name=mc.model_name,
+        base_model_id=mc.base_model_id,
+        train_datasets=mc.train_datasets.strip(),
+        how_to_use=mc.how_to_use.strip(),
+    ))
+    file_path = f"../../{mc.model_name}/{kind}"
+
+    with open(file_path, "w") as f:
+        f.write(file_text)
+    print(f"Output to {file_path}")
+
+
+if __name__ == "__main__":
+    for mc in model_contents:
+        generate_document(mc, kind="README.md")
+        generate_document(mc, kind="LICENSE")
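
The inference templates above walk through a three-step chain: generate a graph schema from the question, ground that schema against the source documents, then explore the grounded graph for the final answer. A minimal sketch wiring those steps into one helper; it assumes a loaded `kgllm` plus the helpers in kgllm_utils.py below, the prompts are copied from the infer-model templates, and `answer_with_kg` itself is an illustrative name, not part of this repository:

````python
from kgllm_utils import Fujitsu_LLM_KG, extract_turtle, extract_answer

def answer_with_kg(kgllm: Fujitsu_LLM_KG, question: str, source: str) -> str:
    """Hypothetical helper chaining the three inference prompts above."""
    # Step 1: derive a graph schema from the question alone.
    prompt_schema = f"""
[INST]
Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".

## Source
```txt
{question}
```

## Strategy
Extract graph schema needed to answer the question in above "Source" as knowledge triples without omission.
[/INST]
""".strip()
    schema = extract_turtle(kgllm.generate(prompt_schema))

    # Step 2: ground the schema against the source documents.
    prompt_kg = f"""
[INST]
Generate "Knowledge Graph" in RDF Turtle format based on the given "Source".

## Source
```txt
{source}
```

## Strategy
Extract all verifiable facts in above "Source", that match the following graph schema, as knowledge triples without omission.
```turtle
{schema}
```
The extracted facts are needed to answer the question "{question}".
[/INST]
""".strip()
    kg = extract_turtle(kgllm.generate(prompt_kg))

    # Step 3: explore the grounded graph and extract the final answer.
    prompt_answer = f"""
[INST]
Explore "Knowledge Graph" entity-to-entity then finally answer "Question".

## Knowledge Graph
```turtle
{kg}
```

## Question
{question}

## Strategy
Answer briefly in one line.
[/INST]
""".strip()
    answer, _path = extract_answer(kgllm.generate(prompt_answer))
    return answer
````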
scripts/kgllm_utils.py
ADDED
@@ -0,0 +1,94 @@
+"""Utilities for the Fujitsu-LLM-KG-8x7B models.
+"""
+
+from typing import Literal, Sequence, Tuple
+
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+###############################################################################
+# Generation
+###############################################################################
+
+class Fujitsu_LLM_KG:
+    """The Fujitsu-LLM-KG-8x7B model.
+    """
+    def __init__(self, model_id: str, *, device_map: str = "auto") -> None:
+        """Initializes the model and tokenizer.
+        """
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            device_map=device_map,
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True,
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+
+    def generate(self, prompt: str,
+                 *,
+                 max_new_tokens: int = 2048,
+                 num_beams: int = 1,
+                 ) -> str:
+        """Generates an answer.
+        """
+        tokenized = self.tokenizer(prompt, return_tensors="pt", padding=True)
+        with torch.no_grad():
+            outputs = self.model.generate(
+                tokenized["input_ids"].to("cuda"),
+                attention_mask=tokenized["attention_mask"].to("cuda"),
+                pad_token_id=self.tokenizer.eos_token_id,
+                max_new_tokens=max_new_tokens,
+                do_sample=False,
+                num_beams=num_beams,
+            )
+        answer = self.tokenizer.decode(outputs[0], skip_special_tokens=True)[len(prompt):]
+        return answer
+
+
+###############################################################################
+# Extraction
+###############################################################################
+
+def extract_turtle(text: str, *, with_rationale: bool = False) -> str:
+    """Extracts the RDF Turtle part from the output text of a Fujitsu-LLM-KG-8x7B model.
+    """
+    TOKENS = ["<", "rel:", "rdf:", "]"]
+    if with_rationale:
+        TOKENS.append("#@")
+    turtle = ""
+    for line in text.splitlines():
+        line_ = line.strip()
+        if line == "" or any(line_.startswith(c) for c in TOKENS):
+            if turtle:
+                turtle += "\n"
+            turtle += line
+    return turtle
+
+
+def extract_answer(text: str) -> Tuple[str, Sequence[str]]:
+    """Extracts the final answer part from the output text of a Fujitsu-LLM-KG-8x7B_inst-infer model.
+    """
+    path = []
+    answer = ""
+    state: Literal["path", "answer"] = "path"
+    for line in text.splitlines():
+        if line.strip() and "```" not in line and "## " not in line:
+            if state == "path":
+                path.append(line)
+            elif state == "answer":
+                if answer:
+                    answer += "\n"
+                answer += line
+
+        if "## Explore Path" in line:
+            state = "path"
+            path = []
+        elif "## Answer" in line:
+            state = "answer"
+            answer = ""
+        elif "```" in line and answer:
+            break
+    path = tuple(p.strip() for p in path)
+    answer = answer.strip()
+    return answer, path
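
A small, self-contained check of the answer-extraction helper; the sample text below is a hand-written stand-in for model output in the "## Explore Path" / "## Answer" format shown in the README examples, not an actual generation:

````python
from kgllm_utils import extract_answer

# Hand-written sample in the output format of the inst-infer models.
sample = """
## Explore Path
```path
長岡京 → rel:遷都される前の都 → 平城京
平城京 → rel:現在の県 → 奈良県
```

## Answer
```txt
奈良県奈良市
```
""".strip()

answer, path = extract_answer(sample)
print(answer)  # 奈良県奈良市
print(path)    # ('長岡京 → rel:遷都される前の都 → 平城京', '平城京 → rel:現在の県 → 奈良県')
````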
scripts/test_kgllm.ipynb
ADDED
@@ -0,0 +1,850 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"metadata": {},
|
6 |
+
"source": [
|
7 |
+
"# KGLLMの動作例を作成する"
|
8 |
+
]
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [
|
15 |
+
{
|
16 |
+
"name": "stdout",
|
17 |
+
"output_type": "stream",
|
18 |
+
"text": [
|
19 |
+
"Collecting transformers\n",
|
20 |
+
" Using cached transformers-4.48.1-py3-none-any.whl.metadata (44 kB)\n",
|
21 |
+
"Collecting torch\n",
|
22 |
+
" Using cached torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl.metadata (28 kB)\n",
|
23 |
+
"Collecting accelerate\n",
|
24 |
+
" Using cached accelerate-1.3.0-py3-none-any.whl.metadata (19 kB)\n",
|
25 |
+
"Collecting filelock (from transformers)\n",
|
26 |
+
" Using cached filelock-3.17.0-py3-none-any.whl.metadata (2.9 kB)\n",
|
27 |
+
"Collecting huggingface-hub<1.0,>=0.24.0 (from transformers)\n",
|
28 |
+
" Using cached huggingface_hub-0.27.1-py3-none-any.whl.metadata (13 kB)\n",
|
29 |
+
"Collecting numpy>=1.17 (from transformers)\n",
|
30 |
+
" Using cached numpy-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)\n",
|
31 |
+
"Requirement already satisfied: packaging>=20.0 in /home/mnktsts/repo/Fujitsu-LLM-KG/README/.venv/lib/python3.10/site-packages (from transformers) (24.2)\n",
|
32 |
+
"Collecting pyyaml>=5.1 (from transformers)\n",
|
33 |
+
" Using cached PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.1 kB)\n",
|
34 |
+
"Collecting regex!=2019.12.17 (from transformers)\n",
|
35 |
+
" Using cached regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)\n",
|
36 |
+
"Collecting requests (from transformers)\n",
|
37 |
+
" Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)\n",
|
38 |
+
"Collecting tokenizers<0.22,>=0.21 (from transformers)\n",
|
39 |
+
" Using cached tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n",
|
40 |
+
"Collecting safetensors>=0.4.1 (from transformers)\n",
|
41 |
+
" Using cached safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n",
|
42 |
+
"Collecting tqdm>=4.27 (from transformers)\n",
|
43 |
+
" Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)\n",
|
44 |
+
"Requirement already satisfied: typing-extensions>=4.8.0 in /home/mnktsts/repo/Fujitsu-LLM-KG/README/.venv/lib/python3.10/site-packages (from torch) (4.12.2)\n",
|
45 |
+
"Collecting networkx (from torch)\n",
|
46 |
+
" Using cached networkx-3.4.2-py3-none-any.whl.metadata (6.3 kB)\n",
|
47 |
+
"Collecting jinja2 (from torch)\n",
|
48 |
+
" Using cached jinja2-3.1.5-py3-none-any.whl.metadata (2.6 kB)\n",
|
49 |
+
"Collecting fsspec (from torch)\n",
|
50 |
+
" Using cached fsspec-2024.12.0-py3-none-any.whl.metadata (11 kB)\n",
|
51 |
+
"Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n",
|
52 |
+
" Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
|
53 |
+
"Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n",
|
54 |
+
" Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
|
55 |
+
"Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n",
|
56 |
+
" Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
|
57 |
+
"Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n",
|
58 |
+
" Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
|
59 |
+
"Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n",
|
60 |
+
" Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
|
61 |
+
"Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n",
|
62 |
+
" Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
|
63 |
+
"Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n",
|
64 |
+
" Using cached nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
|
65 |
+
"Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n",
|
66 |
+
" Using cached nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
|
67 |
+
"Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n",
|
68 |
+
" Using cached nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n",
|
69 |
+
"Collecting nvidia-nccl-cu12==2.21.5 (from torch)\n",
|
70 |
+
" Using cached nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\n",
|
71 |
+
"Collecting nvidia-nvtx-cu12==12.4.127 (from torch)\n",
|
72 |
+
" Using cached nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.7 kB)\n",
|
73 |
+
"Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n",
|
74 |
+
" Using cached nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n",
|
75 |
+
"Collecting triton==3.1.0 (from torch)\n",
|
76 |
+
" Using cached triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)\n",
|
77 |
+
"Collecting sympy==1.13.1 (from torch)\n",
|
78 |
+
" Using cached sympy-1.13.1-py3-none-any.whl.metadata (12 kB)\n",
|
79 |
+
"Collecting mpmath<1.4,>=1.1.0 (from sympy==1.13.1->torch)\n",
|
80 |
+
" Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\n",
|
81 |
+
"Requirement already satisfied: psutil in /home/mnktsts/repo/Fujitsu-LLM-KG/README/.venv/lib/python3.10/site-packages (from accelerate) (6.1.1)\n",
|
82 |
+
"Collecting MarkupSafe>=2.0 (from jinja2->torch)\n",
|
83 |
+
" Using cached MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.0 kB)\n",
|
84 |
+
"Collecting charset-normalizer<4,>=2 (from requests->transformers)\n",
|
85 |
+
" Using cached charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (35 kB)\n",
|
86 |
+
"Collecting idna<4,>=2.5 (from requests->transformers)\n",
|
87 |
+
" Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)\n",
|
88 |
+
"Collecting urllib3<3,>=1.21.1 (from requests->transformers)\n",
|
89 |
+
" Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)\n",
|
90 |
+
"Collecting certifi>=2017.4.17 (from requests->transformers)\n",
|
91 |
+
" Using cached certifi-2024.12.14-py3-none-any.whl.metadata (2.3 kB)\n",
|
92 |
+
"Using cached transformers-4.48.1-py3-none-any.whl (9.7 MB)\n",
|
93 |
+
"Using cached torch-2.5.1-cp310-cp310-manylinux1_x86_64.whl (906.4 MB)\n",
|
94 |
+
"Using cached nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n",
|
95 |
+
"Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n",
|
96 |
+
"Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n",
|
97 |
+
"Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n",
|
98 |
+
"Using cached nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n",
|
99 |
+
"Using cached nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n",
|
100 |
+
"Using cached nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n",
|
101 |
+
"Using cached nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n",
|
102 |
+
"Using cached nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n",
|
103 |
+
"Using cached nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl (188.7 MB)\n",
|
104 |
+
"Using cached nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n",
|
105 |
+
"Using cached nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (99 kB)\n",
|
106 |
+
"Using cached sympy-1.13.1-py3-none-any.whl (6.2 MB)\n",
|
107 |
+
"Using cached triton-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (209.5 MB)\n",
|
108 |
+
"Using cached accelerate-1.3.0-py3-none-any.whl (336 kB)\n",
"Using cached huggingface_hub-0.27.1-py3-none-any.whl (450 kB)\n",
"Using cached fsspec-2024.12.0-py3-none-any.whl (183 kB)\n",
"Using cached numpy-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)\n",
"Using cached PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (751 kB)\n",
"Using cached regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (781 kB)\n",
"Using cached safetensors-0.5.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (461 kB)\n",
"Using cached tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)\n",
"Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)\n",
"Using cached filelock-3.17.0-py3-none-any.whl (16 kB)\n",
"Using cached jinja2-3.1.5-py3-none-any.whl (134 kB)\n",
"Using cached networkx-3.4.2-py3-none-any.whl (1.7 MB)\n",
"Using cached requests-2.32.3-py3-none-any.whl (64 kB)\n",
"Using cached certifi-2024.12.14-py3-none-any.whl (164 kB)\n",
"Using cached charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (146 kB)\n",
"Using cached idna-3.10-py3-none-any.whl (70 kB)\n",
"Using cached MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20 kB)\n",
"Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
"Using cached urllib3-2.3.0-py3-none-any.whl (128 kB)\n",
"Installing collected packages: mpmath, urllib3, tqdm, sympy, safetensors, regex, pyyaml, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, numpy, networkx, MarkupSafe, idna, fsspec, filelock, charset-normalizer, certifi, triton, requests, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jinja2, nvidia-cusolver-cu12, huggingface-hub, torch, tokenizers, transformers, accelerate\n",
"Successfully installed MarkupSafe-3.0.2 accelerate-1.3.0 certifi-2024.12.14 charset-normalizer-3.4.1 filelock-3.17.0 fsspec-2024.12.0 huggingface-hub-0.27.1 idna-3.10 jinja2-3.1.5 mpmath-1.3.0 networkx-3.4.2 numpy-2.2.2 nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nccl-cu12-2.21.5 nvidia-nvjitlink-cu12-12.4.127 nvidia-nvtx-cu12-12.4.127 pyyaml-6.0.2 regex-2024.11.6 requests-2.32.3 safetensors-0.5.2 sympy-1.13.1 tokenizers-0.21.0 torch-2.5.1 tqdm-4.67.1 transformers-4.48.1 triton-3.1.0 urllib3-2.3.0\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"pip install transformers torch accelerate"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mnktsts/repo/Fujitsu-LLM-KG/README/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from kgllm_utils import Fujitsu_LLM_KG, extract_turtle, extract_answer"
]
},
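{
"cell_type": "markdown",
"metadata": {},
"source": [
"`kgllm_utils` (in `scripts/`) provides the thin wrappers used below: `Fujitsu_LLM_KG` loads a local checkpoint and exposes `generate(prompt)`; `extract_turtle` pulls the Turtle code block out of a generation; `extract_answer` pulls the lines of the `## Answer` block. A rough sketch of the two extractors, inferred from how they are used in this notebook (not the actual implementation):\n",
"\n",
"```python\n",
"import re\n",
"\n",
"def extract_turtle_sketch(generated: str) -> str:\n",
"    # Body of the first turtle-fenced block, or an empty string.\n",
"    m = re.search(r\"```turtle\\n(.*?)```\", generated, re.DOTALL)\n",
"    return m.group(1).strip() if m else \"\"\n",
"\n",
"def extract_answer_sketch(generated: str) -> list[str]:\n",
"    # Lines of the txt-fenced block under \"## Answer\".\n",
"    m = re.search(r\"## Answer\\s*```txt\\n(.*?)```\", generated, re.DOTALL)\n",
"    return m.group(1).strip().splitlines() if m else []\n",
"```"
]
},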
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 共通事前学習済みLLM"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|██████████| 39/39 [00:35<00:00, 1.11it/s]\n"
]
}
],
"source": [
"kgllm = Fujitsu_LLM_KG(\"../../Fujitsu-LLM-KG-8x7B_cpt\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"## Knowledge Graph\n",
"```turtle\n",
"#@rationale: 宗像聡は富士通に2010年から勤めています。\n",
"<#宗像聡>\n",
" rel:employer [\n",
" rdf:object <#富士通>;\n",
" rel:start_time <#2010>\n",
" ].\n",
"\n",
"#@rationale: 彼はFujitsu-LLM-KG-8x7B_cptを開発しました。\n",
"<#宗像聡>\n",
" rel:notable_work <#Fujitsu-LLM-KG-8x7B_cpt>.\n",
"```\n"
]
}
],
"source": [
"prompt = \"\"\"\n",
"[INST]\n",
"Generate \"Knowledge Graph\" in RDF Turtle format based on the given \"Source\".\n",
"\n",
"## Source\n",
"```txt\n",
"宗像聡は富士通に2010年から勤めています。\n",
"彼はFujitsu-LLM-KG-8x7B_cptを開発しました。\n",
"```\n",
"\n",
"## Strategy\n",
"Extract all verifiable facts in \"Source\" as knowledge triples.\n",
"[/INST]\n",
"\"\"\".strip()\n",
"\n",
"generated = kgllm.generate(prompt)\n",
"print(generated)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"## Text\n",
"```txt\n",
"Satoshi Munakata, who started working for Fujitsu in 2010, is the author of the notable work Fujitsu-LLM-KG-8x7B cpt.\n",
"```\n"
]
}
],
"source": [
"prompt = \"\"\"\n",
"[INST]\n",
"Generate \"Text\" to explain the given knowledge triples in \"Source\".\n",
"\n",
"## Source\n",
"```turtle\n",
"<#Satoshi Munakata>\n",
" rel:notable_work <#Fujitsu-LLM-KG-8x7B_cpt>;\n",
" rel:employer [\n",
" rdf:object <#Fujitsu>;\n",
" rel:start_time <#2010>\n",
" ].\n",
"```\n",
"\n",
"## Strategy\n",
"Explain the knowledge triples in \"Source\" without omission, but concisely and fluently.\n",
"[/INST]\n",
"\"\"\".strip()\n",
"\n",
"generated = kgllm.generate(prompt)\n",
"print(generated)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## ナレッジグラフ生成LLM 日本語版"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|██████████| 39/39 [00:27<00:00, 1.41it/s]\n"
]
}
],
"source": [
"kgllm = Fujitsu_LLM_KG(\"../../Fujitsu-LLM-KG-8x7B_inst-gen_ja\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Knowledge Graph\n",
"```turtle\n",
"#@rationale: s0. <#マリー・テレーズ・ドートリッシュ:Person[0]>(<#1638 年 9 月 10 日:Date[6]>日-<#1683 年 7月 30 日:Date[7]>)は、<#フランス:Location[1]>王<#ルイ 14 世:Person[2]>の王妃。\n",
"\n",
"<#ルイ 14 世:Person[2]>\n",
" rel:CountryOfCitizenship <#フランス:Location[1]>.\n",
"\n",
"#@rationale: s1. 父は<#スペイン:Location[3]>王<#フェリペ 4 世:Person[4]>、母は<#フランス:Location[1]>王<#アンリ 4 世:Person[8]>と<#マリー・ド・メディシス:Person[9]>の娘<#イサベル・デ・ボルボン:Person[5]>。\n",
"\n",
"<#フェリペ 4 世:Person[4]>\n",
" rel:CountryOfCitizenship <#スペイン:Location[3]>.\n",
"\n",
"<#アンリ 4 世:Person[8]>\n",
" rel:CountryOfCitizenship <#フランス:Location[1]>.\n",
"\n",
"<#イサベル・デ・ボルボン:Person[5]>\n",
" rel:CountryOfCitizenship <#フランス:Location[1]>.\n",
"\n",
"#@rationale: s2. <#スペイン:Location[3]>名は<#マリア・テレサ:Person[0]>。\n",
"\n",
"#@rationale: s3. <#ルイ 14 世:Person[2]>の父<#ルイ 13 世:Person[10]>は<#イサベル:Person[5]>の兄、母<#アンヌ・ドートリッシュ:Person[11]>は<#フェリペ 4 世:Person[4]>の姉であり、<#ルイ 14世:Person[2]>と<#マリー・テレーズ:Person[0]>とは父方・母方双方で従兄妹に当たる。\n",
"\n",
"<#ルイ 13 世:Person[10]>\n",
" rel:CountryOfCitizenship <#フランス:Location[1]>.\n",
"\n",
"<#アンヌ・ドートリッシュ:Person[11]>\n",
" rel:CountryOfCitizenship <#フランス:Location[1]>.\n",
"\n",
"#@rationale: s4. <#神聖ローマ:Location[12]>皇帝<#レオポルト 1 世:Person[13]>の皇后<#マルガリータ・テレサ:Person[14]>は異母妹、<#スペイン:Location[3]>・<#ハプスブルク家:Organization[15]>最後の王<#カルロス 2世:Person[16]>は異母弟である。\n",
"\n",
"<#カルロス 2世:Person[16]>\n",
" rel:CountryOfCitizenship <#スペイン:Location[3]>.\n",
"```\n"
]
}
],
"source": [
"\n",
"prompt = \"\"\"\n",
"[INST]\n",
"Generate \"Knowledge Graph\" in RDF Turtle format based on the given \"Source\".\n",
"\n",
"## Source\n",
"```txt\n",
"s0. <#マリー・テレーズ・ドートリッシュ:Person[0]>(<#1638 年 9 月 10 日:Date[6]>日-<#1683 年 7月 30 日:Date[7]>)は、<#フランス:Location[1]>王<#ルイ 14 世:Person[2]>の王妃。\n",
"s1. 父は<#スペイン:Location[3]>王<#フェリペ 4 世:Person[4]>、母は<#フランス:Location[1]>王<#アンリ 4 世:Person[8]>と<#マリー・ド・メディシス:Person[9]>の娘<#イサベル・デ・ボルボン:Person[5]>。\n",
"s2. <#スペイン:Location[3]>名は<#マリア・テレサ:Person[0]>。\n",
"s3. <#ルイ 14 世:Person[2]>の父<#ルイ 13 世:Person[10]>は<#イサベル:Person[5]>の兄、母<#アンヌ・ドートリッシュ:Person[11]>は<#フェリペ 4 世:Person[4]>の姉であり、<#ルイ 14世:Person[2]>と<#マリー・テレーズ:Person[0]>とは父方・母方双方で従兄妹に当たる。\n",
"s4. <#神聖ローマ:Location[12]>皇帝<#レオポルト 1 世:Person[13]>の皇后<#マルガリータ・テレサ:Person[14]>は異母妹、<#スペイン:Location[3]>・<#ハプスブルク家:Organization[15]>最後の王<#カルロス 2世:Person[16]>は異母弟である。\n",
"```\n",
"## Strategy\n",
"Extract all verifiable facts about relation \"CountryOfCitizenship\" in \"Source\" as knowledge triples.\n",
"* Don't speculate, extract only the facts as stated in \"Source\" **WITHOUT OMMISION**. \n",
"The relation \"CountryOfCitizenship\" (P27) means \"the object is a country that recognizes the subject as its citizen\".\n",
"* The existence of a relation is restricted by the type of subject and object. You must select subjects and objects of triples from the bracketed eigenexpressions, such as \"<#name:type[id]>\", without modification.\n",
"[/INST]\n",
" \"\"\".strip()\n",
"\n",
"generated = kgllm.generate(prompt)\n",
"print(generated)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## ナレッジグラフ生成LLM 英語版"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|██████████| 39/39 [00:27<00:00, 1.42it/s]\n"
]
}
],
"source": [
"kgllm = Fujitsu_LLM_KG(\"../../Fujitsu-LLM-KG-8x7B_inst-gen_en\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Knowledge Graph\n",
"```turtle\n",
"#@rationale: s0. <#John Gregory Myre:Person[0]> is an <#American:Location[1]> journalist and <#NPR:Organization[2]>'s digital editor for international news.\n",
"\n",
"<#John Gregory Myre:Person[0]>\n",
" rel:country_of_citizenship <#American:Location[1]>.\n",
"\n",
"#@rationale: s1. Previously, he was a foreign correspondent for The <#Associated Press:Organization[3]> and The <#New York Times:Organization[4]> for <#20 years:Time[5]>.\n",
"\n",
"#@rationale: s2. He reported from <#Jerusalem:Location[6]> from <#2003:Time[7]> to <#2007:Time[8]> for The <#New York Times:Organization[4]>.\n",
"\n",
"#@rationale: s3. With the <#AP:Organization[3]>, <#Myre:Person[0]> was based in <#Jerusalem:Location[6]>, <#Moscow:Location[9]>, <#Nicosia:Location[10]>, <#Islamabad:Location[11]> and <#Johannesburg:Location[12]>.\n",
"\n",
"#@rationale: s4. He covered many major international stories, including <#Nelson Mandela:Person[13]>'s release from prison in <#1990:Time[14]>, the <#Gulf War:Miscellaneous[15]> of <#1991:Time[16]>, the rise of the <#Taliban:Organization[17]>, the early days of <#Vladimir Putin:Person[18]> as <#Russia:Location[19]>'s leader, the <#Israeli:Location[20]>-<#Palestinian:Location[21]> fighting that erupted in <#2000:Time[22]> and the <#U.S.:Location[23]> war in <#Afghanistan:Location[24]>.\n",
"\n",
"<#Vladimir Putin:Person[18]>\n",
" rel:country_of_citizenship <#Russia:Location[19]>.\n",
"\n",
"#@rationale: s5. He is also a scholar at the <#Middle East Institute:Organization[25]> in <#Washington:Location[26]>.\n",
"```\n"
]
}
],
"source": [
"prompt = \"\"\"\n",
"[INST]\n",
"Generate \"Knowledge Graph\" in RDF Turtle format based on the given \"Source\".\n",
" \n",
"## Source\n",
"```txt\n",
"s0. <#John Gregory Myre:Person[0]> is an <#American:Location[1]> journalist and <#NPR:Organization[2]>'s digital editor for international news.\n",
"s1. Previously, he was a foreign correspondent for The <#Associated Press:Organization[3]> and The <#New York Times:Organization[4]> for <#20 years:Time[5]>.\n",
"s2. He reported from <#Jerusalem:Location[6]> from <#2003:Time[7]> to <#2007:Time[8]> for The <#New York Times:Organization[4]>.\n",
"s3. With the <#AP:Organization[3]>, <#Myre:Person[0]> was based in <#Jerusalem:Location[6]>, <#Moscow:Location[9]>, <#Nicosia:Location[10]>, <#Islamabad:Location[11]> and <#Johannesburg:Location[12]>.\n",
"s4. He covered many major international stories, including <#Nelson Mandela:Person[13]>'s release from prison in <#1990:Time[14]>, the <#Gulf War:Miscellaneous[15]> of <#1991:Time[16]>, the rise of the <#Taliban:Organization[17]>, the early days of <#Vladimir Putin:Person[18]> as <#Russia:Location[19]>'s leader, the <#Israeli:Location[20]>-<#Palestinian:Location[21]> fighting that erupted in <#2000:Time[22]> and the <#U.S.:Location[23]> war in <#Afghanistan:Location[24]>.\n",
"s5. He is also a scholar at the <#Middle East Institute:Organization[25]> in <#Washington:Location[26]>.\n",
" ```\n",
" \n",
"## Strategy\n",
"Extract all verifiable facts about relation \"country_of_citizenship\" in \"Source\" as knowledge triples.\n",
"* Don't speculate, extract only the facts as stated in \"Source\" **WITHOUT OMMISION**.\n",
"The relation \"country_of_citizenship\" (P27) means \"the object is a country that recognizes the subject as its citizen\".\n",
"* The existence of a relation is restricted by the type of subject and object.\n",
"You must select subjects and objects of triples from the bracketed eigenexpressions, such as \"<#name:type[id]>\", without modification.\n",
"[/INST]\n",
"\"\"\".strip()\n",
"\n",
"generated = kgllm.generate(prompt)\n",
"print(generated)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## ナレッジグラフ推論LLM ver.2"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|██████████| 39/39 [00:50<00:00, 1.31s/it]\n"
]
}
],
"source": [
"kgllm = Fujitsu_LLM_KG(\"../../Fujitsu-LLM-KG-8x7B_inst-infer_v2\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Knowledge Graph\n",
"```turtle\n",
"<#お正月>\n",
" rel:has_duration <?duration_of_new_year>.\n",
"<#お盆>\n",
" rel:has_duration <?duration_of_obon>.\n",
"<#Answer>\n",
" rel:is <#お正月>;\n",
" rel:when [\n",
" <?duration_of_new_year>\n",
" rel:greater_than <?duration_of_obon>.\n",
" ];\n",
" rel:else <#お盆>.\n",
"```\n"
]
}
],
"source": [
"question = \"お正月とお盆とでは、どちらがより長期間の休日ですか?\"\n",
"\n",
"prompt_schema = f\"\"\"\n",
"[INST]\n",
"Generate \"Knowledge Graph\" in RDF Turtle format based on the given \"Source\".\n",
"\n",
"## Source\n",
"```txt\n",
"{question}\n",
"```\n",
" \n",
"## Strategy\n",
"Extract graph schema needed to answer the question in above \"Source\" as knowledge triples without omission.\n",
"[/INST]\n",
"\"\"\".strip()\n",
"\n",
"generated_schema = kgllm.generate(prompt_schema)\n",
"print(generated_schema)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Knowledge Graph\n",
"```turtle\n",
"<#お正月>\n",
" #@rationale: Title: 第20条「休日」 - ③年末年始(1月1日~1月3日)\n",
" rel:has_duration <#3>;\n",
"<#お盆>\n",
" #@rationale: Title: 第20条「休日」 - ④夏季休日(8月13日~8月16日)\n",
" rel:has_duration <#4>.\n",
"<#Answer>\n",
" rel:is <#お正月>;\n",
" rel:when [\n",
" <#3>\n",
" rel:greater_than <#4>.\n",
" ];\n",
" rel:else <#お盆>.\n",
"```\n"
]
}
],
"source": [
"prompt_kg = f\"\"\"\n",
"[INST]\n",
"Generate \"Knowledge Graph\" in RDF Turtle format based on the given \"Source\".\n",
"\n",
"## Source\n",
"```txt\n",
"Title: 第19条「労働時間及び休憩時間」\n",
"\n",
"1 労働者代表と1年単位の変形労働時間制に関する労使協定を締結した場合、当該協定の適用を受ける労働者について、1週間の所定労働時間は、対象期間を平均して1週間当たり40時間とする。\n",
"2 1年単位の変形労働時間制を適用しない労働者について、1週間の所定労働時間は40時間、1日の所定労働時間は8時間とする。\n",
"\n",
"----\n",
"Title: 第20条「休日」\n",
"\n",
"1 1年単位の変形労働時間制の適用を受ける労働者の休日については、1年単位の変形労働時間制に関する労使協定の定めるところにより、対象期間の初日を起算日とする1週間ごとに1日以上、1年間に125日以上となるように指定する。その場合、年間休日カレンダーに定め、対象期間の初日の30日前までに各労働者に通知する。\n",
"2 1年単位の変形労働時間制を適用しない労働者の休日については、以下のとおり指定し、月間休日カレンダーに定め、対象期間の初日の30日前までに各労働者に通知する。\n",
" ①日曜日(前条第3号の特定期間を除く。)\n",
" ②国民の祝日(日曜日と重なったときは翌日)\n",
" ③年末年始(1月1日~1月3日)\n",
" ④夏季休日(8月13日~8月16日)\n",
" ⑤その他会社が指定する日\n",
"\n",
"----\n",
"Title: 第21条「時間外及び休日労働等」\n",
"\n",
"1 業務の都合により、第19条の所定労働時間を超え、又は第20条の所定休日に労働させることがある。\n",
"2 前項の場合、法定労働時間を超える労働又は法定休日における労働については、あらかじめ会社は労働者の過半数代表者と書面による労使協定を締結するとともに、これを所轄の労働基準監督署長に届け出るものとする。\n",
"3 妊娠中の女性、産後1年を経過しない女性労働者(以下「妊産婦」という)であって請求した者及び18歳未満の者については、第2項による時間外労働又は休日若しくは深夜(午後10時から午前5時まで)労働に従事させない。\n",
"4 災害その他避けることのできない事由によって臨時の必要がある場合には、第1項から前項までの制限を超えて、所定労働時間外又は休日に労働させることがある。ただし、この場合であっても、請求のあった妊産婦については、所定労働時間外労働又は休日労働に従事させない。\n",
"\n",
"----\n",
"Title: 第26条「育児時間」\n",
"\n",
"1 1歳に満たない子を養育する女性労働者から請求があったときは、休憩時間のほか1日について2回、1回について30分の育児時間を与える。\n",
"```\n",
" \n",
"## Strategy\n",
"Extract all verifiable facts in above \"Source\", that match the following graph schema, as knowledge triples.\n",
"```turtle\n",
"{extract_turtle(generated_schema)}\n",
"```\n",
"The extracted facts are needed to answer the question \"{question}\".\n",
"However, if no useful facts are found in the above \"Source\", do not output any triples.\n",
"[/INST]\n",
"\"\"\".strip()\n",
"\n",
"generated_kg = kgllm.generate(prompt_kg)\n",
"print(generated_kg)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Explore Path\n",
"```path\n",
"お正月は3日間の休日です.\n",
"お盆は4日間の休日です.\n",
"Answer is 4 because 4 is greater than 3.\n",
"```\n",
"\n",
"## Answer\n",
"```txt\n",
"お盆\n",
"```\n"
]
}
],
"source": [
"prompt_answer = f\"\"\"\n",
"[INST]\n",
"Explore \"Knowledge Graph\" entity-to-entity then finally answer \"Question\".\n",
"\n",
"## Knowledge Graph\n",
"```turtle\n",
"{extract_turtle(generated_kg)}\n",
"```\n",
"\n",
"## Question\n",
"{question}\n",
"\n",
"## Strategy\n",
"Answer briefly in one line.\n",
"[/INST]\n",
"\"\"\".strip()\n",
"\n",
"generated_answer = kgllm.generate(prompt_answer)\n",
"print(generated_answer)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"お盆\n"
]
}
],
"source": [
"final_answer = extract_answer(generated_answer)[0]\n",
"print(final_answer)"
]
},
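{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cells above chain into a question-answering pipeline: derive a schema from the question, ground it in a source document, then explore the grounded graph. A minimal sketch of the orchestration, assuming the prompt templates above are rewritten as plain format strings with `{question}`, `{source}`, `{schema}` and `{kg}` placeholders (the `*_tmpl` arguments are illustrative, not part of `kgllm_utils`):\n",
"\n",
"```python\n",
"def kg_qa(kgllm, question, source_text, schema_tmpl, kg_tmpl, answer_tmpl):\n",
"    # 1. Which graph shape would answer the question?\n",
"    schema = extract_turtle(kgllm.generate(schema_tmpl.format(question=question)))\n",
"    # 2. Ground the schema in the source document.\n",
"    kg = extract_turtle(kgllm.generate(\n",
"        kg_tmpl.format(source=source_text, schema=schema, question=question)))\n",
"    # 3. Explore the grounded graph to a one-line answer.\n",
"    answered = kgllm.generate(answer_tmpl.format(kg=kg, question=question))\n",
"    return extract_answer(answered)[0]\n",
"```"
]
},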
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## ナレッジグラフ推論LLM ver.1"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|██████████| 39/39 [00:51<00:00, 1.32s/it]\n"
]
}
],
"source": [
"kgllm = Fujitsu_LLM_KG(\"../../Fujitsu-LLM-KG-8x7B_inst-infer_v1\")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Knowledge Graph\n",
"```turtle\n",
"<#長岡京>\n",
" rel:遷都される前の都 <#?>.\n",
"\n",
"<#?>\n",
" rel:現在の県 <#?>;\n",
" rel:現在の市 <#?>.\n",
"```\n"
]
}
],
"source": [
"question = \"長岡京に遷都される前の都の所在地は現在の何県の何市にあたるでしょうか?\"\n",
"\n",
"prompt_schema = f\"\"\"\n",
"[INST]\n",
"Generate \"Knowledge Graph\" in RDF Turtle format based on the given \"Source\".\n",
"\n",
"## Source\n",
"```txt\n",
"{question}\n",
"```\n",
"\n",
"## Strategy\n",
"Extract graph schema needed to answer the question in above \"Source\" as knowledge triples without omission.\n",
"[/INST]\n",
"\"\"\"\n",
"\n",
"generated_schema = kgllm.generate(prompt_schema)\n",
"print(generated_schema)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Knowledge Graph\n",
"```turtle\n",
"<#長岡京>\n",
" rel:遷都される前の都 <#平城京>.\n",
"\n",
"<#平城京>\n",
" rel:現在の県 <#奈良県>;\n",
" rel:現在の市 <#奈良市>.\n",
"```\n"
]
}
],
"source": [
"prompt_kg = f\"\"\"\n",
"[INST]\n",
"Generate \"Knowledge Graph\" in RDF Turtle format based on the given \"Source\".\n",
" \n",
"## Source\n",
"```txt\n",
"平安京\n",
"\n",
"平安京(へいあんきょう/たいらのみやこ)または平安城(へいあんじょう)は、日本における古代最後の宮都。794年(延暦13年)から1869年(明治2年)までの日本の首都。\n",
"桓武天皇により、長岡京に代わる都として山背国(山城国)愛宕・葛野の両郡にまたがる地が選ばれ、中国の洛陽城や長安城を模して793年(延暦12年)から建設された。翌794年(延暦13年)に遷都。北部中央に宮城・平安宮(大内裏)が建設され、以降歴代の皇居が置かれた。\n",
"遷都以来、平清盛により断行された福原遷都(1180年)の期間を除いて、東京奠都まで1100年近くに亘って都として機能し、1869年(明治2年)まで続いた。今日の京都市街が形成されるに至る。\n",
"----\n",
"平城京\n",
"\n",
"平城京(へいじょうきょう/へいぜいきょう/ならのみやこ)は、奈良時代の日本の首都。710年に藤原京から遷都するにあたり、唐の都長安城を模倣して大和国に建造された都城。現在の奈良県奈良市、大和郡山市に存在する。\n",
"中央北域に宮城・平城宮(大内裏)を置き、東西8坊 (約 4.3 km) の面積をもち、中央を南北に走る朱雀大路によって左京・右京に二分され、さらに南北・東西を大路・小路によって碁盤の目のように整然と区画され、全域が72坊に区画設定されていた。\n",
"大阪湾や淡路島(八島のひとつ)にも近い奈良盆地(奈良県奈良市の西部の一部、中心部及び大和郡山市北部)には、5世紀中頃にはすでに天皇陵である佐紀盾列古墳群が作られ、またのちには大神神社、7世紀には興福寺も建立されているが、京となった8世紀には、東大寺や巨大な仏像である東大寺盧舎那仏像、法華寺などが建立された。本州の政治・文化の中心地となるに至って外京(げきょう)に位置した門前町が、今に続く奈良の町を形成する中心となった。\n",
"----\n",
"長岡京\n",
"\n",
"長岡京(ながおかきょう)は、山城国乙訓郡にあった奈良時代末期(または平安時代初期)の都城(現在の京都府向日市、長岡京市、京都市西京区)。宮域跡は向日市鶏冠井町(かいでちょう)に位置し、「長岡宮跡」として国の史跡に指定されている。\n",
"延暦3年(784年)11月11日、第50代桓武天皇により平城京から遷都され、延暦13年(794年)10月22日に平安京に遷都されるまで機能した。\n",
"```\n",
"\n",
"## Strategy\n",
"Extract all verifiable facts in above \"Source\", that match the following graph schema, as knowledge triples without omission.\n",
"```turtle\n",
"{extract_turtle(generated_schema)}\n",
"```\n",
"The extracted facts are needed to answer the question \"{question}\".\n",
"[/INST]\n",
"\"\"\"\n",
"\n",
"generated_kg = kgllm.generate(prompt_kg)\n",
"print(generated_kg)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" ## Explore Path\n",
"```path\n",
"長岡京 → rel:遷都される前の都 → 平城京\n",
"平城京 → rel:現在の県 → 奈良県\n",
"平城京 → rel:現在の市 → 奈良市\n",
"```\n",
"\n",
"## Answer\n",
"```txt\n",
"奈良県奈良市\n",
"```\n"
]
}
],
"source": [
"prompt_answer = f\"\"\"\n",
"[INST]\n",
"Explore \"Knowledge Graph\" entity-to-entity then finally answer \"Question\".\n",
"\n",
"## Knowledge Graph\n",
"```turtle\n",
"{extract_turtle(generated_kg)}\n",
"```\n",
" \n",
"## Question\n",
"{question}\n",
" \n",
"## Strategy\n",
"Answer briefly in one line.\n",
"[/INST]\n",
"\"\"\"\n",
"\n",
"generated_answer = kgllm.generate(prompt_answer)\n",
"print(generated_answer)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}