eubinecto committed
Commit 322e083 · 1 Parent(s): d3d3e90

[#1] fetch_alpha implemented
explore/explore_fetch_alpha.py ADDED
@@ -0,0 +1,10 @@
+ from idiomify.fetchers import fetch_alpha
+
+
+ def main():
+     model = fetch_alpha("overfit")
+     print(model.bart.config)
+
+
+ if __name__ == '__main__':
+     main()
explore/explore_fetch_alpha_predict.py ADDED
@@ -0,0 +1,19 @@
+ from transformers import BartTokenizer
+ from builders import SourcesBuilder
+ from fetchers import fetch_alpha
+
+
+ def main():
+     model = fetch_alpha("overfit")
+     tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+     lit2idi = [
+         ("my man", ""),
+         ("hello", "")
+     ]  # just some dummy stuff
+     srcs = SourcesBuilder(tokenizer)(lit2idi)
+     out = model.predict(srcs=srcs)
+     print(out)
+
+
+ if __name__ == '__main__':
+     main()
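The dummy pairs leave the idiomatic side empty, presumably because that is what the model is asked to produce. If Alpha.predict returns the token ids generated by BART (an assumption; the return type is not shown in this commit), the output could be decoded back into text with the same tokenizer, roughly:

# hypothetical follow-up to the script above; assumes `out` is a batch of generated token ids
idiomatic = tokenizer.batch_decode(out, skip_special_tokens=True)
for (literal, _), sent in zip(lit2idi, idiomatic):
    print(literal, "->", sent)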
explore/explore_fetch_epie.py DELETED
@@ -1,27 +0,0 @@
-
- from idiomify.fetchers import fetch_epie
-
-
- def main():
-     epie = fetch_epie()
-     idioms = set([
-         idiom
-         for idiom, _, _ in epie
-     ])
-
-     # so, what do you want? you want to build an idiom-masked language modeling?
-     for idiom, context, tag in epie:
-         print(idiom, context)
-
-     for idx, idiom in enumerate(idioms):
-         print(idx, idiom)
-
-     # isn't it better to just leave the idiom there, and have it guess what meaning it has?
-     # in that case, It may be better to use a generative model?
-     # but what would happen if you let it... just guess it?
-     # the problem with non-masking is that ... you give the model the answer.
-     # what you should rather do is... do something like... find similar words.
-
-
- if __name__ == '__main__':
-     main()
idiomify/fetchers.py CHANGED
@@ -5,7 +5,7 @@ import wandb
  import requests
  from typing import Tuple, List
  from wandb.sdk.wandb_run import Run
- from idiomify.paths import CONFIG_YAML, idioms_dir, literal2idiomatic
+ from idiomify.paths import CONFIG_YAML, idioms_dir, literal2idiomatic, alpha_dir
  from idiomify.urls import (
      EPIE_IMMUTABLE_IDIOMS_URL,
      EPIE_IMMUTABLE_IDIOMS_CONTEXTS_URL,
@@ -15,9 +15,10 @@ from idiomify.urls import (
      EPIE_MUTABLE_IDIOMS_TAGS_URL,
      PIE_URL
  )
+ from transformers import AutoModelForSeq2SeqLM, AutoConfig
+ from models import Alpha


- # sources for dataset
  def fetch_epie(ver: str) -> List[Tuple[str, str, str]]:
      """
      It fetches the EPIE idioms, contexts, and tags from the web
@@ -85,6 +86,20 @@ def fetch_literal2idiomatic(ver: str, run: Run = None) -> List[Tuple[str, str]]:
      return [(row[0], row[1]) for row in reader]


+ def fetch_alpha(ver: str, run: Run = None) -> Alpha:
+     if run:
+         artifact = run.use_artifact(f"alpha:{ver}", type="model")
+     else:
+         artifact = wandb.Api().artifact(f"eubinecto/idiomify/alpha:{ver}", type="model")
+     config = artifact.metadata
+     artifact_dir = artifact.download(root=alpha_dir(ver))
+     ckpt_path = path.join(artifact_dir, "model.ckpt")
+     bart = AutoModelForSeq2SeqLM.from_config(AutoConfig.from_pretrained(config['bart']))
+     with open(ckpt_path, 'r') as fh:
+         alpha = Alpha.load_from_checkpoint(ckpt_path, bart=bart)
+     return alpha
+
+
  def fetch_config() -> dict:
      with open(str(CONFIG_YAML), 'r', encoding="utf-8") as fh:
          return yaml.safe_load(fh)
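Because fetch_alpha takes an optional run, the checkpoint artifact can be pulled either through an active W&B run (use_artifact then records it as an input of that run) or, as the explore scripts do, through the anonymous wandb.Api() fallback. A minimal sketch of both call paths, assuming the alpha:overfit artifact exists under eubinecto/idiomify:

import wandb
from idiomify.fetchers import fetch_alpha

# inside a tracked run: use_artifact() links alpha:overfit to this run
with wandb.init(entity="eubinecto", project="idiomify") as run:
    model = fetch_alpha("overfit", run=run)

# standalone: falls back to wandb.Api(), nothing is tracked
model = fetch_alpha("overfit")
print(model.bart.config)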
idiomify/paths.py CHANGED
@@ -15,7 +15,3 @@ def literal2idiomatic(ver: str) -> Path:

  def alpha_dir(ver: str) -> Path:
      return ARTIFACTS_DIR / f"alpha_{ver}"
-
-
- def gamma_dir(ver: str) -> Path:
-     return ARTIFACTS_DIR / f"beta_{ver}"