alessandro trinca tornidor
committed on
Commit
·
498803f
1
Parent(s):
0109857
feat: add a default download NLTK_DATA folder
Browse files
- .gitignore +1 -0
- my_ghost_writer/constants.py +5 -1
- my_ghost_writer/text_parsers2.py +5 -4
.gitignore
CHANGED
|
@@ -670,3 +670,4 @@ $RECYCLE.BIN/
|
|
| 670 |
tmp/*
|
| 671 |
lite.koboldai.net
|
| 672 |
lite.koboldai.net/*
|
|
|
|
|
|
| 670 |
tmp/*
|
| 671 |
lite.koboldai.net
|
| 672 |
lite.koboldai.net/*
|
| 673 |
+
nltk_data/*
|
my_ghost_writer/constants.py
CHANGED
|
@@ -20,6 +20,7 @@ IS_TESTING = bool(os.getenv('IS_TESTING', ""))
|
|
| 20 |
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
|
| 21 |
API_MODE = bool(os.getenv("API_MODE", ""))
|
| 22 |
N_WORDS_GRAM = int(os.getenv("N_WORDS_GRAM", 2))
|
|
|
|
| 23 |
WORDNET_LANGUAGES=(os.getenv("WORDNET_LANGUAGES", "eng,"))
|
| 24 |
SPACY_MODEL_NAME=os.getenv("SPACY_MODEL_NAME", "en_core_web_sm")
|
| 25 |
WORDSAPI_KEY = os.getenv("WORDSAPI_KEY")
|
|
@@ -30,7 +31,10 @@ ME_CONFIG_MONGODB_URL_LOCAL = "mongodb://localhost:27017"
|
|
| 30 |
ME_CONFIG_MONGODB_URL = os.getenv("ME_CONFIG_MONGODB_URL", ME_CONFIG_MONGODB_URL_LOCAL)
|
| 31 |
ME_CONFIG_MONGODB_TIMEOUT_LOCAL = int(os.getenv("ME_CONFIG_MONGODB_TIMEOUT_LOCAL", 200))
|
| 32 |
ME_CONFIG_MONGODB_TIMEOUT_REMOTE = int(os.getenv("ME_CONFIG_MONGODB_TIMEOUT_REMOTE", 3000))
|
| 33 |
-
ME_CONFIG_MONGODB_TIMEOUT = int(os.getenv(
|
|
|
|
|
|
|
|
|
|
| 34 |
ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP = int(os.getenv("ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP", 900))
|
| 35 |
DEFAULT_COLLECTION_THESAURUS =os.getenv("DEFAULT_COLLECTION_THESAURUS", "wordsapi")
|
| 36 |
DEFAULT_DBNAME_THESAURUS = "thesaurus"
|
|
|
|
| 20 |
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
|
| 21 |
API_MODE = bool(os.getenv("API_MODE", ""))
|
| 22 |
N_WORDS_GRAM = int(os.getenv("N_WORDS_GRAM", 2))
|
| 23 |
+
NLTK_DATA = os.getenv("NLTK_DATA", str(PROJECT_ROOT_FOLDER / "nltk_data"))
|
| 24 |
WORDNET_LANGUAGES=(os.getenv("WORDNET_LANGUAGES", "eng,"))
|
| 25 |
SPACY_MODEL_NAME=os.getenv("SPACY_MODEL_NAME", "en_core_web_sm")
|
| 26 |
WORDSAPI_KEY = os.getenv("WORDSAPI_KEY")
|
|
|
|
| 31 |
ME_CONFIG_MONGODB_URL = os.getenv("ME_CONFIG_MONGODB_URL", ME_CONFIG_MONGODB_URL_LOCAL)
|
| 32 |
ME_CONFIG_MONGODB_TIMEOUT_LOCAL = int(os.getenv("ME_CONFIG_MONGODB_TIMEOUT_LOCAL", 200))
|
| 33 |
ME_CONFIG_MONGODB_TIMEOUT_REMOTE = int(os.getenv("ME_CONFIG_MONGODB_TIMEOUT_REMOTE", 3000))
|
| 34 |
+
ME_CONFIG_MONGODB_TIMEOUT = int(os.getenv(
|
| 35 |
+
"ME_CONFIG_MONGODB_TIMEOUT",
|
| 36 |
+
ME_CONFIG_MONGODB_TIMEOUT_LOCAL if ME_CONFIG_MONGODB_URL == ME_CONFIG_MONGODB_URL_LOCAL else ME_CONFIG_MONGODB_TIMEOUT_REMOTE
|
| 37 |
+
))
|
| 38 |
ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP = int(os.getenv("ME_CONFIG_MONGODB_HEALTHCHECK_SLEEP", 900))
|
| 39 |
DEFAULT_COLLECTION_THESAURUS =os.getenv("DEFAULT_COLLECTION_THESAURUS", "wordsapi")
|
| 40 |
DEFAULT_DBNAME_THESAURUS = "thesaurus"
|
my_ghost_writer/text_parsers2.py
CHANGED
|
@@ -7,7 +7,7 @@ import pyinflect
|
|
| 7 |
from typing import Any, Optional
|
| 8 |
from fastapi import HTTPException
|
| 9 |
|
| 10 |
-
from my_ghost_writer.constants import SPACY_MODEL_NAME, app_logger, ELIGIBLE_POS
|
| 11 |
from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
|
| 12 |
from my_ghost_writer.thesaurus import wn
|
| 13 |
from my_ghost_writer.type_hints import WordSynonymResult, ContextInfo, SynonymGroup
|
|
@@ -27,9 +27,10 @@ except (OSError, IOError) as io_ex:
|
|
| 27 |
|
| 28 |
# Ensure NLTK data is downloaded
|
| 29 |
try:
|
| 30 |
-
|
| 31 |
-
nltk.download('
|
| 32 |
-
nltk.download('
|
|
|
|
| 33 |
except Exception as e:
|
| 34 |
app_logger.error(f"Failed to download NLTK data: {e}")
|
| 35 |
|
|
|
|
| 7 |
from typing import Any, Optional
|
| 8 |
from fastapi import HTTPException
|
| 9 |
|
| 10 |
+
from my_ghost_writer.constants import SPACY_MODEL_NAME, app_logger, ELIGIBLE_POS, NLTK_DATA
|
| 11 |
from my_ghost_writer.custom_synonym_handler import CustomSynonymHandler
|
| 12 |
from my_ghost_writer.thesaurus import wn
|
| 13 |
from my_ghost_writer.type_hints import WordSynonymResult, ContextInfo, SynonymGroup
|
|
|
|
| 27 |
|
| 28 |
# Ensure NLTK data is downloaded
|
| 29 |
try:
|
| 30 |
+
app_logger.info(f"Downloading NLTK data to the folder:'{NLTK_DATA}'")
|
| 31 |
+
nltk.download('punkt_tab', quiet=False, download_dir=NLTK_DATA)
|
| 32 |
+
nltk.download('wordnet', quiet=False, download_dir=NLTK_DATA)
|
| 33 |
+
nltk.download('wordnet31', quiet=False, download_dir=NLTK_DATA)
|
| 34 |
except Exception as e:
|
| 35 |
app_logger.error(f"Failed to download NLTK data: {e}")
|
| 36 |
|