stefanoviel
committed on
Commit
·
b1a742b
1
Parent(s):
3c2ac96
removing useless st print
Browse files
- src/streamlit_app.py +7 -7
src/streamlit_app.py
CHANGED
|
@@ -33,7 +33,7 @@ def create_and_save_embeddings(model, data_df):
|
|
| 33 |
Generates and saves document embeddings and the dataframe.
|
| 34 |
This function is called only once if the files don't exist.
|
| 35 |
"""
|
| 36 |
-
|
| 37 |
# Combine title and abstract for richer embeddings
|
| 38 |
data_df['text_to_embed'] = data_df['title'] + ". " + data_df['abstract'].fillna('')
|
| 39 |
|
|
@@ -44,9 +44,9 @@ def create_and_save_embeddings(model, data_df):
|
|
| 44 |
try:
|
| 45 |
torch.save(corpus_embeddings.cpu(), EMBEDDINGS_FILE)
|
| 46 |
data_df.to_pickle(DATA_FILE)
|
| 47 |
-
|
| 48 |
except Exception as e:
|
| 49 |
-
|
| 50 |
|
| 51 |
return corpus_embeddings, data_df
|
| 52 |
|
|
@@ -65,19 +65,19 @@ def load_data_and_embeddings():
|
|
| 65 |
data_df = pd.read_pickle(DATA_FILE)
|
| 66 |
return model, corpus_embeddings, data_df
|
| 67 |
except Exception as e:
|
| 68 |
-
|
| 69 |
|
| 70 |
-
|
| 71 |
|
| 72 |
# Load the raw data from CSV
|
| 73 |
try:
|
| 74 |
data_df = pd.read_csv(CSV_FILE)
|
| 75 |
corpus_embeddings, data_df = create_and_save_embeddings(model, data_df)
|
| 76 |
except FileNotFoundError:
|
| 77 |
-
|
| 78 |
st.stop()
|
| 79 |
except Exception as e:
|
| 80 |
-
|
| 81 |
st.stop()
|
| 82 |
|
| 83 |
return model, corpus_embeddings, data_df
|
|
|
|
| 33 |
Generates and saves document embeddings and the dataframe.
|
| 34 |
This function is called only once if the files don't exist.
|
| 35 |
"""
|
| 36 |
+
print("First time setup: Generating and saving embeddings. This may take a moment...")
|
| 37 |
# Combine title and abstract for richer embeddings
|
| 38 |
data_df['text_to_embed'] = data_df['title'] + ". " + data_df['abstract'].fillna('')
|
| 39 |
|
|
|
|
| 44 |
try:
|
| 45 |
torch.save(corpus_embeddings.cpu(), EMBEDDINGS_FILE)
|
| 46 |
data_df.to_pickle(DATA_FILE)
|
| 47 |
+
print("Embeddings and data saved successfully!")
|
| 48 |
except Exception as e:
|
| 49 |
+
print(f"Could not save embeddings to disk: {e}. Will regenerate on each session.")
|
| 50 |
|
| 51 |
return corpus_embeddings, data_df
|
| 52 |
|
|
|
|
| 65 |
data_df = pd.read_pickle(DATA_FILE)
|
| 66 |
return model, corpus_embeddings, data_df
|
| 67 |
except Exception as e:
|
| 68 |
+
print(f"Could not load saved embeddings: {e}. Regenerating...")
|
| 69 |
|
| 70 |
+
print("embeding model path exists: " + str(Path(EMBEDDING_MODEL).exists()))
|
| 71 |
|
| 72 |
# Load the raw data from CSV
|
| 73 |
try:
|
| 74 |
data_df = pd.read_csv(CSV_FILE)
|
| 75 |
corpus_embeddings, data_df = create_and_save_embeddings(model, data_df)
|
| 76 |
except FileNotFoundError:
|
| 77 |
+
print(f"CSV file '{CSV_FILE}' not found. Please ensure it's in your repository.")
|
| 78 |
st.stop()
|
| 79 |
except Exception as e:
|
| 80 |
+
print(f"Error loading data: {e}")
|
| 81 |
st.stop()
|
| 82 |
|
| 83 |
return model, corpus_embeddings, data_df
|