Commit c6fb015
Parent(s): 4d4a1c6
adding the news classifier and parser
Files changed:
- .DS_Store +0 -0
- .gitignore +1 -0
- backend/Dockerfile +6 -7
- backend/__pycache__/celery_worker.cpython-311.pyc +0 -0
- backend/__pycache__/main.cpython-311.pyc +0 -0
- backend/__pycache__/schemas.cpython-311.pyc +0 -0
- backend/alembic.ini +147 -0
- backend/alembic/README +1 -0
- backend/alembic/__pycache__/env.cpython-311.pyc +0 -0
- backend/alembic/env.py +69 -0
- backend/alembic/script.py.mako +28 -0
- backend/alembic/versions/17ec047335c5_create_analysis_jobs_table.py +40 -0
- backend/alembic/versions/__pycache__/17ec047335c5_create_analysis_jobs_table.cpython-311.pyc +0 -0
- backend/celery_worker.py +19 -0
- backend/core/__pycache__/config.cpython-311.pyc +0 -0
- backend/core/__pycache__/database.cpython-311.pyc +0 -0
- backend/core/config.py +10 -0
- backend/core/database.py +7 -0
- backend/main.py +41 -11
- backend/models/__pycache__/analysis_job.cpython-311.pyc +0 -0
- backend/models/analysis_job.py +12 -0
- backend/requirements.txt +25 -0
- backend/schemas.py +14 -0
- backend/tasks/__pycache__/news_tasks.cpython-311.pyc +0 -0
- backend/tasks/news_tasks.py +43 -0
- backend/tools/__pycache__/data_tools.cpython-311.pyc +0 -0
- backend/tools/__pycache__/news_tools.cpython-311.pyc +0 -0
- backend/tools/data_tools.py +43 -0
- backend/tools/download_model.py +0 -0
- backend/tools/news_tools.py +86 -0
- docker-compose.yml +28 -3
- ml_models/.DS_Store +0 -0
- ml_models/README.md +1 -0
- ml_models/config.json +1 -0
- ml_models/merges.txt +1 -0
- ml_models/special_tokens_map.json +1 -0
- ml_models/tokenizer.json +1 -0
- ml_models/tokenizer_config.json +1 -0
- ml_models/vocab.json +1 -0
- poetry.lock +0 -0
- pyproject.toml +8 -1
- tmp_down.py +7 -0
.DS_Store
ADDED
Binary file (6.15 kB)
.gitignore
ADDED
@@ -0,0 +1 @@
+.env
backend/Dockerfile
CHANGED
@@ -1,13 +1,12 @@
 FROM python:3.11-slim
 
-WORKDIR /
-RUN
-COPY
-CMD ["poetry", "run", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+WORKDIR /code
+
+RUN apt-get update && apt-get install -y git
+
+COPY ./backend/requirements.txt .
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY ./ml_models /code/sentiment_model
+
+WORKDIR /code/app
backend/__pycache__/celery_worker.cpython-311.pyc
ADDED
Binary file (706 Bytes)
backend/__pycache__/main.cpython-311.pyc
ADDED
Binary file (3.5 kB)
backend/__pycache__/schemas.cpython-311.pyc
ADDED
Binary file (1.11 kB)
backend/alembic.ini
ADDED
@@ -0,0 +1,147 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts.
# this is typically a path given in POSIX (e.g. forward slashes)
# format, relative to the token %(here)s which refers to the location of this
# ini file
script_location = %(here)s/alembic

# template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
# Uncomment the line below if you want the files to be prepended with date and time
# see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
# for all available tokens
# file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s

# sys.path path, will be prepended to sys.path if present.
# defaults to the current working directory. for multiple paths, the path separator
# is defined by "path_separator" below.
prepend_sys_path = .


# timezone to use when rendering the date within the migration file
# as well as the filename.
# If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
# Any required deps can installed by adding `alembic[tz]` to the pip requirements
# string value is passed to ZoneInfo()
# leave blank for localtime
# timezone =

# max length of characters to apply to the "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; This defaults
# to <script_location>/versions.  When using multiple version
# directories, initial revisions must be specified with --version-path.
# The path separator used here should be the separator specified by "path_separator"
# below.
# version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions

# path_separator; This indicates what character is used to split lists of file
# paths, including version_locations and prepend_sys_path within configparser
# files such as alembic.ini.
# The default rendered in new alembic.ini files is "os", which uses os.pathsep
# to provide os-dependent path splitting.
#
# Note that in order to support legacy alembic.ini files, this default does NOT
# take place if path_separator is not present in alembic.ini.  If this
# option is omitted entirely, fallback logic is as follows:
#
# 1. Parsing of the version_locations option falls back to using the legacy
#    "version_path_separator" key, which if absent then falls back to the legacy
#    behavior of splitting on spaces and/or commas.
# 2. Parsing of the prepend_sys_path option falls back to the legacy
#    behavior of splitting on spaces, commas, or colons.
#
# Valid values for path_separator are:
#
# path_separator = :
# path_separator = ;
# path_separator = space
# path_separator = newline
#
# Use os.pathsep. Default configuration used for new projects.
path_separator = os

# set to 'true' to search source files recursively
# in each "version_locations" directory
# new in Alembic version 1.10
# recursive_version_locations = false

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

# database URL.  This is consumed by the user-maintained env.py script only.
# other means of configuring database URLs may be customized within the env.py
# file.
sqlalchemy.url = ${DATABASE_URL}


[post_write_hooks]
# post_write_hooks defines scripts or Python functions that are run
# on newly generated revision scripts.  See the documentation for further
# detail and examples

# format using "black" - use the console_scripts runner, against the "black" entrypoint
# hooks = black
# black.type = console_scripts
# black.entrypoint = black
# black.options = -l 79 REVISION_SCRIPT_FILENAME

# lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
# hooks = ruff
# ruff.type = module
# ruff.module = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME

# Alternatively, use the exec runner to execute a binary found on your PATH
# hooks = ruff
# ruff.type = exec
# ruff.executable = ruff
# ruff.options = check --fix REVISION_SCRIPT_FILENAME

# Logging configuration.  This is also consumed by the user-maintained
# env.py script only.
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARNING
handlers = console
qualname =

[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
backend/alembic/README
ADDED
@@ -0,0 +1 @@
Generic single-database configuration.
backend/alembic/__pycache__/env.cpython-311.pyc
ADDED
Binary file (3.24 kB)
backend/alembic/env.py
ADDED
@@ -0,0 +1,69 @@
# backend/alembic/env.py

from logging.config import fileConfig

from sqlalchemy import engine_from_config
from sqlalchemy.pool import NullPool

from alembic import context

# This is the crucial part: we import our app's config and models
import sys
import os
sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '..')))

from core.config import settings
from core.database import Base
from models.analysis_job import AnalysisJob

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# This line is IMPORTANT: it tells Alembic to use our app's settings for the DB URL
config.set_main_option("sqlalchemy.url", settings.DATABASE_URL)

# Interpret the config file for Python logging.
# This line reads the logging configuration from alembic.ini
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
target_metadata = Base.metadata


def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode."""
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )

    with context.begin_transaction():
        context.run_migrations()


def run_migrations_online() -> None:
    """Run migrations in 'online' mode."""
    connectable = engine_from_config(
        config.get_section(config.config_main_section, {}),
        prefix="sqlalchemy.",
        poolclass=NullPool,
    )

    with connectable.connect() as connection:
        context.configure(
            connection=connection, target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()


if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
backend/alembic/script.py.mako
ADDED
@@ -0,0 +1,28 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision: str = ${repr(up_revision)}
down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}


def upgrade() -> None:
    """Upgrade schema."""
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    """Downgrade schema."""
    ${downgrades if downgrades else "pass"}
backend/alembic/versions/17ec047335c5_create_analysis_jobs_table.py
ADDED
@@ -0,0 +1,40 @@
"""Create analysis_jobs table

Revision ID: 17ec047335c5
Revises:
Create Date: 2025-09-01 16:55:38.619192

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = '17ec047335c5'
down_revision: Union[str, Sequence[str], None] = None
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Upgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table('analysis_jobs',
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('ticker', sa.String(), nullable=False),
        sa.Column('status', sa.String(), nullable=False),
        sa.Column('result', sa.JSON(), nullable=True),
        sa.PrimaryKeyConstraint('id')
    )
    op.create_index(op.f('ix_analysis_jobs_ticker'), 'analysis_jobs', ['ticker'], unique=False)
    # ### end Alembic commands ###


def downgrade() -> None:
    """Downgrade schema."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_index(op.f('ix_analysis_jobs_ticker'), table_name='analysis_jobs')
    op.drop_table('analysis_jobs')
    # ### end Alembic commands ###
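To apply the revision above, the usual route is the Alembic CLI (for example `alembic upgrade head` run next to alembic.ini). The same step can be driven from Python; this is a small sketch, assuming the working directory is backend/ where alembic.ini lives:

from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")   # picks up script_location and the logging config
command.upgrade(cfg, "head")  # applies 17ec047335c5 and creates the analysis_jobs table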
backend/alembic/versions/__pycache__/17ec047335c5_create_analysis_jobs_table.cpython-311.pyc
ADDED
Binary file (2.22 kB)
backend/celery_worker.py
ADDED
@@ -0,0 +1,19 @@
from celery import Celery
from core.config import settings

# giving app a more descriptive name
celery = Celery(
    "quantitative_analysis_platform",
    broker=settings.CELERY_BROKER_URL,
    backend=settings.CELERY_RESULT_BACKEND,
    include=["tasks.data_tasks", "tasks.news_tasks"]
)

celery.conf.update(
    task_serializer="json",
    accept_content=["json"],
    result_serializer="json",
    timezone="UTC",
    enable_utc=True,
)
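The include= list tells the worker which modules to import so their tasks get registered. tasks/news_tasks.py is added in this commit; tasks/data_tasks.py is referenced but not part of this diff, so the sketch below only illustrates the expected shape of such a module:

# tasks/data_tasks.py -- illustrative only, not part of this commit
from celery_worker import celery

@celery.task
def run_data_analysis(job_id: str, ticker: str) -> str:
    # fetch market data, store it on the AnalysisJob row, return something JSON-serializable
    return job_id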
backend/core/__pycache__/config.cpython-311.pyc
ADDED
Binary file (729 Bytes)
backend/core/__pycache__/database.cpython-311.pyc
ADDED
Binary file (563 Bytes)
backend/core/config.py
ADDED
@@ -0,0 +1,10 @@
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    DATABASE_URL: str
    CELERY_BROKER_URL: str
    CELERY_RESULT_BACKEND: str

    model_config = SettingsConfigDict(env_file=".env")

settings = Settings()
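A quick note on how this Settings class resolves its fields: pydantic-settings reads real environment variables first and falls back to the .env file named in model_config. A minimal sketch with hypothetical values (in the compose setup they come from .env via env_file):

import os

os.environ["DATABASE_URL"] = "postgresql://user:pass@db:5432/quant"      # hypothetical
os.environ["CELERY_BROKER_URL"] = "redis://redis:6379/0"                 # hypothetical
os.environ["CELERY_RESULT_BACKEND"] = "redis://redis:6379/1"             # hypothetical

from core.config import settings   # Settings() is instantiated at import time
print(settings.DATABASE_URL)       # -> postgresql://user:pass@db:5432/quant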
backend/core/database.py
ADDED
@@ -0,0 +1,7 @@
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, declarative_base
from .config import settings

engine = create_engine(settings.DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
backend/main.py
CHANGED
@@ -1,20 +1,50 @@
-
+# backend/main.py
+from fastapi import FastAPI, Depends, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from sqlalchemy.orm import Session
+from uuid import UUID
+import models.analysis_job as model
+import schemas
+from core.database import SessionLocal, engine
+from tasks.data_tasks import run_data_analysis
+from tasks.news_tasks import run_intelligence_analysis
+from celery import chain
 
-
-app = FastAPI(title="
+model.Base.metadata.create_all(bind=engine)
+app = FastAPI(title="Quantitative Analysis Platform API", version="0.1.0")
 
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
+    allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
 )
 
-@app.get("/")
-def
+def get_db():
+    db = SessionLocal()
+    try:
+        yield db
+    finally:
+        db.close()
+
+@app.post("/jobs", response_model=schemas.Job, status_code=201)
+def create_analysis_job(job_request: schemas.JobCreate, db: Session = Depends(get_db)):
+    db_job = model.AnalysisJob(ticker=job_request.ticker.upper())
+    db.add(db_job)
+    db.commit()
+    db.refresh(db_job)
+
+    # THE CRITICAL CHANGE IS HERE
+    analysis_chain = chain(
+        run_data_analysis.s(str(db_job.id), db_job.ticker),
+        # By making the signature immutable, we tell Celery to ignore
+        # the result of the previous task and only use the arguments we provide.
+        run_intelligence_analysis.s(str(db_job.id)).set(immutable=True)
+    )
+    analysis_chain.apply_async()
+
+    return db_job
+
+@app.get("/jobs/{job_id}", response_model=schemas.Job)
+def get_job_status(job_id: UUID, db: Session = Depends(get_db)):
+    db_job = db.query(model.AnalysisJob).filter(model.AnalysisJob.id == job_id).first()
+    if db_job is None: raise HTTPException(status_code=404, detail="Job not found")
+    return db_job
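For readers unfamiliar with the immutable-signature trick used in create_analysis_job above, here is a small, self-contained sketch (hypothetical task names, throwaway in-memory broker) of how a Celery chain behaves with and without immutability:

from celery import Celery, chain

# Throwaway app purely for illustration; the real project uses celery_worker.celery.
app = Celery("demo", broker="memory://", backend="cache+memory://")

@app.task
def fetch_data(job_id, ticker):
    return f"data-for-{ticker}"   # by default this return value feeds the next task

@app.task
def analyze(job_id):
    return f"analyzed-{job_id}"

# Mutable second signature: Celery prepends the previous result, so the second task
# would be invoked as analyze("data-for-TCS", "job-1") and fail with too many arguments.
broken = chain(fetch_data.s("job-1", "TCS"), analyze.s("job-1"))

# Immutable second signature: the previous result is dropped and analyze("job-1") runs as intended.
fixed = chain(fetch_data.s("job-1", "TCS"), analyze.si("job-1"))
# .si(...) is shorthand for .s(...).set(immutable=True), the form used in main.py.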
backend/models/__pycache__/analysis_job.cpython-311.pyc
ADDED
Binary file (1.05 kB)
backend/models/analysis_job.py
ADDED
@@ -0,0 +1,12 @@
from sqlalchemy import Column, String, JSON
from sqlalchemy.dialects.postgresql import UUID
import uuid
from core.database import Base

class AnalysisJob(Base):
    __tablename__ = "analysis_jobs"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    ticker = Column(String, nullable=False, index=True)
    status = Column(String, default="PENDING", nullable=False)
    result = Column(JSON, nullable=True)
backend/requirements.txt
ADDED
@@ -0,0 +1,25 @@
fastapi
uvicorn[standard]
pydantic-settings

# database
sqlalchemy
psycopg2-binary
alembic

# task queue
celery
redis[redis-stack]

# data agent
yfinance

# news & sentiment agent
newspaper3k
lxml_html_clean
torch
transformers
sentence-transformers

# special install for Twitter scraping
snscrape@git+https://github.com/JustAnotherArchivist/snscrape.git@master
backend/schemas.py
ADDED
@@ -0,0 +1,14 @@
from pydantic import BaseModel, ConfigDict
from uuid import UUID
from typing import Optional, Dict, Any

class JobCreate(BaseModel):
    ticker: str

class Job(BaseModel):
    id: UUID
    ticker: str
    status: str
    result: Optional[Dict[str, Any]] = None

    model_config = ConfigDict(from_attributes=True)
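model_config = ConfigDict(from_attributes=True) is what lets FastAPI serialize the SQLAlchemy object returned by the endpoints into the Job response model. A sketch of the same conversion done by hand (values are made up; column defaults only apply at flush time, so they are passed explicitly here):

import uuid
from models.analysis_job import AnalysisJob
from schemas import Job

job_orm = AnalysisJob(id=uuid.uuid4(), ticker="INFY.NS", status="PENDING", result=None)

# from_attributes=True makes model_validate read attributes off the ORM object
# instead of requiring a plain dict.
job_api = Job.model_validate(job_orm)
print(job_api.model_dump())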
backend/tasks/__pycache__/news_tasks.cpython-311.pyc
ADDED
Binary file (2.82 kB)
backend/tasks/news_tasks.py
ADDED
@@ -0,0 +1,43 @@
from celery_worker import celery
from core.database import SessionLocal
from models.analysis_job import AnalysisJob
from tools.news_tools import get_news_and_sentiment, get_twitter_sentiment
from uuid import UUID

@celery.task
def run_intelligence_analysis(job_id: str):
    db = SessionLocal()
    job = None
    try:
        job = db.query(AnalysisJob).filter(AnalysisJob.id == UUID(job_id)).first()
        if not job or not job.result: raise ValueError(f"Job {job_id} not found or has no data.")

        current_data = job.result
        company_name = current_data.get("company_name")
        if not company_name: raise ValueError("Company name not found in data.")

        print(f"Starting intelligence analysis for {company_name}...")

        news = get_news_and_sentiment(current_data.get("ticker"), company_name)
        twitter = get_twitter_sentiment(f"{company_name} stock")

        current_data['intelligence_briefing'] = {"news": news, "twitter": twitter}
        job.result = current_data
        job.status = "SUCCESS"
        db.commit()

        print(f"Intelligence analysis for job {job_id} completed.")
        final_result = str(job.result)

    except Exception as e:
        print(f"Error during intelligence analysis for job {job_id}: {e}")
        if job:
            job.status = "FAILED"
            error_data = job.result if job.result else {}
            error_data['error'] = f"Intelligence analysis failed: {str(e)}"
            job.result = error_data
            db.commit()
        final_result = f"Error: {e}"
    finally:
        db.close()
    return final_result
backend/tools/__pycache__/data_tools.cpython-311.pyc
ADDED
Binary file (2.65 kB)
backend/tools/__pycache__/news_tools.cpython-311.pyc
ADDED
Binary file (5.81 kB)
backend/tools/data_tools.py
ADDED
@@ -0,0 +1,43 @@
import yfinance as yf
from typing import Dict, Any

def get_stock_data(ticker: str) -> Dict[str, Any]:

    # for NSE stocks, yfinance expects the '.NS' suffix. For BSE, it's '.BO'.
    # assume NSE by default if no suffix is provided.
    if not ticker.endswith(('.NS', '.BO')):
        print(f"Ticker '{ticker}' has no exchange suffix. Assuming NSE and appending '.NS'.")
        ticker = f"{ticker}.NS"

    stock = yf.Ticker(ticker)

    # yfinance can sometimes fail for certain tickers or data points.
    try:
        info = stock.info
    except Exception as e:
        print(f"Could not fetch info for {ticker}: {e}")
        return {"error": f"Invalid ticker or no data available for {ticker}"}

    # check if we got a valid response
    if not info or info.get('regularMarketPrice') is None:
        return {"error": f"Invalid ticker or no data available for {ticker}"}

    # select key data points relevant to analysis
    data = {
        "ticker": ticker,
        "company_name": info.get('longName'),
        "current_price": info.get('currentPrice') or info.get('regularMarketPrice'),
        "previous_close": info.get('previousClose'),
        "market_cap": info.get('marketCap'),
        "pe_ratio": info.get('trailingPE') or info.get('forwardPE'),
        "pb_ratio": info.get('priceToBook'),
        "dividend_yield": info.get('dividendYield'),
        "sector": info.get('sector'),
        "industry": info.get('industry'),
        "summary": info.get('longBusinessSummary'),
        "website": info.get('website'),
        "logo_url": info.get('logo_url')
    }

    # clean up data by removing any keys with none values
    return {k: v for k, v in data.items() if v is not None}
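A small usage sketch for the helper above (hypothetical ticker; needs network access for yfinance):

from tools.data_tools import get_stock_data

snapshot = get_stock_data("RELIANCE")   # no suffix, so the helper queries "RELIANCE.NS"
print(snapshot.get("company_name"), snapshot.get("current_price"))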
backend/tools/download_model.py
ADDED
File without changes
backend/tools/news_tools.py
ADDED
@@ -0,0 +1,86 @@
import snscrape.modules.twitter as sntwitter
import newspaper
# We will now import AutoModelForSequenceClassification and AutoTokenizer
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch  # We need torch to process the model's output
from typing import List, Dict, Any

# We will load the model and tokenizer inside the function
sentiment_model = None
tokenizer = None
MODEL_PATH = '/code/sentiment_model'

def load_sentiment_model():
    """A function to load the model and tokenizer on demand using the transformers library."""
    global sentiment_model, tokenizer
    if sentiment_model is None or tokenizer is None:
        print("Loading sentiment model and tokenizer for the first time...")
        # Load the tokenizer from the local path
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        # Load the model from the local path
        sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
        print("Sentiment model and tokenizer loaded.")

def analyze_sentiment_with_model(text: str) -> str:
    """Uses the loaded model to predict sentiment."""
    # This is the standard way to use a transformers model
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = sentiment_model(**inputs).logits

    scores = logits.softmax(dim=1)[0].tolist()
    sentiment_map = {0: 'Positive', 1: 'Neutral', 2: 'Negative'}  # This order might be different

    # Let's verify the model's expected labels
    model_labels = sentiment_model.config.id2label
    if model_labels:
        # e.g., {0: 'positive', 1: 'neutral', 2: 'negative'}
        sentiment_map = {int(k): v.capitalize() for k, v in model_labels.items()}

    best_index = scores.index(max(scores))
    return sentiment_map.get(best_index, "Unknown")


def get_news_and_sentiment(ticker: str, company_name: str) -> List[Dict[str, Any]]:
    load_sentiment_model()

    print(f"Fetching news for {company_name}...")
    search_url = f"https://news.google.com/rss/search?q={company_name.replace(' ', '+')}+stock&hl=en-IN&gl=IN&ceid=IN:en"
    news_source = newspaper.build(search_url, memoize_articles=False, language='en')
    articles_data = []
    for article in news_source.articles[:5]:
        try:
            article.download(); article.parse(); article.nlp()
            if not article.text or len(article.text) < 150: continue

            sentiment = analyze_sentiment_with_model(article.summary)

            articles_data.append({
                "title": article.title,
                "summary": article.summary,
                "url": article.url,
                "sentiment": sentiment
            })
        except Exception as e:
            print(f"Could not process article {article.url}: {e}")
    return articles_data

def get_twitter_sentiment(search_query: str) -> Dict[str, Any]:
    load_sentiment_model()

    print(f"Fetching Twitter sentiment for '{search_query}'...")
    tweets = [tweet.rawContent for i, tweet in enumerate(sntwitter.TwitterSearchScraper(f"{search_query} lang:en").get_items()) if i < 50]
    if not tweets: return {"error": "No recent tweets found."}

    counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0, 'Unknown': 0}
    for text in tweets:
        sentiment = analyze_sentiment_with_model(text)
        counts[sentiment] += 1

    return {
        "search_query": search_query,
        "total_tweets": len(tweets),
        "positive": counts['Positive'],
        "negative": counts['Negative'],
        "neutral": counts['Neutral']
    }
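One way to sanity-check the model that docker-compose mounts at /code/sentiment_model is the higher-level transformers pipeline API. This is a sketch under assumptions (the example sentence and the local path are the only inputs; it is not part of the commit, and the label names depend on the model's config.id2label):

from transformers import pipeline

clf = pipeline("text-classification",
               model="/code/sentiment_model",
               tokenizer="/code/sentiment_model")
print(clf("Quarterly profits beat expectations and the stock rallied.")[0])
# e.g. {'label': 'positive', 'score': 0.97}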
docker-compose.yml
CHANGED
@@ -1,12 +1,35 @@
 services:
+  redis:
+    image: redis:7-alpine
+    ports:
+      - "6379:6379"
+
   backend:
     build:
-      context: .
+      context: . # <-- Context is ROOT
       dockerfile: ./backend/Dockerfile
     ports:
       - "8000:8000"
     volumes:
-      - ./backend:/app
+      - ./backend:/code/app
+    env_file:
+      - .env
+    command: python -m uvicorn main:app --host 0.0.0.0 --port 8000
+    depends_on:
+      - redis
+
+  worker:
+    build:
+      context: . # <-- Context is ROOT
+      dockerfile: ./backend/Dockerfile
+    volumes:
+      - ./backend:/code/app
+    env_file:
+      - .env
+    command: python -m celery -A celery_worker.celery worker --loglevel=info
+    depends_on:
+      - redis
+      - backend
 
   frontend:
     build:
@@ -16,4 +39,6 @@ services:
       - "5173:5173"
     volumes:
       - ./frontend:/app
-      - /app/node_modules
+      - /app/node_modules
+    depends_on:
+      - backend
ml_models/.DS_Store
ADDED
Binary file (6.15 kB)
ml_models/README.md
ADDED
@@ -0,0 +1 @@
+../../blobs/368ed67c15df571f78fa692ab0e20262d0cbe8cc
ml_models/config.json
ADDED
@@ -0,0 +1 @@
+../../blobs/bedcf6808384112f9da4ecc439f2addbdac3785b
ml_models/merges.txt
ADDED
@@ -0,0 +1 @@
+../../blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc
ml_models/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+../../blobs/a7816a407990c2e8d254b10d36beeb972016086e
ml_models/tokenizer.json
ADDED
@@ -0,0 +1 @@
+../../blobs/d8059234da10f3ae6513891308e25f256dc1db6d
ml_models/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+../../blobs/45af26cf13c9666e90a0891616e8cf48b0404aed
ml_models/vocab.json
ADDED
@@ -0,0 +1 @@
+../../blobs/4ebe4bb3f3114daf2e4cc349f24873a1175a35d7
poetry.lock
CHANGED
The diff for this file is too large to render.
pyproject.toml
CHANGED
@@ -9,7 +9,14 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "fastapi (>=0.116.1,<0.117.0)",
-    "uvicorn[standard] (>=0.35.0,<0.36.0)"
+    "uvicorn[standard] (>=0.35.0,<0.36.0)",
+    "sqlalchemy (>=2.0.43,<3.0.0)",
+    "psycopg2-binary (>=2.9.10,<3.0.0)",
+    "celery (>=5.5.3,<6.0.0)",
+    "redis[redis-stack] (>=6.4.0,<7.0.0)",
+    "alembic (>=1.16.5,<2.0.0)",
+    "pydantic-settings (>=2.10.1,<3.0.0)",
+    "yfinance (>=0.2.65,<0.3.0)"
 ]
tmp_down.py
ADDED
@@ -0,0 +1,7 @@
from sentence_transformers import CrossEncoder

print("Starting model download to local cache...")
# This will download the model to a central Hugging Face cache on your Mac
# It might be in ~/.cache/huggingface/hub/ or similar
model = CrossEncoder('cross-encoder/nli-roberta-base')
print("Model download complete!")