boyinfuture committed
Commit c6fb015 · 1 Parent(s): 4d4a1c6

adding the news classifier and parser

Files changed (42)
  1. .DS_Store +0 -0
  2. .gitignore +1 -0
  3. backend/Dockerfile +6 -7
  4. backend/__pycache__/celery_worker.cpython-311.pyc +0 -0
  5. backend/__pycache__/main.cpython-311.pyc +0 -0
  6. backend/__pycache__/schemas.cpython-311.pyc +0 -0
  7. backend/alembic.ini +147 -0
  8. backend/alembic/README +1 -0
  9. backend/alembic/__pycache__/env.cpython-311.pyc +0 -0
  10. backend/alembic/env.py +69 -0
  11. backend/alembic/script.py.mako +28 -0
  12. backend/alembic/versions/17ec047335c5_create_analysis_jobs_table.py +40 -0
  13. backend/alembic/versions/__pycache__/17ec047335c5_create_analysis_jobs_table.cpython-311.pyc +0 -0
  14. backend/celery_worker.py +19 -0
  15. backend/core/__pycache__/config.cpython-311.pyc +0 -0
  16. backend/core/__pycache__/database.cpython-311.pyc +0 -0
  17. backend/core/config.py +10 -0
  18. backend/core/database.py +7 -0
  19. backend/main.py +41 -11
  20. backend/models/__pycache__/analysis_job.cpython-311.pyc +0 -0
  21. backend/models/analysis_job.py +12 -0
  22. backend/requirements.txt +25 -0
  23. backend/schemas.py +14 -0
  24. backend/tasks/__pycache__/news_tasks.cpython-311.pyc +0 -0
  25. backend/tasks/news_tasks.py +43 -0
  26. backend/tools/__pycache__/data_tools.cpython-311.pyc +0 -0
  27. backend/tools/__pycache__/news_tools.cpython-311.pyc +0 -0
  28. backend/tools/data_tools.py +43 -0
  29. backend/tools/download_model.py +0 -0
  30. backend/tools/news_tools.py +86 -0
  31. docker-compose.yml +28 -3
  32. ml_models/.DS_Store +0 -0
  33. ml_models/README.md +1 -0
  34. ml_models/config.json +1 -0
  35. ml_models/merges.txt +1 -0
  36. ml_models/special_tokens_map.json +1 -0
  37. ml_models/tokenizer.json +1 -0
  38. ml_models/tokenizer_config.json +1 -0
  39. ml_models/vocab.json +1 -0
  40. poetry.lock +0 -0
  41. pyproject.toml +8 -1
  42. tmp_down.py +7 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
backend/Dockerfile CHANGED
@@ -1,13 +1,12 @@
  FROM python:3.11-slim

- WORKDIR /app

- RUN pip install poetry

- COPY pyproject.toml poetry.lock ./

- RUN poetry install --no-root

- COPY ./backend .
-
- CMD ["poetry", "run", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

  FROM python:3.11-slim

+ WORKDIR /code

+ RUN apt-get update && apt-get install -y git

+ COPY ./backend/requirements.txt .
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt

+ COPY ./ml_models /code/sentiment_model

+ WORKDIR /code/app
backend/__pycache__/celery_worker.cpython-311.pyc ADDED
Binary file (706 Bytes).
 
backend/__pycache__/main.cpython-311.pyc ADDED
Binary file (3.5 kB).
 
backend/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (1.11 kB).
 
backend/alembic.ini ADDED
@@ -0,0 +1,147 @@
+ # A generic, single database configuration.
+
+ [alembic]
+ # path to migration scripts.
+ # this is typically a path given in POSIX (e.g. forward slashes)
+ # format, relative to the token %(here)s which refers to the location of this
+ # ini file
+ script_location = %(here)s/alembic
+
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+ # Uncomment the line below if you want the files to be prepended with date and time
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+ # for all available tokens
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+ # sys.path path, will be prepended to sys.path if present.
+ # defaults to the current working directory. for multiple paths, the path separator
+ # is defined by "path_separator" below.
+ prepend_sys_path = .
+
+
+ # timezone to use when rendering the date within the migration file
+ # as well as the filename.
+ # If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
+ # Any required deps can installed by adding `alembic[tz]` to the pip requirements
+ # string value is passed to ZoneInfo()
+ # leave blank for localtime
+ # timezone =
+
+ # max length of characters to apply to the "slug" field
+ # truncate_slug_length = 40
+
+ # set to 'true' to run the environment during
+ # the 'revision' command, regardless of autogenerate
+ # revision_environment = false
+
+ # set to 'true' to allow .pyc and .pyo files without
+ # a source .py file to be detected as revisions in the
+ # versions/ directory
+ # sourceless = false
+
+ # version location specification; This defaults
+ # to <script_location>/versions. When using multiple version
+ # directories, initial revisions must be specified with --version-path.
+ # The path separator used here should be the separator specified by "path_separator"
+ # below.
+ # version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
+
+ # path_separator; This indicates what character is used to split lists of file
+ # paths, including version_locations and prepend_sys_path within configparser
+ # files such as alembic.ini.
+ # The default rendered in new alembic.ini files is "os", which uses os.pathsep
+ # to provide os-dependent path splitting.
+ #
+ # Note that in order to support legacy alembic.ini files, this default does NOT
+ # take place if path_separator is not present in alembic.ini. If this
+ # option is omitted entirely, fallback logic is as follows:
+ #
+ # 1. Parsing of the version_locations option falls back to using the legacy
+ # "version_path_separator" key, which if absent then falls back to the legacy
+ # behavior of splitting on spaces and/or commas.
+ # 2. Parsing of the prepend_sys_path option falls back to the legacy
+ # behavior of splitting on spaces, commas, or colons.
+ #
+ # Valid values for path_separator are:
+ #
+ # path_separator = :
+ # path_separator = ;
+ # path_separator = space
+ # path_separator = newline
+ #
+ # Use os.pathsep. Default configuration used for new projects.
+ path_separator = os
+
+ # set to 'true' to search source files recursively
+ # in each "version_locations" directory
+ # new in Alembic version 1.10
+ # recursive_version_locations = false
+
+ # the output encoding used when revision files
+ # are written from script.py.mako
+ # output_encoding = utf-8
+
+ # database URL. This is consumed by the user-maintained env.py script only.
+ # other means of configuring database URLs may be customized within the env.py
+ # file.
+ sqlalchemy.url = ${DATABASE_URL}
+
+
+ [post_write_hooks]
+ # post_write_hooks defines scripts or Python functions that are run
+ # on newly generated revision scripts. See the documentation for further
+ # detail and examples
+
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
+ # hooks = black
+ # black.type = console_scripts
+ # black.entrypoint = black
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+ # lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
+ # hooks = ruff
+ # ruff.type = module
+ # ruff.module = ruff
+ # ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+ # Alternatively, use the exec runner to execute a binary found on your PATH
+ # hooks = ruff
+ # ruff.type = exec
+ # ruff.executable = ruff
+ # ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+ # Logging configuration. This is also consumed by the user-maintained
+ # env.py script only.
+ [loggers]
+ keys = root,sqlalchemy,alembic
+
+ [handlers]
+ keys = console
+
+ [formatters]
+ keys = generic
+
+ [logger_root]
+ level = WARNING
+ handlers = console
+ qualname =
+
+ [logger_sqlalchemy]
+ level = WARNING
+ handlers =
+ qualname = sqlalchemy.engine
+
+ [logger_alembic]
+ level = INFO
+ handlers =
+ qualname = alembic
+
+ [handler_console]
+ class = StreamHandler
+ args = (sys.stderr,)
+ level = NOTSET
+ formatter = generic
+
+ [formatter_generic]
+ format = %(levelname)-5.5s [%(name)s] %(message)s
+ datefmt = %H:%M:%S
backend/alembic/README ADDED
@@ -0,0 +1 @@
+ Generic single-database configuration.
backend/alembic/__pycache__/env.cpython-311.pyc ADDED
Binary file (3.24 kB).
 
backend/alembic/env.py ADDED
@@ -0,0 +1,69 @@
+ # backend/alembic/env.py
+
+ from logging.config import fileConfig
+
+ from sqlalchemy import engine_from_config
+ from sqlalchemy.pool import NullPool
+
+ from alembic import context
+
+ # This is the crucial part: we import our app's config and models
+ import sys
+ import os
+ sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '..')))
+
+ from core.config import settings
+ from core.database import Base
+ from models.analysis_job import AnalysisJob
+
+ # this is the Alembic Config object, which provides
+ # access to the values within the .ini file in use.
+ config = context.config
+
+ # This line is IMPORTANT: it tells Alembic to use our app's settings for the DB URL
+ config.set_main_option("sqlalchemy.url", settings.DATABASE_URL)
+
+ # Interpret the config file for Python logging.
+ # This line reads the logging configuration from alembic.ini
+ if config.config_file_name is not None:
+     fileConfig(config.config_file_name)
+
+ # add your model's MetaData object here
+ # for 'autogenerate' support
+ target_metadata = Base.metadata
+
+ def run_migrations_offline() -> None:
+     """Run migrations in 'offline' mode."""
+     url = config.get_main_option("sqlalchemy.url")
+     context.configure(
+         url=url,
+         target_metadata=target_metadata,
+         literal_binds=True,
+         dialect_opts={"paramstyle": "named"},
+     )
+
+     with context.begin_transaction():
+         context.run_migrations()
+
+
+ def run_migrations_online() -> None:
+     """Run migrations in 'online' mode."""
+     connectable = engine_from_config(
+         config.get_section(config.config_main_section, {}),
+         prefix="sqlalchemy.",
+         poolclass=NullPool,
+     )
+
+     with connectable.connect() as connection:
+         context.configure(
+             connection=connection, target_metadata=target_metadata
+         )
+
+         with context.begin_transaction():
+             context.run_migrations()
+
+
+ if context.is_offline_mode():
+     run_migrations_offline()
+ else:
+     run_migrations_online()
backend/alembic/script.py.mako ADDED
@@ -0,0 +1,28 @@
+ """${message}
+
+ Revision ID: ${up_revision}
+ Revises: ${down_revision | comma,n}
+ Create Date: ${create_date}
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ ${imports if imports else ""}
+
+ # revision identifiers, used by Alembic.
+ revision: str = ${repr(up_revision)}
+ down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+ def upgrade() -> None:
+     """Upgrade schema."""
+     ${upgrades if upgrades else "pass"}
+
+
+ def downgrade() -> None:
+     """Downgrade schema."""
+     ${downgrades if downgrades else "pass"}
backend/alembic/versions/17ec047335c5_create_analysis_jobs_table.py ADDED
@@ -0,0 +1,40 @@
+ """Create analysis_jobs table
+
+ Revision ID: 17ec047335c5
+ Revises:
+ Create Date: 2025-09-01 16:55:38.619192
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = '17ec047335c5'
+ down_revision: Union[str, Sequence[str], None] = None
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Upgrade schema."""
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.create_table('analysis_jobs',
+         sa.Column('id', sa.UUID(), nullable=False),
+         sa.Column('ticker', sa.String(), nullable=False),
+         sa.Column('status', sa.String(), nullable=False),
+         sa.Column('result', sa.JSON(), nullable=True),
+         sa.PrimaryKeyConstraint('id')
+     )
+     op.create_index(op.f('ix_analysis_jobs_ticker'), 'analysis_jobs', ['ticker'], unique=False)
+     # ### end Alembic commands ###
+
+
+ def downgrade() -> None:
+     """Downgrade schema."""
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.drop_index(op.f('ix_analysis_jobs_ticker'), table_name='analysis_jobs')
+     op.drop_table('analysis_jobs')
+     # ### end Alembic commands ###
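
Editor's note: a minimal sketch of applying this revision programmatically, assuming alembic.ini is reachable from the working directory (the usual route is the alembic CLI; the Config path below is an assumption about the container layout):

# Minimal sketch: apply migrations up to this revision without the CLI.
# Assumes alembic.ini sits in the current working directory inside the backend image.
from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")           # hypothetical path
command.upgrade(alembic_cfg, "17ec047335c5")  # or "head" for the latest revision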
backend/alembic/versions/__pycache__/17ec047335c5_create_analysis_jobs_table.cpython-311.pyc ADDED
Binary file (2.22 kB).
 
backend/celery_worker.py ADDED
@@ -0,0 +1,19 @@
+ from celery import Celery
+ from core.config import settings
+
+ # giving app a more descriptive name
+ celery = Celery(
+     "quantitative_analysis_platform",
+     broker=settings.CELERY_BROKER_URL,
+     backend=settings.CELERY_RESULT_BACKEND,
+     include=["tasks.data_tasks", "tasks.news_tasks"]
+ )
+
+ celery.conf.update(
+     task_serializer="json",
+     accept_content=["json"],
+     result_serializer="json",
+     timezone="UTC",
+     enable_utc=True,
+ )
+
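
Editor's note: a minimal sketch of dispatching one of the included tasks through this app instance by its registered name (the UUID is a placeholder; the worker itself is started via docker-compose, shown further down):

# Minimal sketch: enqueue a task on this Celery app by name.
# The UUID is a placeholder; a real id comes from the analysis_jobs table.
from celery_worker import celery

result = celery.send_task(
    "tasks.news_tasks.run_intelligence_analysis",
    args=["00000000-0000-0000-0000-000000000000"],
)
print(result.id, result.status)  # AsyncResult backed by the Redis result backend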
backend/core/__pycache__/config.cpython-311.pyc ADDED
Binary file (729 Bytes).
 
backend/core/__pycache__/database.cpython-311.pyc ADDED
Binary file (563 Bytes).
 
backend/core/config.py ADDED
@@ -0,0 +1,10 @@
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+ class Settings(BaseSettings):
+     DATABASE_URL: str
+     CELERY_BROKER_URL: str
+     CELERY_RESULT_BACKEND: str
+
+     model_config = SettingsConfigDict(env_file=".env")
+
+ settings = Settings()
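
Editor's note: these three settings are read from the environment or the .env file; a minimal sketch with placeholder values (the URLs below are assumptions, the real ones depend on the docker-compose service names):

# Minimal sketch of the variables Settings expects; all values are placeholders.
import os

os.environ.setdefault("DATABASE_URL", "postgresql+psycopg2://user:pass@db:5432/quant")
os.environ.setdefault("CELERY_BROKER_URL", "redis://redis:6379/0")
os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://redis:6379/0")

from core.config import settings
print(settings.CELERY_BROKER_URL)  # pydantic-settings reads env vars first, then .env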
backend/core/database.py ADDED
@@ -0,0 +1,7 @@
+ from sqlalchemy import create_engine
+ from sqlalchemy.orm import sessionmaker, declarative_base
+ from .config import settings
+
+ engine = create_engine(settings.DATABASE_URL)
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+ Base = declarative_base()
backend/main.py CHANGED
@@ -1,20 +1,50 @@
- from fastapi import FastAPI
  from fastapi.middleware.cors import CORSMiddleware

-
- app = FastAPI(title="AI Hedge Fund API")

  app.add_middleware(
      CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
  )



- @app.get("/")
- def read_root():
-     """This function runs when someone visits the main URL."""
-     return {"status": "ok", "message": "Welcome to the API!"}

+ # backend/main.py
+ from fastapi import FastAPI, Depends, HTTPException
  from fastapi.middleware.cors import CORSMiddleware
+ from sqlalchemy.orm import Session
+ from uuid import UUID
+ import models.analysis_job as model
+ import schemas
+ from core.database import SessionLocal, engine
+ from tasks.data_tasks import run_data_analysis
+ from tasks.news_tasks import run_intelligence_analysis
+ from celery import chain

+ model.Base.metadata.create_all(bind=engine)
+ app = FastAPI(title="Quantitative Analysis Platform API", version="0.1.0")

  app.add_middleware(
      CORSMiddleware,
+     allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
  )

+ def get_db():
+     db = SessionLocal()
+     try:
+         yield db
+     finally:
+         db.close()

+ @app.post("/jobs", response_model=schemas.Job, status_code=201)
+ def create_analysis_job(job_request: schemas.JobCreate, db: Session = Depends(get_db)):
+     db_job = model.AnalysisJob(ticker=job_request.ticker.upper())
+     db.add(db_job)
+     db.commit()
+     db.refresh(db_job)
+
+     # THE CRITICAL CHANGE IS HERE
+     analysis_chain = chain(
+         run_data_analysis.s(str(db_job.id), db_job.ticker),
+         # By making the signature immutable, we tell Celery to ignore
+         # the result of the previous task and only use the arguments we provide.
+         run_intelligence_analysis.s(str(db_job.id)).set(immutable=True)
+     )
+     analysis_chain.apply_async()
+
+     return db_job

+ @app.get("/jobs/{job_id}", response_model=schemas.Job)
+ def get_job_status(job_id: UUID, db: Session = Depends(get_db)):
+     db_job = db.query(model.AnalysisJob).filter(model.AnalysisJob.id == job_id).first()
+     if db_job is None: raise HTTPException(status_code=404, detail="Job not found")
+     return db_job
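
Editor's note: a minimal client-side sketch of the new job flow (host and port are assumed from the docker-compose mapping; requests is not part of this diff):

# Minimal sketch: create an analysis job and poll it until the Celery chain finishes.
# Assumes the backend is reachable on localhost:8000 as mapped in docker-compose.yml.
import time
import requests

job = requests.post("http://localhost:8000/jobs", json={"ticker": "RELIANCE"}).json()
print(job["id"], job["status"])  # PENDING until the chain completes

while job["status"] not in ("SUCCESS", "FAILED"):
    time.sleep(5)
    job = requests.get(f"http://localhost:8000/jobs/{job['id']}").json()

print(job["result"])  # stock data plus the intelligence_briefing section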
backend/models/__pycache__/analysis_job.cpython-311.pyc ADDED
Binary file (1.05 kB).
 
backend/models/analysis_job.py ADDED
@@ -0,0 +1,12 @@
+ from sqlalchemy import Column, String, JSON
+ from sqlalchemy.dialects.postgresql import UUID
+ import uuid
+ from core.database import Base
+
+ class AnalysisJob(Base):
+     __tablename__ = "analysis_jobs"
+
+     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+     ticker = Column(String, nullable=False, index=True)
+     status = Column(String, default="PENDING", nullable=False)
+     result = Column(JSON, nullable=True)
backend/requirements.txt ADDED
@@ -0,0 +1,25 @@
+ fastapi
+ uvicorn[standard]
+ pydantic-settings
+
+
+ sqlalchemy
+ psycopg2-binary
+ alembic
+
+ # task queue
+ celery
+ redis[redis-stack]
+
+ # data agent
+ yfinance
+
+ # news & sentiment agent
+ newspaper3k
+ lxml_html_clean
+ torch
+ transformers
+ sentence-transformers
+
+ # special install for Twitter scraping
+ snscrape@git+https://github.com/JustAnotherArchivist/snscrape.git@master
backend/schemas.py ADDED
@@ -0,0 +1,14 @@
+ from pydantic import BaseModel, ConfigDict
+ from uuid import UUID
+ from typing import Optional, Dict, Any
+
+ class JobCreate(BaseModel):
+     ticker: str
+
+ class Job(BaseModel):
+     id: UUID
+     ticker: str
+     status: str
+     result: Optional[Dict[str, Any]] = None
+
+     model_config = ConfigDict(from_attributes=True)
backend/tasks/__pycache__/news_tasks.cpython-311.pyc ADDED
Binary file (2.82 kB).
 
backend/tasks/news_tasks.py ADDED
@@ -0,0 +1,43 @@
+ from celery_worker import celery
+ from core.database import SessionLocal
+ from models.analysis_job import AnalysisJob
+ from tools.news_tools import get_news_and_sentiment, get_twitter_sentiment
+ from uuid import UUID
+
+ @celery.task
+ def run_intelligence_analysis(job_id: str):
+     db = SessionLocal()
+     job = None
+     try:
+         job = db.query(AnalysisJob).filter(AnalysisJob.id == UUID(job_id)).first()
+         if not job or not job.result: raise ValueError(f"Job {job_id} not found or has no data.")
+
+         current_data = job.result
+         company_name = current_data.get("company_name")
+         if not company_name: raise ValueError("Company name not found in data.")
+
+         print(f"Starting intelligence analysis for {company_name}...")
+
+         news = get_news_and_sentiment(current_data.get("ticker"), company_name)
+         twitter = get_twitter_sentiment(f"{company_name} stock")
+
+         current_data['intelligence_briefing'] = {"news": news, "twitter": twitter}
+         job.result = current_data
+         job.status = "SUCCESS"
+         db.commit()
+
+         print(f"Intelligence analysis for job {job_id} completed.")
+         final_result = str(job.result)
+
+     except Exception as e:
+         print(f"Error during intelligence analysis for job {job_id}: {e}")
+         if job:
+             job.status = "FAILED"
+             error_data = job.result if job.result else {}
+             error_data['error'] = f"Intelligence analysis failed: {str(e)}"
+             job.result = error_data
+             db.commit()
+         final_result = f"Error: {e}"
+     finally:
+         db.close()
+     return final_result
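
Editor's note: for local debugging, the task can be executed in-process instead of through the broker; a minimal sketch (the job id is a placeholder and the job row must already contain the data written by the preceding data-analysis step):

# Minimal sketch: run the intelligence task eagerly, with no broker round-trip.
# The UUID is a placeholder; the job's result must already hold company_name/ticker.
from tasks.news_tasks import run_intelligence_analysis

eager_result = run_intelligence_analysis.apply(
    args=["00000000-0000-0000-0000-000000000000"]
)
print(eager_result.get())  # stringified job.result on success, "Error: ..." otherwise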
backend/tools/__pycache__/data_tools.cpython-311.pyc ADDED
Binary file (2.65 kB).
 
backend/tools/__pycache__/news_tools.cpython-311.pyc ADDED
Binary file (5.81 kB).
 
backend/tools/data_tools.py ADDED
@@ -0,0 +1,43 @@
+ import yfinance as yf
+ from typing import Dict, Any
+
+ def get_stock_data(ticker: str) -> Dict[str, Any]:
+
+     # for NSE stocks, yfinance expects the '.NS' suffix. For BSE, it's '.BO'.
+     # assume NSE by default if no suffix is provided.
+     if not ticker.endswith(('.NS', '.BO')):
+         print(f"Ticker '{ticker}' has no exchange suffix. Assuming NSE and appending '.NS'.")
+         ticker = f"{ticker}.NS"
+
+     stock = yf.Ticker(ticker)
+
+     # yfinance can sometimes fail for certain tickers or data points.
+     try:
+         info = stock.info
+     except Exception as e:
+         print(f"Could not fetch info for {ticker}: {e}")
+         return {"error": f"Invalid ticker or no data available for {ticker}"}
+
+     # check if we got a valid response
+     if not info or info.get('regularMarketPrice') is None:
+         return {"error": f"Invalid ticker or no data available for {ticker}"}
+
+     # select key data points relevant to analysis
+     data = {
+         "ticker": ticker,
+         "company_name": info.get('longName'),
+         "current_price": info.get('currentPrice') or info.get('regularMarketPrice'),
+         "previous_close": info.get('previousClose'),
+         "market_cap": info.get('marketCap'),
+         "pe_ratio": info.get('trailingPE') or info.get('forwardPE'),
+         "pb_ratio": info.get('priceToBook'),
+         "dividend_yield": info.get('dividendYield'),
+         "sector": info.get('sector'),
+         "industry": info.get('industry'),
+         "summary": info.get('longBusinessSummary'),
+         "website": info.get('website'),
+         "logo_url": info.get('logo_url')
+     }
+
+     # clean up data by removing any keys with none values
+     return {k: v for k, v in data.items() if v is not None}
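
Editor's note: a minimal usage sketch (the ticker is just an example; the returned fields depend on whatever yfinance has for it at call time):

# Minimal sketch: fetch fundamentals for an NSE ticker (suffix is added automatically).
from tools.data_tools import get_stock_data

snapshot = get_stock_data("TCS")  # becomes "TCS.NS"
if "error" in snapshot:
    print(snapshot["error"])
else:
    print(snapshot["company_name"], snapshot["current_price"], snapshot.get("pe_ratio"))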
backend/tools/download_model.py ADDED
File without changes
backend/tools/news_tools.py ADDED
@@ -0,0 +1,86 @@
+ import snscrape.modules.twitter as sntwitter
+ import newspaper
+ # We will now import AutoModelForSequenceClassification and AutoTokenizer
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch # We need torch to process the model's output
+ from typing import List, Dict, Any
+
+ # We will load the model and tokenizer inside the function
+ sentiment_model = None
+ tokenizer = None
+ MODEL_PATH = '/code/sentiment_model'
+
+ def load_sentiment_model():
+     """A function to load the model and tokenizer on demand using the transformers library."""
+     global sentiment_model, tokenizer
+     if sentiment_model is None or tokenizer is None:
+         print("Loading sentiment model and tokenizer for the first time...")
+         # Load the tokenizer from the local path
+         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+         # Load the model from the local path
+         sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
+         print("Sentiment model and tokenizer loaded.")
+
+ def analyze_sentiment_with_model(text: str) -> str:
+     """Uses the loaded model to predict sentiment."""
+     # This is the standard way to use a transformers model
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+     with torch.no_grad():
+         logits = sentiment_model(**inputs).logits
+
+     scores = logits.softmax(dim=1)[0].tolist()
+     sentiment_map = {0: 'Positive', 1: 'Neutral', 2: 'Negative'} # This order might be different
+
+     # Let's verify the model's expected labels
+     model_labels = sentiment_model.config.id2label
+     if model_labels:
+         # e.g., {0: 'positive', 1: 'neutral', 2: 'negative'}
+         sentiment_map = {int(k): v.capitalize() for k, v in model_labels.items()}
+
+     best_index = scores.index(max(scores))
+     return sentiment_map.get(best_index, "Unknown")
+
+
+ def get_news_and_sentiment(ticker: str, company_name: str) -> List[Dict[str, Any]]:
+     load_sentiment_model()
+
+     print(f"Fetching news for {company_name}...")
+     search_url = f"https://news.google.com/rss/search?q={company_name.replace(' ', '+')}+stock&hl=en-IN&gl=IN&ceid=IN:en"
+     news_source = newspaper.build(search_url, memoize_articles=False, language='en')
+     articles_data = []
+     for article in news_source.articles[:5]:
+         try:
+             article.download(); article.parse(); article.nlp()
+             if not article.text or len(article.text) < 150: continue
+
+             sentiment = analyze_sentiment_with_model(article.summary)
+
+             articles_data.append({
+                 "title": article.title,
+                 "summary": article.summary,
+                 "url": article.url,
+                 "sentiment": sentiment
+             })
+         except Exception as e:
+             print(f"Could not process article {article.url}: {e}")
+     return articles_data
+
+ def get_twitter_sentiment(search_query: str) -> Dict[str, Any]:
+     load_sentiment_model()
+
+     print(f"Fetching Twitter sentiment for '{search_query}'...")
+     tweets = [tweet.rawContent for i, tweet in enumerate(sntwitter.TwitterSearchScraper(f"{search_query} lang:en").get_items()) if i < 50]
+     if not tweets: return {"error": "No recent tweets found."}
+
+     counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0, 'Unknown': 0}
+     for text in tweets:
+         sentiment = analyze_sentiment_with_model(text)
+         counts[sentiment] += 1
+
+     return {
+         "search_query": search_query,
+         "total_tweets": len(tweets),
+         "positive": counts['Positive'],
+         "negative": counts['Negative'],
+         "neutral": counts['Neutral']
+     }
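
Editor's note: a minimal sketch of exercising the classifier directly, assuming it runs inside the container where MODEL_PATH (/code/sentiment_model, the copied ml_models directory) exists:

# Minimal sketch: score a single headline with the locally bundled sentiment model.
from tools.news_tools import load_sentiment_model, analyze_sentiment_with_model

load_sentiment_model()  # lazy-loads tokenizer + model on first call
print(analyze_sentiment_with_model("Quarterly profit beats estimates"))
# -> 'Positive' / 'Neutral' / 'Negative', depending on the model's id2label mapping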
docker-compose.yml CHANGED
@@ -1,12 +1,35 @@
  services:
    backend:
      build:
-       context: .
        dockerfile: ./backend/Dockerfile
      ports:
        - "8000:8000"
      volumes:
-       - ./backend:/app

    frontend:
      build:
@@ -16,4 +39,6 @@ services:
        - "5173:5173"
      volumes:
        - ./frontend:/app
-       - /app/node_modules

  services:
+   redis:
+     image: redis:7-alpine
+     ports:
+       - "6379:6379"
+
    backend:
      build:
+       context: . # <-- Context is ROOT
        dockerfile: ./backend/Dockerfile
      ports:
        - "8000:8000"
      volumes:
+       - ./backend:/code/app
+     env_file:
+       - .env
+     command: python -m uvicorn main:app --host 0.0.0.0 --port 8000
+     depends_on:
+       - redis
+
+   worker:
+     build:
+       context: . # <-- Context is ROOT
+       dockerfile: ./backend/Dockerfile
+     volumes:
+       - ./backend:/code/app
+     env_file:
+       - .env
+     command: python -m celery -A celery_worker.celery worker --loglevel=info
+     depends_on:
+       - redis
+       - backend

    frontend:
      build:

        - "5173:5173"
      volumes:
        - ./frontend:/app
+       - /app/node_modules
+     depends_on:
+       - backend
ml_models/.DS_Store ADDED
Binary file (6.15 kB).
 
ml_models/README.md ADDED
@@ -0,0 +1 @@
+ ../../blobs/368ed67c15df571f78fa692ab0e20262d0cbe8cc
ml_models/config.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/bedcf6808384112f9da4ecc439f2addbdac3785b
ml_models/merges.txt ADDED
@@ -0,0 +1 @@
+ ../../blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc
ml_models/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/a7816a407990c2e8d254b10d36beeb972016086e
ml_models/tokenizer.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/d8059234da10f3ae6513891308e25f256dc1db6d
ml_models/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/45af26cf13c9666e90a0891616e8cf48b0404aed
ml_models/vocab.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/4ebe4bb3f3114daf2e4cc349f24873a1175a35d7
poetry.lock CHANGED
The diff for this file is too large to render.
 
pyproject.toml CHANGED
@@ -9,7 +9,14 @@ readme = "README.md"
  requires-python = ">=3.10"
  dependencies = [
      "fastapi (>=0.116.1,<0.117.0)",
-     "uvicorn[standard] (>=0.35.0,<0.36.0)"
  ]


  requires-python = ">=3.10"
  dependencies = [
      "fastapi (>=0.116.1,<0.117.0)",
+     "uvicorn[standard] (>=0.35.0,<0.36.0)",
+     "sqlalchemy (>=2.0.43,<3.0.0)",
+     "psycopg2-binary (>=2.9.10,<3.0.0)",
+     "celery (>=5.5.3,<6.0.0)",
+     "redis[redis-stack] (>=6.4.0,<7.0.0)",
+     "alembic (>=1.16.5,<2.0.0)",
+     "pydantic-settings (>=2.10.1,<3.0.0)",
+     "yfinance (>=0.2.65,<0.3.0)"
  ]

tmp_down.py ADDED
@@ -0,0 +1,7 @@
+ from sentence_transformers import CrossEncoder
+
+ print("Starting model download to local cache...")
+ # This will download the model to a central Hugging Face cache on your Mac
+ # It might be in ~/.cache/huggingface/hub/ or similar
+ model = CrossEncoder('cross-encoder/nli-roberta-base')
+ print("Model download complete!")