boyinfuture committed
Commit c6fb015 · 1 Parent(s): 4d4a1c6

adding the news classifier and parser

Files changed (42)
  1. .DS_Store +0 -0
  2. .gitignore +1 -0
  3. backend/Dockerfile +6 -7
  4. backend/__pycache__/celery_worker.cpython-311.pyc +0 -0
  5. backend/__pycache__/main.cpython-311.pyc +0 -0
  6. backend/__pycache__/schemas.cpython-311.pyc +0 -0
  7. backend/alembic.ini +147 -0
  8. backend/alembic/README +1 -0
  9. backend/alembic/__pycache__/env.cpython-311.pyc +0 -0
  10. backend/alembic/env.py +69 -0
  11. backend/alembic/script.py.mako +28 -0
  12. backend/alembic/versions/17ec047335c5_create_analysis_jobs_table.py +40 -0
  13. backend/alembic/versions/__pycache__/17ec047335c5_create_analysis_jobs_table.cpython-311.pyc +0 -0
  14. backend/celery_worker.py +19 -0
  15. backend/core/__pycache__/config.cpython-311.pyc +0 -0
  16. backend/core/__pycache__/database.cpython-311.pyc +0 -0
  17. backend/core/config.py +10 -0
  18. backend/core/database.py +7 -0
  19. backend/main.py +41 -11
  20. backend/models/__pycache__/analysis_job.cpython-311.pyc +0 -0
  21. backend/models/analysis_job.py +12 -0
  22. backend/requirements.txt +25 -0
  23. backend/schemas.py +14 -0
  24. backend/tasks/__pycache__/news_tasks.cpython-311.pyc +0 -0
  25. backend/tasks/news_tasks.py +43 -0
  26. backend/tools/__pycache__/data_tools.cpython-311.pyc +0 -0
  27. backend/tools/__pycache__/news_tools.cpython-311.pyc +0 -0
  28. backend/tools/data_tools.py +43 -0
  29. backend/tools/download_model.py +0 -0
  30. backend/tools/news_tools.py +86 -0
  31. docker-compose.yml +28 -3
  32. ml_models/.DS_Store +0 -0
  33. ml_models/README.md +1 -0
  34. ml_models/config.json +1 -0
  35. ml_models/merges.txt +1 -0
  36. ml_models/special_tokens_map.json +1 -0
  37. ml_models/tokenizer.json +1 -0
  38. ml_models/tokenizer_config.json +1 -0
  39. ml_models/vocab.json +1 -0
  40. poetry.lock +0 -0
  41. pyproject.toml +8 -1
  42. tmp_down.py +7 -0
.DS_Store ADDED
Binary file (6.15 kB).
 
.gitignore ADDED
@@ -0,0 +1 @@
+ .env
backend/Dockerfile CHANGED
@@ -1,13 +1,12 @@
  FROM python:3.11-slim

- WORKDIR /app

- RUN pip install poetry

- COPY pyproject.toml poetry.lock ./

- RUN poetry install --no-root

- COPY ./backend .
-
- CMD ["poetry", "run", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

  FROM python:3.11-slim

+ WORKDIR /code

+ RUN apt-get update && apt-get install -y git

+ COPY ./backend/requirements.txt .
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt

+ COPY ./ml_models /code/sentiment_model

+ WORKDIR /code/app
backend/__pycache__/celery_worker.cpython-311.pyc ADDED
Binary file (706 Bytes).
 
backend/__pycache__/main.cpython-311.pyc ADDED
Binary file (3.5 kB).
 
backend/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (1.11 kB).
 
backend/alembic.ini ADDED
@@ -0,0 +1,147 @@
+ # A generic, single database configuration.
+
+ [alembic]
+ # path to migration scripts.
+ # this is typically a path given in POSIX (e.g. forward slashes)
+ # format, relative to the token %(here)s which refers to the location of this
+ # ini file
+ script_location = %(here)s/alembic
+
+ # template used to generate migration file names; The default value is %%(rev)s_%%(slug)s
+ # Uncomment the line below if you want the files to be prepended with date and time
+ # see https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file
+ # for all available tokens
+ # file_template = %%(year)d_%%(month).2d_%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+
+ # sys.path path, will be prepended to sys.path if present.
+ # defaults to the current working directory. for multiple paths, the path separator
+ # is defined by "path_separator" below.
+ prepend_sys_path = .
+
+
+ # timezone to use when rendering the date within the migration file
+ # as well as the filename.
+ # If specified, requires the python>=3.9 or backports.zoneinfo library and tzdata library.
+ # Any required deps can installed by adding `alembic[tz]` to the pip requirements
+ # string value is passed to ZoneInfo()
+ # leave blank for localtime
+ # timezone =
+
+ # max length of characters to apply to the "slug" field
+ # truncate_slug_length = 40
+
+ # set to 'true' to run the environment during
+ # the 'revision' command, regardless of autogenerate
+ # revision_environment = false
+
+ # set to 'true' to allow .pyc and .pyo files without
+ # a source .py file to be detected as revisions in the
+ # versions/ directory
+ # sourceless = false
+
+ # version location specification; This defaults
+ # to <script_location>/versions. When using multiple version
+ # directories, initial revisions must be specified with --version-path.
+ # The path separator used here should be the separator specified by "path_separator"
+ # below.
+ # version_locations = %(here)s/bar:%(here)s/bat:%(here)s/alembic/versions
+
+ # path_separator; This indicates what character is used to split lists of file
+ # paths, including version_locations and prepend_sys_path within configparser
+ # files such as alembic.ini.
+ # The default rendered in new alembic.ini files is "os", which uses os.pathsep
+ # to provide os-dependent path splitting.
+ #
+ # Note that in order to support legacy alembic.ini files, this default does NOT
+ # take place if path_separator is not present in alembic.ini. If this
+ # option is omitted entirely, fallback logic is as follows:
+ #
+ # 1. Parsing of the version_locations option falls back to using the legacy
+ # "version_path_separator" key, which if absent then falls back to the legacy
+ # behavior of splitting on spaces and/or commas.
+ # 2. Parsing of the prepend_sys_path option falls back to the legacy
+ # behavior of splitting on spaces, commas, or colons.
+ #
+ # Valid values for path_separator are:
+ #
+ # path_separator = :
+ # path_separator = ;
+ # path_separator = space
+ # path_separator = newline
+ #
+ # Use os.pathsep. Default configuration used for new projects.
+ path_separator = os
+
+ # set to 'true' to search source files recursively
+ # in each "version_locations" directory
+ # new in Alembic version 1.10
+ # recursive_version_locations = false
+
+ # the output encoding used when revision files
+ # are written from script.py.mako
+ # output_encoding = utf-8
+
+ # database URL. This is consumed by the user-maintained env.py script only.
+ # other means of configuring database URLs may be customized within the env.py
+ # file.
+ sqlalchemy.url = ${DATABASE_URL}
+
+
+ [post_write_hooks]
+ # post_write_hooks defines scripts or Python functions that are run
+ # on newly generated revision scripts. See the documentation for further
+ # detail and examples
+
+ # format using "black" - use the console_scripts runner, against the "black" entrypoint
+ # hooks = black
+ # black.type = console_scripts
+ # black.entrypoint = black
+ # black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+ # lint with attempts to fix using "ruff" - use the module runner, against the "ruff" module
+ # hooks = ruff
+ # ruff.type = module
+ # ruff.module = ruff
+ # ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+ # Alternatively, use the exec runner to execute a binary found on your PATH
+ # hooks = ruff
+ # ruff.type = exec
+ # ruff.executable = ruff
+ # ruff.options = check --fix REVISION_SCRIPT_FILENAME
+
+ # Logging configuration. This is also consumed by the user-maintained
+ # env.py script only.
+ [loggers]
+ keys = root,sqlalchemy,alembic
+
+ [handlers]
+ keys = console
+
+ [formatters]
+ keys = generic
+
+ [logger_root]
+ level = WARNING
+ handlers = console
+ qualname =
+
+ [logger_sqlalchemy]
+ level = WARNING
+ handlers =
+ qualname = sqlalchemy.engine
+
+ [logger_alembic]
+ level = INFO
+ handlers =
+ qualname = alembic
+
+ [handler_console]
+ class = StreamHandler
+ args = (sys.stderr,)
+ level = NOTSET
+ formatter = generic
+
+ [formatter_generic]
+ format = %(levelname)-5.5s [%(name)s] %(message)s
+ datefmt = %H:%M:%S
backend/alembic/README ADDED
@@ -0,0 +1 @@
+ Generic single-database configuration.
backend/alembic/__pycache__/env.cpython-311.pyc ADDED
Binary file (3.24 kB).
 
backend/alembic/env.py ADDED
@@ -0,0 +1,69 @@
+ # backend/alembic/env.py
+
+ from logging.config import fileConfig
+
+ from sqlalchemy import engine_from_config
+ from sqlalchemy.pool import NullPool
+
+ from alembic import context
+
+ # This is the crucial part: we import our app's config and models
+ import sys
+ import os
+ sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '..')))
+
+ from core.config import settings
+ from core.database import Base
+ from models.analysis_job import AnalysisJob
+
+ # this is the Alembic Config object, which provides
+ # access to the values within the .ini file in use.
+ config = context.config
+
+ # This line is IMPORTANT: it tells Alembic to use our app's settings for the DB URL
+ config.set_main_option("sqlalchemy.url", settings.DATABASE_URL)
+
+ # Interpret the config file for Python logging.
+ # This line reads the logging configuration from alembic.ini
+ if config.config_file_name is not None:
+     fileConfig(config.config_file_name)
+
+ # add your model's MetaData object here
+ # for 'autogenerate' support
+ target_metadata = Base.metadata
+
+ def run_migrations_offline() -> None:
+     """Run migrations in 'offline' mode."""
+     url = config.get_main_option("sqlalchemy.url")
+     context.configure(
+         url=url,
+         target_metadata=target_metadata,
+         literal_binds=True,
+         dialect_opts={"paramstyle": "named"},
+     )
+
+     with context.begin_transaction():
+         context.run_migrations()
+
+
+ def run_migrations_online() -> None:
+     """Run migrations in 'online' mode."""
+     connectable = engine_from_config(
+         config.get_section(config.config_main_section, {}),
+         prefix="sqlalchemy.",
+         poolclass=NullPool,
+     )
+
+     with connectable.connect() as connection:
+         context.configure(
+             connection=connection, target_metadata=target_metadata
+         )
+
+         with context.begin_transaction():
+             context.run_migrations()
+
+
+ if context.is_offline_mode():
+     run_migrations_offline()
+ else:
+     run_migrations_online()
backend/alembic/script.py.mako ADDED
@@ -0,0 +1,28 @@
+ """${message}
+
+ Revision ID: ${up_revision}
+ Revises: ${down_revision | comma,n}
+ Create Date: ${create_date}
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+ ${imports if imports else ""}
+
+ # revision identifiers, used by Alembic.
+ revision: str = ${repr(up_revision)}
+ down_revision: Union[str, Sequence[str], None] = ${repr(down_revision)}
+ branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+ depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+ def upgrade() -> None:
+     """Upgrade schema."""
+     ${upgrades if upgrades else "pass"}
+
+
+ def downgrade() -> None:
+     """Downgrade schema."""
+     ${downgrades if downgrades else "pass"}
backend/alembic/versions/17ec047335c5_create_analysis_jobs_table.py ADDED
@@ -0,0 +1,40 @@
+ """Create analysis_jobs table
+
+ Revision ID: 17ec047335c5
+ Revises:
+ Create Date: 2025-09-01 16:55:38.619192
+
+ """
+ from typing import Sequence, Union
+
+ from alembic import op
+ import sqlalchemy as sa
+
+
+ # revision identifiers, used by Alembic.
+ revision: str = '17ec047335c5'
+ down_revision: Union[str, Sequence[str], None] = None
+ branch_labels: Union[str, Sequence[str], None] = None
+ depends_on: Union[str, Sequence[str], None] = None
+
+
+ def upgrade() -> None:
+     """Upgrade schema."""
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.create_table('analysis_jobs',
+         sa.Column('id', sa.UUID(), nullable=False),
+         sa.Column('ticker', sa.String(), nullable=False),
+         sa.Column('status', sa.String(), nullable=False),
+         sa.Column('result', sa.JSON(), nullable=True),
+         sa.PrimaryKeyConstraint('id')
+     )
+     op.create_index(op.f('ix_analysis_jobs_ticker'), 'analysis_jobs', ['ticker'], unique=False)
+     # ### end Alembic commands ###
+
+
+ def downgrade() -> None:
+     """Downgrade schema."""
+     # ### commands auto generated by Alembic - please adjust! ###
+     op.drop_index(op.f('ix_analysis_jobs_ticker'), table_name='analysis_jobs')
+     op.drop_table('analysis_jobs')
+     # ### end Alembic commands ###
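
Editor's note: a minimal sketch of applying this revision programmatically, assuming alembic.ini is reachable from the working directory (the usual route is the alembic CLI; the Config path below is an assumption about the container layout):

# Minimal sketch: apply migrations up to this revision without the CLI.
# Assumes alembic.ini sits in the current working directory inside the backend image.
from alembic import command
from alembic.config import Config

alembic_cfg = Config("alembic.ini")           # hypothetical path
command.upgrade(alembic_cfg, "17ec047335c5")  # or "head" for the latest revision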
backend/alembic/versions/__pycache__/17ec047335c5_create_analysis_jobs_table.cpython-311.pyc ADDED
Binary file (2.22 kB).
 
backend/celery_worker.py ADDED
@@ -0,0 +1,19 @@
+ from celery import Celery
+ from core.config import settings
+
+ # giving app a more descriptive name
+ celery = Celery(
+     "quantitative_analysis_platform",
+     broker=settings.CELERY_BROKER_URL,
+     backend=settings.CELERY_RESULT_BACKEND,
+     include=["tasks.data_tasks", "tasks.news_tasks"]
+ )
+
+ celery.conf.update(
+     task_serializer="json",
+     accept_content=["json"],
+     result_serializer="json",
+     timezone="UTC",
+     enable_utc=True,
+ )
+
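
Editor's note: a minimal sketch of dispatching one of the included tasks through this app instance by its registered name (the UUID is a placeholder; the worker itself is started via docker-compose, shown further down):

# Minimal sketch: enqueue a task on this Celery app by name.
# The UUID is a placeholder; a real id comes from the analysis_jobs table.
from celery_worker import celery

result = celery.send_task(
    "tasks.news_tasks.run_intelligence_analysis",
    args=["00000000-0000-0000-0000-000000000000"],
)
print(result.id, result.status)  # AsyncResult backed by the Redis result backend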
backend/core/__pycache__/config.cpython-311.pyc ADDED
Binary file (729 Bytes).
 
backend/core/__pycache__/database.cpython-311.pyc ADDED
Binary file (563 Bytes).
 
backend/core/config.py ADDED
@@ -0,0 +1,10 @@
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+ class Settings(BaseSettings):
+     DATABASE_URL: str
+     CELERY_BROKER_URL: str
+     CELERY_RESULT_BACKEND: str
+
+     model_config = SettingsConfigDict(env_file=".env")
+
+ settings = Settings()
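
Editor's note: these three settings are read from the environment or the .env file; a minimal sketch with placeholder values (the URLs below are assumptions, the real ones depend on the docker-compose service names):

# Minimal sketch of the variables Settings expects; all values are placeholders.
import os

os.environ.setdefault("DATABASE_URL", "postgresql+psycopg2://user:pass@db:5432/quant")
os.environ.setdefault("CELERY_BROKER_URL", "redis://redis:6379/0")
os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://redis:6379/0")

from core.config import settings
print(settings.CELERY_BROKER_URL)  # pydantic-settings reads env vars first, then .env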
backend/core/database.py ADDED
@@ -0,0 +1,7 @@
+ from sqlalchemy import create_engine
+ from sqlalchemy.orm import sessionmaker, declarative_base
+ from .config import settings
+
+ engine = create_engine(settings.DATABASE_URL)
+ SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
+ Base = declarative_base()
backend/main.py CHANGED
@@ -1,20 +1,50 @@
- from fastapi import FastAPI
  from fastapi.middleware.cors import CORSMiddleware

-
- app = FastAPI(title="AI Hedge Fund API")

  app.add_middleware(
      CORSMiddleware,
-     allow_origins=["*"],
-     allow_credentials=True,
-     allow_methods=["*"],
-     allow_headers=["*"],
  )



- @app.get("/")
- def read_root():
-     """This function runs when someone visits the main URL."""
-     return {"status": "ok", "message": "Welcome to the API!"}

+ # backend/main.py
+ from fastapi import FastAPI, Depends, HTTPException
  from fastapi.middleware.cors import CORSMiddleware
+ from sqlalchemy.orm import Session
+ from uuid import UUID
+ import models.analysis_job as model
+ import schemas
+ from core.database import SessionLocal, engine
+ from tasks.data_tasks import run_data_analysis
+ from tasks.news_tasks import run_intelligence_analysis
+ from celery import chain

+ model.Base.metadata.create_all(bind=engine)
+ app = FastAPI(title="Quantitative Analysis Platform API", version="0.1.0")

  app.add_middleware(
      CORSMiddleware,
+     allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
  )

+ def get_db():
+     db = SessionLocal()
+     try:
+         yield db
+     finally:
+         db.close()

+ @app.post("/jobs", response_model=schemas.Job, status_code=201)
+ def create_analysis_job(job_request: schemas.JobCreate, db: Session = Depends(get_db)):
+     db_job = model.AnalysisJob(ticker=job_request.ticker.upper())
+     db.add(db_job)
+     db.commit()
+     db.refresh(db_job)
+
+     # THE CRITICAL CHANGE IS HERE
+     analysis_chain = chain(
+         run_data_analysis.s(str(db_job.id), db_job.ticker),
+         # By making the signature immutable, we tell Celery to ignore
+         # the result of the previous task and only use the arguments we provide.
+         run_intelligence_analysis.s(str(db_job.id)).set(immutable=True)
+     )
+     analysis_chain.apply_async()
+
+     return db_job

+ @app.get("/jobs/{job_id}", response_model=schemas.Job)
+ def get_job_status(job_id: UUID, db: Session = Depends(get_db)):
+     db_job = db.query(model.AnalysisJob).filter(model.AnalysisJob.id == job_id).first()
+     if db_job is None: raise HTTPException(status_code=404, detail="Job not found")
+     return db_job
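
Editor's note: a minimal client-side sketch of the new job flow (host and port are assumed from the docker-compose mapping; requests is not part of this diff):

# Minimal sketch: create an analysis job and poll it until the Celery chain finishes.
# Assumes the backend is reachable on localhost:8000 as mapped in docker-compose.yml.
import time
import requests

job = requests.post("http://localhost:8000/jobs", json={"ticker": "RELIANCE"}).json()
print(job["id"], job["status"])  # PENDING until the chain completes

while job["status"] not in ("SUCCESS", "FAILED"):
    time.sleep(5)
    job = requests.get(f"http://localhost:8000/jobs/{job['id']}").json()

print(job["result"])  # stock data plus the intelligence_briefing section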
backend/models/__pycache__/analysis_job.cpython-311.pyc ADDED
Binary file (1.05 kB).
 
backend/models/analysis_job.py ADDED
@@ -0,0 +1,12 @@
+ from sqlalchemy import Column, String, JSON
+ from sqlalchemy.dialects.postgresql import UUID
+ import uuid
+ from core.database import Base
+
+ class AnalysisJob(Base):
+     __tablename__ = "analysis_jobs"
+
+     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
+     ticker = Column(String, nullable=False, index=True)
+     status = Column(String, default="PENDING", nullable=False)
+     result = Column(JSON, nullable=True)
backend/requirements.txt ADDED
@@ -0,0 +1,25 @@
+ fastapi
+ uvicorn[standard]
+ pydantic-settings
+
+
+ sqlalchemy
+ psycopg2-binary
+ alembic
+
+ # task queue
+ celery
+ redis[redis-stack]
+
+ # data agent
+ yfinance
+
+ # news & sentiment agent
+ newspaper3k
+ lxml_html_clean
+ torch
+ transformers
+ sentence-transformers
+
+ # special install for Twitter scraping
+ snscrape@git+https://github.com/JustAnotherArchivist/snscrape.git@master
backend/schemas.py ADDED
@@ -0,0 +1,14 @@
+ from pydantic import BaseModel, ConfigDict
+ from uuid import UUID
+ from typing import Optional, Dict, Any
+
+ class JobCreate(BaseModel):
+     ticker: str
+
+ class Job(BaseModel):
+     id: UUID
+     ticker: str
+     status: str
+     result: Optional[Dict[str, Any]] = None
+
+     model_config = ConfigDict(from_attributes=True)
backend/tasks/__pycache__/news_tasks.cpython-311.pyc ADDED
Binary file (2.82 kB).
 
backend/tasks/news_tasks.py ADDED
@@ -0,0 +1,43 @@
+ from celery_worker import celery
+ from core.database import SessionLocal
+ from models.analysis_job import AnalysisJob
+ from tools.news_tools import get_news_and_sentiment, get_twitter_sentiment
+ from uuid import UUID
+
+ @celery.task
+ def run_intelligence_analysis(job_id: str):
+     db = SessionLocal()
+     job = None
+     try:
+         job = db.query(AnalysisJob).filter(AnalysisJob.id == UUID(job_id)).first()
+         if not job or not job.result: raise ValueError(f"Job {job_id} not found or has no data.")
+
+         current_data = job.result
+         company_name = current_data.get("company_name")
+         if not company_name: raise ValueError("Company name not found in data.")
+
+         print(f"Starting intelligence analysis for {company_name}...")
+
+         news = get_news_and_sentiment(current_data.get("ticker"), company_name)
+         twitter = get_twitter_sentiment(f"{company_name} stock")
+
+         current_data['intelligence_briefing'] = {"news": news, "twitter": twitter}
+         job.result = current_data
+         job.status = "SUCCESS"
+         db.commit()
+
+         print(f"Intelligence analysis for job {job_id} completed.")
+         final_result = str(job.result)
+
+     except Exception as e:
+         print(f"Error during intelligence analysis for job {job_id}: {e}")
+         if job:
+             job.status = "FAILED"
+             error_data = job.result if job.result else {}
+             error_data['error'] = f"Intelligence analysis failed: {str(e)}"
+             job.result = error_data
+             db.commit()
+         final_result = f"Error: {e}"
+     finally:
+         db.close()
+     return final_result
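
Editor's note: for local debugging, the task can be executed in-process instead of through the broker; a minimal sketch (the job id is a placeholder and the job row must already contain the data written by the preceding data-analysis step):

# Minimal sketch: run the intelligence task eagerly, with no broker round-trip.
# The UUID is a placeholder; the job's result must already hold company_name/ticker.
from tasks.news_tasks import run_intelligence_analysis

eager_result = run_intelligence_analysis.apply(
    args=["00000000-0000-0000-0000-000000000000"]
)
print(eager_result.get())  # stringified job.result on success, "Error: ..." otherwise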
backend/tools/__pycache__/data_tools.cpython-311.pyc ADDED
Binary file (2.65 kB).
 
backend/tools/__pycache__/news_tools.cpython-311.pyc ADDED
Binary file (5.81 kB).
 
backend/tools/data_tools.py ADDED
@@ -0,0 +1,43 @@
+ import yfinance as yf
+ from typing import Dict, Any
+
+ def get_stock_data(ticker: str) -> Dict[str, Any]:
+
+     # for NSE stocks, yfinance expects the '.NS' suffix. For BSE, it's '.BO'.
+     # assume NSE by default if no suffix is provided.
+     if not ticker.endswith(('.NS', '.BO')):
+         print(f"Ticker '{ticker}' has no exchange suffix. Assuming NSE and appending '.NS'.")
+         ticker = f"{ticker}.NS"
+
+     stock = yf.Ticker(ticker)
+
+     # yfinance can sometimes fail for certain tickers or data points.
+     try:
+         info = stock.info
+     except Exception as e:
+         print(f"Could not fetch info for {ticker}: {e}")
+         return {"error": f"Invalid ticker or no data available for {ticker}"}
+
+     # check if we got a valid response
+     if not info or info.get('regularMarketPrice') is None:
+         return {"error": f"Invalid ticker or no data available for {ticker}"}
+
+     # select key data points relevant to analysis
+     data = {
+         "ticker": ticker,
+         "company_name": info.get('longName'),
+         "current_price": info.get('currentPrice') or info.get('regularMarketPrice'),
+         "previous_close": info.get('previousClose'),
+         "market_cap": info.get('marketCap'),
+         "pe_ratio": info.get('trailingPE') or info.get('forwardPE'),
+         "pb_ratio": info.get('priceToBook'),
+         "dividend_yield": info.get('dividendYield'),
+         "sector": info.get('sector'),
+         "industry": info.get('industry'),
+         "summary": info.get('longBusinessSummary'),
+         "website": info.get('website'),
+         "logo_url": info.get('logo_url')
+     }
+
+     # clean up data by removing any keys with none values
+     return {k: v for k, v in data.items() if v is not None}
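
Editor's note: a minimal usage sketch (the ticker is just an example; the returned fields depend on whatever yfinance has for it at call time):

# Minimal sketch: fetch fundamentals for an NSE ticker (suffix is added automatically).
from tools.data_tools import get_stock_data

snapshot = get_stock_data("TCS")  # becomes "TCS.NS"
if "error" in snapshot:
    print(snapshot["error"])
else:
    print(snapshot["company_name"], snapshot["current_price"], snapshot.get("pe_ratio"))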
backend/tools/download_model.py ADDED
File without changes
backend/tools/news_tools.py ADDED
@@ -0,0 +1,86 @@
+ import snscrape.modules.twitter as sntwitter
+ import newspaper
+ # We will now import AutoModelForSequenceClassification and AutoTokenizer
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+ import torch # We need torch to process the model's output
+ from typing import List, Dict, Any
+
+ # We will load the model and tokenizer inside the function
+ sentiment_model = None
+ tokenizer = None
+ MODEL_PATH = '/code/sentiment_model'
+
+ def load_sentiment_model():
+     """A function to load the model and tokenizer on demand using the transformers library."""
+     global sentiment_model, tokenizer
+     if sentiment_model is None or tokenizer is None:
+         print("Loading sentiment model and tokenizer for the first time...")
+         # Load the tokenizer from the local path
+         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+         # Load the model from the local path
+         sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
+         print("Sentiment model and tokenizer loaded.")
+
+ def analyze_sentiment_with_model(text: str) -> str:
+     """Uses the loaded model to predict sentiment."""
+     # This is the standard way to use a transformers model
+     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+     with torch.no_grad():
+         logits = sentiment_model(**inputs).logits
+
+     scores = logits.softmax(dim=1)[0].tolist()
+     sentiment_map = {0: 'Positive', 1: 'Neutral', 2: 'Negative'} # This order might be different
+
+     # Let's verify the model's expected labels
+     model_labels = sentiment_model.config.id2label
+     if model_labels:
+         # e.g., {0: 'positive', 1: 'neutral', 2: 'negative'}
+         sentiment_map = {int(k): v.capitalize() for k, v in model_labels.items()}
+
+     best_index = scores.index(max(scores))
+     return sentiment_map.get(best_index, "Unknown")
+
+
+ def get_news_and_sentiment(ticker: str, company_name: str) -> List[Dict[str, Any]]:
+     load_sentiment_model()
+
+     print(f"Fetching news for {company_name}...")
+     search_url = f"https://news.google.com/rss/search?q={company_name.replace(' ', '+')}+stock&hl=en-IN&gl=IN&ceid=IN:en"
+     news_source = newspaper.build(search_url, memoize_articles=False, language='en')
+     articles_data = []
+     for article in news_source.articles[:5]:
+         try:
+             article.download(); article.parse(); article.nlp()
+             if not article.text or len(article.text) < 150: continue
+
+             sentiment = analyze_sentiment_with_model(article.summary)
+
+             articles_data.append({
+                 "title": article.title,
+                 "summary": article.summary,
+                 "url": article.url,
+                 "sentiment": sentiment
+             })
+         except Exception as e:
+             print(f"Could not process article {article.url}: {e}")
+     return articles_data
+
+ def get_twitter_sentiment(search_query: str) -> Dict[str, Any]:
+     load_sentiment_model()
+
+     print(f"Fetching Twitter sentiment for '{search_query}'...")
+     tweets = [tweet.rawContent for i, tweet in enumerate(sntwitter.TwitterSearchScraper(f"{search_query} lang:en").get_items()) if i < 50]
+     if not tweets: return {"error": "No recent tweets found."}
+
+     counts = {'Positive': 0, 'Negative': 0, 'Neutral': 0, 'Unknown': 0}
+     for text in tweets:
+         sentiment = analyze_sentiment_with_model(text)
+         counts[sentiment] += 1
+
+     return {
+         "search_query": search_query,
+         "total_tweets": len(tweets),
+         "positive": counts['Positive'],
+         "negative": counts['Negative'],
+         "neutral": counts['Neutral']
+     }
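
Editor's note: a minimal sketch of exercising the classifier directly, assuming it runs inside the container where MODEL_PATH (/code/sentiment_model, the copied ml_models directory) exists:

# Minimal sketch: score a single headline with the locally bundled sentiment model.
from tools.news_tools import load_sentiment_model, analyze_sentiment_with_model

load_sentiment_model()  # lazy-loads tokenizer + model on first call
print(analyze_sentiment_with_model("Quarterly profit beats estimates"))
# -> 'Positive' / 'Neutral' / 'Negative', depending on the model's id2label mapping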
docker-compose.yml CHANGED
@@ -1,12 +1,35 @@
  services:
    backend:
      build:
-       context: .
        dockerfile: ./backend/Dockerfile
      ports:
        - "8000:8000"
      volumes:
-       - ./backend:/app

    frontend:
      build:
@@ -16,4 +39,6 @@ services:
        - "5173:5173"
      volumes:
        - ./frontend:/app
-       - /app/node_modules

  services:
+   redis:
+     image: redis:7-alpine
+     ports:
+       - "6379:6379"
+
    backend:
      build:
+       context: . # <-- Context is ROOT
        dockerfile: ./backend/Dockerfile
      ports:
        - "8000:8000"
      volumes:
+       - ./backend:/code/app
+     env_file:
+       - .env
+     command: python -m uvicorn main:app --host 0.0.0.0 --port 8000
+     depends_on:
+       - redis
+
+   worker:
+     build:
+       context: . # <-- Context is ROOT
+       dockerfile: ./backend/Dockerfile
+     volumes:
+       - ./backend:/code/app
+     env_file:
+       - .env
+     command: python -m celery -A celery_worker.celery worker --loglevel=info
+     depends_on:
+       - redis
+       - backend

    frontend:
      build:

        - "5173:5173"
      volumes:
        - ./frontend:/app
+       - /app/node_modules
+     depends_on:
+       - backend
ml_models/.DS_Store ADDED
Binary file (6.15 kB).
 
ml_models/README.md ADDED
@@ -0,0 +1 @@
+ ../../blobs/368ed67c15df571f78fa692ab0e20262d0cbe8cc
ml_models/config.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/bedcf6808384112f9da4ecc439f2addbdac3785b
ml_models/merges.txt ADDED
@@ -0,0 +1 @@
+ ../../blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc
ml_models/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/a7816a407990c2e8d254b10d36beeb972016086e
ml_models/tokenizer.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/d8059234da10f3ae6513891308e25f256dc1db6d
ml_models/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/45af26cf13c9666e90a0891616e8cf48b0404aed
ml_models/vocab.json ADDED
@@ -0,0 +1 @@
+ ../../blobs/4ebe4bb3f3114daf2e4cc349f24873a1175a35d7
poetry.lock CHANGED
The diff for this file is too large to render.
 
pyproject.toml CHANGED
@@ -9,7 +9,14 @@ readme = "README.md"
  requires-python = ">=3.10"
  dependencies = [
      "fastapi (>=0.116.1,<0.117.0)",
-     "uvicorn[standard] (>=0.35.0,<0.36.0)"
  ]


  requires-python = ">=3.10"
  dependencies = [
      "fastapi (>=0.116.1,<0.117.0)",
+     "uvicorn[standard] (>=0.35.0,<0.36.0)",
+     "sqlalchemy (>=2.0.43,<3.0.0)",
+     "psycopg2-binary (>=2.9.10,<3.0.0)",
+     "celery (>=5.5.3,<6.0.0)",
+     "redis[redis-stack] (>=6.4.0,<7.0.0)",
+     "alembic (>=1.16.5,<2.0.0)",
+     "pydantic-settings (>=2.10.1,<3.0.0)",
+     "yfinance (>=0.2.65,<0.3.0)"
  ]

tmp_down.py ADDED
@@ -0,0 +1,7 @@
+ from sentence_transformers import CrossEncoder
+
+ print("Starting model download to local cache...")
+ # This will download the model to a central Hugging Face cache on your Mac
+ # It might be in ~/.cache/huggingface/hub/ or similar
+ model = CrossEncoder('cross-encoder/nli-roberta-base')
+ print("Model download complete!")