cancer-api added

- Dockerfile +22 -0
- README.md +0 -2
- app/__init__.py +0 -0
- app/main.py +168 -0
- app/model.py +105 -0
- deploy.sh +4 -0
- docker-compose.yml +16 -0
- download_data_and_models.sh +47 -0
- requirements.txt +26 -0
    	
Dockerfile
ADDED

@@ -0,0 +1,22 @@
+# Use official Python 3.11 slim image
+FROM python:3.11-slim
+
+# Set working directory
+WORKDIR /app
+
+# Upgrade pip
+RUN pip3 install --upgrade pip
+
+# Copy and install Python dependencies (COPY, not ADD, for plain files)
+COPY requirements.txt .
+RUN pip3 install -r requirements.txt
+
+# Expose port
+EXPOSE 8000
+
+# Copy application code and fine-tuned model weights
+COPY app ./app
+COPY models/fine_tuned ./models/fine_tuned
+
+# Run the application
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
    	
README.md
CHANGED

@@ -1,2 +0,0 @@
-# cancer_classify_extract-api
-To extract disease data and classify research article abstracts into cancer and non-cancer categories.
    	
app/__init__.py
ADDED

(empty file)
    	
app/main.py
ADDED

@@ -0,0 +1,168 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from typing import List, Union, Optional, Dict
+import logging
+from langchain.chains import SequentialChain, TransformChain
+from .model import CancerClassifier, CancerExtractor
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+app = FastAPI(
+    title="Cancer Text Processing API",
+    description="API for cancer-related text classification and information extraction",
+    version="1.0.0"
+)
+
+class TextInput(BaseModel):
+    text: Union[str, List[str]]
+
+class ProcessingResult(BaseModel):
+    text: str
+    classification: Union[str, dict]
+    extraction: Union[str, dict]
+    error: Optional[str] = None
+
+class BatchResponse(BaseModel):
+    results: List[ProcessingResult]
+
+# Initialize models
+try:
+    logger.info("Loading classification model...")
+    classification_pipeline = CancerClassifier("models/fine_tuned")
+
+    logger.info("Loading extraction model...")
+    extraction_pipeline = CancerExtractor()
+
+    logger.info("Models loaded successfully")
+except Exception as e:
+    logger.error(f"Failed to load models: {str(e)}")
+    raise RuntimeError("Could not initialize models")
+
+def batch_classification_transform(inputs: Dict) -> Dict:
+    """Process a batch of texts through the classification model."""
+    try:
+        texts = inputs["input_texts"]
+        if isinstance(texts, str):
+            texts = [texts]  # Convert single text to batch of one
+
+        results = []
+        for text in texts:
+            try:
+                result = classification_pipeline.predict(text)
+                results.append(str(result))
+            except Exception as e:
+                logger.warning(f"Classification failed for text: {text[:50]}... Error: {str(e)}")
+                results.append({"error": str(e)})
+
+        return {"classification_results": results}
+    except Exception as e:
+        logger.error(f"Batch classification failed: {str(e)}")
+        raise
+
+def batch_extraction_transform(inputs: Dict) -> Dict:
+    """Process a batch of texts through the extraction model."""
+    try:
+        texts = inputs["input_texts"]
+        if isinstance(texts, str):
+            texts = [texts]  # Convert single text to batch of one
+
+        results = []
+        for text in texts:
+            try:
+                result = extraction_pipeline.predict(text)
+                results.append(str(result))
+            except Exception as e:
+                logger.warning(f"Extraction failed for text: {text[:50]}... Error: {str(e)}")
+                results.append({"error": str(e)})
+
+        return {"extraction_results": results}
+    except Exception as e:
+        logger.error(f"Batch extraction failed: {str(e)}")
+        raise
+
+# Create processing chains
+classification_chain = TransformChain(
+    input_variables=["input_texts"],
+    output_variables=["classification_results"],
+    transform=batch_classification_transform
+)
+
+extraction_chain = TransformChain(
+    input_variables=["input_texts"],
+    output_variables=["extraction_results"],
+    transform=batch_extraction_transform
+)
+
+# Create sequential chain
+processing_chain = SequentialChain(
+    chains=[classification_chain, extraction_chain],
+    input_variables=["input_texts"],
+    output_variables=["classification_results", "extraction_results"],
+    verbose=True
+)
+
+@app.post("/process", response_model=BatchResponse)
+async def process_texts(input: TextInput):
+    """
+    Process cancer-related texts through the classification and extraction pipeline.
+
+    Args:
+        input: TextInput object containing either a single string or a list of strings
+
+    Returns:
+        BatchResponse with processing results for each input text
+    """
+    try:
+        texts = [input.text] if isinstance(input.text, str) else input.text
+
+        # Validate input
+        if not isinstance(texts, list) or not all(isinstance(t, str) for t in texts):
+            raise HTTPException(status_code=400, detail="Input must be string or list of strings")
+
+        # Process through the LangChain pipeline (invoke() replaces the deprecated direct call)
+        chain_result = processing_chain.invoke({"input_texts": texts})
+
+        # Format results
+        results = []
+        for i, text in enumerate(texts):
+            classification = chain_result["classification_results"][i]
+            extraction = chain_result["extraction_results"][i]
+
+            error = None
+            if isinstance(classification, dict) and "error" in classification:
+                error = classification["error"]
+            elif isinstance(extraction, dict) and "error" in extraction:
+                error = extraction["error"]
+
+            results.append(ProcessingResult(
+                text=text,
+                classification=classification,
+                extraction=extraction,
+                error=error
+            ))
+
+        return BatchResponse(results=results)
+
+    except HTTPException:
+        raise  # propagate client errors (e.g. the 400 above) instead of remapping them to 500
+    except Exception as e:
+        logger.error(f"Processing failed: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    try:
+        # Test with a simple cancer-related phrase
+        test_text = "breast cancer diagnosis"
+        classification_pipeline.predict(test_text)
+        extraction_pipeline.predict(test_text)
+        return {"status": "healthy", "models": ["classification", "extraction"]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
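For reference, a minimal client sketch for the /process endpoint (assumes the service is running on localhost:8000, e.g. via deploy.sh, and that the requests package is available on the caller's side; neither is part of this commit):

    import requests

    # Send one or more abstracts; TextInput accepts a string or a list of strings.
    resp = requests.post(
        "http://localhost:8000/process",
        json={"text": ["BRCA1 mutations are associated with breast cancer risk."]},
        timeout=60,
    )
    resp.raise_for_status()
    for item in resp.json()["results"]:
        print(item["classification"], item["extraction"], item["error"])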
    	
app/model.py
ADDED

@@ -0,0 +1,105 @@
+from transformers import pipeline
+import os
+import re
+
+class CancerClassifier:
+    def __init__(self, model_path: str):
+        self.classifier = pipeline(
+            "text-classification",
+            model=model_path,
+            tokenizer="microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract",
+            return_all_scores=True,
+            device=0 if os.environ.get("USE_GPU", "false").lower() == "true" else -1,
+        )
+
+    def predict(self, text: str):
+        results = self.classifier(text)  # [[{label, score}, ...]] in label-id order
+        return {
+            "predicted_labels": ["Non-Cancer", "Cancer"],
+            "confidence_scores": {
+                "Non-Cancer": results[0][0]["score"],
+                "Cancer": results[0][1]["score"],
+            },
+        }
+
+class CancerExtractor:
+    def __init__(self, model_path="alvaroalon2/biobert_diseases_ner"):
+        self.extractor = pipeline(
+            "ner",
+            model=model_path,
+            aggregation_strategy="simple",
+            device=0 if os.environ.get("USE_GPU", "false").lower() == "true" else -1,
+        )
+        self.cancers = [
+            "cancer",
+            "astrocytoma",
+            "medulloblastoma",
+            "meningioma",
+            "neoplasm",
+            "carcinoma",
+            "tumor",
+            "melanoma",
+            "mesothelioma",
+            "leukemia",
+            "lymphoma",
+            "sarcoma",  # singular, so the substring match also catches "sarcomas"
+        ]
+
+    def predict(self, text: str):
+        results = self.extractor(text)
+        extractions = self.extract_diseases(results)
+        extractions_cleaned = self.clean_diseases(extractions)
+        detections = self.detect_cancer(extractions_cleaned)
+        return detections
+
+    def extract_diseases(self, entities):
+        entities = self.merge_subwords(entities)
+        diseases = [
+            entity["word"]
+            for entity in entities
+            if "disease" in entity["entity_group"].lower()
+        ]
+        return diseases
+
+    def merge_subwords(self, entities):
+        merged_entities = []
+        current_entity = None
+        for entity in entities:
+            if current_entity is None:
+                current_entity = entity.copy()
+            else:
+                # Check if this entity is part of the same word as the previous one
+                if (
+                    entity["start"] == current_entity["end"]
+                    and "disease" in entity["entity_group"].lower()
+                    and "disease" in current_entity["entity_group"].lower()
+                ):
+                    # Merge with the previous entity
+                    current_entity["word"] += entity["word"].replace("##", "")
+                    current_entity["end"] = entity["end"]
+                    current_entity["score"] = (
+                        current_entity["score"] + entity["score"]
+                    ) / 2
+                else:
+                    merged_entities.append(current_entity)
+                    current_entity = entity.copy()
+
+        if current_entity is not None:
+            merged_entities.append(current_entity)
+        return merged_entities
+
+    def clean_diseases(self, text_list):
+        text_list = [re.sub(r"[^a-zA-Z]", " ", t) for t in text_list]
+        unique_text = {t.lower() for t in text_list}
+        cleaned_text = [
+            t for t in unique_text if (3 <= len(t.strip()) <= 50 and ("##" not in t))
+        ]
+        return cleaned_text
+
+    def detect_cancer(self, text_list):
+        detected_cancers = [
+            word2.lower()
+            for word2 in text_list
+            if any(word1.lower() in word2.lower() for word1 in self.cancers)
+        ]
+        return set(detected_cancers)
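The two classes can also be exercised directly, outside FastAPI. A minimal sketch, assuming the fine-tuned checkpoint has already been fetched to models/fine_tuned by download_data_and_models.sh (the NER model is pulled from the Hugging Face Hub on first use):

    from app.model import CancerClassifier, CancerExtractor

    clf = CancerClassifier("models/fine_tuned")
    ext = CancerExtractor()

    text = "We report a case of metastatic melanoma treated with immunotherapy."
    print(clf.predict(text))  # dict with confidence scores for Non-Cancer / Cancer
    print(ext.predict(text))  # set of detected cancer terms, e.g. {'melanoma'}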
    	
deploy.sh
ADDED

@@ -0,0 +1,4 @@
+#!/bin/bash
+docker compose build
+docker compose up -d
+docker compose ps
    	
docker-compose.yml
ADDED

@@ -0,0 +1,16 @@
+services:
+  cancer-api:
+    build: .
+    ports:
+      - "8000:8000"
+    environment:
+      - PYTHONUNBUFFERED=1
+    volumes:
+      - ./models:/app/models
+      - ./app:/app/app
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]  # python:3.11-slim does not ship curl; install it in the Dockerfile for this check to pass
+      interval: 30s
+      timeout: 10s
+      retries: 3
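Because the in-container healthcheck above depends on curl, a host-side readiness probe is a useful fallback. A small sketch in Python (assumes requests is installed on the host; not part of this commit):

    import time
    import requests

    # Poll /health until the API answers, giving the models time to load.
    for attempt in range(10):
        try:
            r = requests.get("http://localhost:8000/health", timeout=10)
            if r.status_code == 200:
                print("service healthy:", r.json())
                break
        except requests.ConnectionError:
            pass  # container may still be starting
        time.sleep(3)
    else:
        raise SystemExit("service did not become healthy")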
    	
download_data_and_models.sh
ADDED

@@ -0,0 +1,47 @@
+#!/bin/bash
+# view url: https://drive.google.com/file/d/1DVPiNx-UKO7B6HvNniGIOOadJ3yq083i/view?usp=sharing
+FILE_ID="1DVPiNx-UKO7B6HvNniGIOOadJ3yq083i"
+FILE_NAME="cancer_data.zip"
+
+echo "Downloading data from Google Drive..."
+
+curl -L -o "$FILE_NAME" "https://docs.google.com/uc?export=download&id=${FILE_ID}"
+
+# Check download success
+if [ ! -f "$FILE_NAME" ]; then
+  echo "Download failed!"
+  exit 1
+fi
+
+# Unzip
+echo "Unzipping $FILE_NAME..."
+unzip "$FILE_NAME"
+
+# Optional: Clean up
+# rm "$FILE_NAME"
+
+echo "Done."
+
+# --- models ---
+# view url: https://drive.google.com/file/d/1aR6PUjDi8fFBp0_pxe1pCv9S4EBptC5W/view?usp=sharing
+FILE_ID="1aR6PUjDi8fFBp0_pxe1pCv9S4EBptC5W"
+FILE_NAME="models-cancer-api.zip"
+
+echo "Downloading model from Google Drive..."
+
+curl -L -o "$FILE_NAME" "https://docs.google.com/uc?export=download&id=${FILE_ID}"
+
+# Check download success
+if [ ! -f "$FILE_NAME" ]; then
+  echo "Download failed!"
+  exit 1
+fi
+
+# Unzip
+echo "Unzipping $FILE_NAME..."
+unzip "$FILE_NAME"
+
+# Optional: Clean up
+# rm "$FILE_NAME"
+
+echo "Done."
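Note that a plain curl download only works for Drive files small enough to skip the virus-scan confirmation page. Where that is a problem, the gdown package handles the confirmation flow; a minimal sketch, assuming gdown is installed (it is not in requirements.txt):

    import gdown

    # gdown resolves Google Drive's confirmation page for large files.
    gdown.download(id="1DVPiNx-UKO7B6HvNniGIOOadJ3yq083i", output="cancer_data.zip")
    gdown.download(id="1aR6PUjDi8fFBp0_pxe1pCv9S4EBptC5W", output="models-cancer-api.zip")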
    	
requirements.txt
ADDED

@@ -0,0 +1,26 @@
+fastapi==0.95.0
+uvicorn==0.21.1
+pydantic
+python-multipart==0.0.6
+chardet==5.2.0
+click==8.2.0
+datasets==3.6.0
+beautifulsoup4==4.13.4
+pandas==2.2.3
+nltk==3.9.1
+transformers==4.51.3
+huggingface-hub==0.31.2
+langchain
+langchain-community
+evaluate==0.4.3
+evaluation==0.0.2
+scikit-learn==1.6.1
+bioc==2.1
+hf-xet==1.1.2
+accelerate==1.7.0
+tqdm==4.67.1
+python-dotenv==1.1.0
+peft==0.15.2
+tokenizers==0.21.1
+torch==2.5.1
+torchvision==0.20.1