"""FastAPI service that matches a new string against previously seen strings.

Every string sent to the ``/`` endpoint is remembered in the module-level
``strings`` set. For each request, the stored string whose BERT ``[CLS]``
embedding has the highest cosine similarity to the new input is returned.
"""
import logging
import pickle

import torch
import transformers
import uvicorn
from fastapi import FastAPI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

BERT_MODEL_NAME = 'bert-base-cased'
MAX_SEQUENCE_LENGTH = 512  # Longer inputs are truncated to this many tokens.

logger = logging.getLogger(__name__)

app = FastAPI()

# Every distinct input string seen so far. It lives in process memory only,
# so it is reset on restart and is not shared between worker processes.
strings = set()

# Load the tokenizer and the BERT model once at import time; reloading them on
# every request is slow. eval() disables dropout so the embeddings are
# deterministic.
tokenizer = transformers.BertTokenizer.from_pretrained(BERT_MODEL_NAME)
model = transformers.BertModel.from_pretrained(BERT_MODEL_NAME)
model.eval()


def predict(input_text: str) -> dict:
    """Remember ``input_text`` and return the most similar stored string.

    Similarity is the cosine similarity between the ``[CLS]`` embedding of
    ``input_text`` and the ``[CLS]`` embedding of every *other* stored string.

    Args:
        input_text: The new string to record and to match against.

    Returns:
        A dict with two keys: ``"prediction"`` is the stored string most
        similar to ``input_text`` (``input_text`` itself when no other string
        has been stored yet), and ``"num_strings"`` is the number of distinct
        strings stored so far, including this one.
    """
    strings.add(input_text)

    # Exclude the query itself: it would always win with a similarity of 1.0.
    candidates = [text for text in strings if text != input_text]
    if not candidates:
        return {"prediction": input_text, "num_strings": len(strings)}

    # The query goes first so that row 0 of the embeddings is always the query.
    # NOTE: every stored string is re-encoded on each call, so the cost grows
    # linearly with the number of stored strings.
    encoded = tokenizer(
        [input_text, *candidates],
        padding=True,
        truncation=True,
        max_length=MAX_SEQUENCE_LENGTH,
        return_tensors='pt',
    )

    # Passing the full encoding supplies the attention mask, so padding tokens
    # do not influence the embeddings.
    with torch.no_grad():
        outputs = model(**encoded)

    # outputs[0] is the last hidden state; position 0 is the [CLS] token.
    cls_embeddings = outputs[0][:, 0, :]
    query_embedding = cls_embeddings[:1]
    candidate_embeddings = cls_embeddings[1:]

    # The single query row is broadcast against every candidate row.
    similarity_scores = torch.nn.functional.cosine_similarity(
        query_embedding, candidate_embeddings, dim=1
    )
    best_index = int(torch.argmax(similarity_scores))

    return {"prediction": candidates[best_index], "num_strings": len(strings)}


@app.get("/")
def read_root(input_text: str):
    """Handle ``GET /?input_text=...`` by delegating to :func:`predict`.

    Args:
        input_text: Query parameter holding the string to record and match.

    Returns:
        The dict produced by ``predict``, or ``{"error": <message>}`` if the
        prediction raised an exception.
    """
    logger.info("Received request with input_text: %s", input_text)
    try:
        result = predict(input_text)
    except Exception as e:
        # Top-level request boundary: log the full traceback and keep the
        # existing response shape so clients still get a JSON body.
        logger.exception("An error occured: %s", e)
        return {"error": str(e)}
    logger.info("Prediction made: %s", result)
    return result