# AUTOGENERATED! DO NOT EDIT! File to edit: ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb.

# %% auto 0
__all__ = [
    'hub_llm',
    'title',
    'description',
    'combine_prompt_template',
    'pdf_example_1',
    'pdf_example_2',
    'prompt_example_1',
    'prompt_example_2',
    'upload_file_input',
    'custom_prompt_input',
    'custom_chunk_input',
    'chunk_size_input',
    'chunk_overlap_input',
    'examples',
    'outputs',
    'iface',
    'summarize',
]

# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 3
from langchain_community.llms import HuggingFaceHub
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
# NOTE: this re-imports PromptTemplate and therefore rebinds the name imported
# from langchain_core.prompts above; the order is kept as in the notebook.
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
import os
import dotenv
from dotenv import load_dotenv

# Read variables from a .env file into the process environment before the
# client below is built (presumably this supplies the Hugging Face API token;
# confirm against the deployment setup).
load_dotenv()

# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 5
# Hosted summarization model used by the app.
# Candidate repositories: "facebook/bart-large-cnn", "google/flan-t5-base",
# "google/pegasus-xsum".
hub_llm = HuggingFaceHub(
    repo_id="facebook/bart-large-cnn",
    model_kwargs=dict(
        # Randomness of sampling (0.0: deterministic, 1.0: very random).
        temperature=0.01,
        # Upper bound on the number of tokens generated for the summary.
        max_new_tokens=256 * 2,
        # Lower bound on the length of the generated summary.
        min_length=30,
        # Penalty on repeated tokens (higher value = less repetition).
        repetition_penalty=1.2,
        # Only the k most likely tokens are considered at each step.
        top_k=50,
        # Only tokens within cumulative probability top_p are considered.
        top_p=0.95,
        # Stop generating once a stop condition (e.g. end-of-sequence) is met.
        early_stopping=True,
    ),
)
# %% ../drive/MyDrive/Codici/Python/Apps/Gradio_App/Langchain_apps/langchain_summarization_app.ipynb 15
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import time

title="PDF Summarizer"
description="Summarize your PDF using a custom combine prompt."

# Default combine_prompt
# NOTE(review): the prompt text is reproduced exactly as it appears in this
# file (a single line); restore line breaks if the notebook source had them.
combine_prompt_template = (
    "Write a comprehensive summary of this academic article. "
    "Divide the summary in: 1. Main Objective of the paper 2. Results "
    "{text} SUMMARY:"
)

# Example PDF files and prompts
pdf_example_1 = './ZeroShotDataAug.pdf'
pdf_example_2 = './bert.pdf'
prompt_example_1 = (
    "Write a comprehensive summary of this academic article. "
    "Divide the summary in: 1. Main Objective of the paper 2. Results "
    "{text} SUMMARY:"
)
prompt_example_2 = (
    "Summarize the following document focusing on the key findings and "
    "methodology. {text} Summary:"
)


def _resolve_prompt_template(custom_prompt):
    """Return the user's prompt, or the default template when it is blank."""
    if custom_prompt and custom_prompt.strip():
        return custom_prompt
    return combine_prompt_template


# Implementation
def summarize(pdf_file, custom_prompt, custom_chunk, chunk_size, chunk_overlap):
    """Summarize an uploaded PDF with a map-reduce summarization chain.

    Args:
        pdf_file: The uploaded PDF. Either an object exposing the file path
            as ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.
        custom_prompt: Template for the combine step. It is declared with a
            single ``text`` input variable. A blank or missing value falls
            back to ``combine_prompt_template``, as the textbox hint promises.
        custom_chunk: When truthy, split the PDF with a
            ``RecursiveCharacterTextSplitter`` built from ``chunk_size`` and
            ``chunk_overlap``; otherwise use the loader's default splitting.
        chunk_size: Chunk size for the custom splitter.
        chunk_overlap: Overlap between consecutive chunks for the custom
            splitter.

    Returns:
        The generated summary, or a string starting with
        ``"An error occurred: "`` describing what went wrong.
    """
    # Give a clear message instead of an AttributeError on None.
    if pdf_file is None:
        return "An error occurred: no PDF file was uploaded."

    try:
        # Accept both file-like objects (path in ``.name``) and plain paths.
        file_path = getattr(pdf_file, "name", pdf_file)

        # Load and process the PDF
        loader = PyPDFLoader(file_path)
        if custom_chunk:
            # Number widgets may deliver floats; the splitter works with
            # whole character counts.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=int(chunk_size),
                chunk_overlap=int(chunk_overlap),
            )
            docs = loader.load_and_split(text_splitter=text_splitter)
        else:
            docs = loader.load_and_split()

        PROMPT = PromptTemplate(
            template=_resolve_prompt_template(custom_prompt),
            input_variables=['text'],
        )
        chain = load_summarize_chain(hub_llm, chain_type='map_reduce', combine_prompt=PROMPT)

        # Introduce a delay before calling the API
        # (presumably to ease rate limiting; confirm before removing).
        time.sleep(2)
        summary = chain.invoke(docs)['output_text']
        return summary
    except Exception as e:
        # UI boundary: report any failure as text in the output box rather
        # than letting the exception propagate to the interface.
        return f"An error occurred: {e}"


upload_file_input = gr.UploadButton(label="Upload PDF", file_types=[".pdf"], file_count="single")
custom_prompt_input = gr.Textbox(
    label="Custom Prompt",
    lines=10,
    value=combine_prompt_template,
    info="Define your own prompt or leave empty for default.",
)
custom_chunk_input = gr.Checkbox(
    label="Custom Chunk",
    value=False,
    info="Recommended to be left unchecked",
)
# Numeric controls for the optional custom text splitter.
chunk_size_input = gr.Number(label="Chunk Size", value=700, minimum=500, maximum=1000, step=100)
# The step is 10 so the stepper stays inside the 10-100 range; a step of 100
# would leave the range on the first click from the default of 50.
chunk_overlap_input = gr.Number(label="Chunk Overlap", value=50, minimum=10, maximum=100, step=10)

# Each example row lists one value per input, in the same order as `inputs`
# below: file, prompt, custom-chunk flag, chunk size, chunk overlap.
examples=[
    [pdf_example_1, prompt_example_1, False, 700, 50],
    # [pdf_example_2, prompt_example_2, False, 700, 50]
]

outputs = gr.Textbox(label="Summary")

# Wire the widgets to `summarize`; the input order must match its parameters.
iface = gr.Interface(
    title=title,
    description=description,
    fn=summarize,
    inputs=[
        upload_file_input,
        custom_prompt_input,
        custom_chunk_input,
        chunk_size_input,
        chunk_overlap_input,
    ],
    outputs=outputs,
    examples=examples,
)

# Start the web app as soon as this file is executed.
iface.launch(
    debug=False,
    # share=False,
)