Spaces:

mgokg
/

yt-transcribe

Sleeping

App Files Files Community

yt-transcribe / app.py

mgokg

Update app.py

7a4ee36 verified 8 months ago

raw

history blame contribute delete

1.92 kB

	import gradio as gr
	import requests
	from bs4 import BeautifulSoup
	from gradio_client import Client
	from urllib.parse import urljoin
	#import pandas as pd
	#from io import StringIO
	import json
	#import groq
	import os

	def list_of_clubs(ort: str, *, timeout: float = 10.0):
	    """Collect club names listed for a Bavarian town on vereine-in-deutschland.net.

	    The town's listing page is fetched first to discover how many result
	    pages exist; every page is then fetched and the text of each link inside
	    the result container is gathered.

	    Args:
	        ort: Town name as entered by the user. Surrounding whitespace is
	            stripped and the value is percent-encoded before it is placed
	            in the URL path.
	        timeout: Seconds to wait for each HTTP request before giving up.

	    Returns:
	        On success, a list with every second collected link text.
	        On any failure (network error, HTTP error status, unparsable page
	        number, ...), the tuple ``(error_message, [])``.
	        NOTE(review): the two shapes differ; this is kept for backward
	        compatibility with existing callers.
	    """
	    # Local import keeps this function self-contained.
	    from urllib.parse import quote

	    base_url = "https://vereine-in-deutschland.net"
	    # Encode the user input as a single path segment so characters such as
	    # '/', '?' or '#' cannot change which path is requested.
	    ort_segment = quote(str(ort).strip(), safe="")
	    initial_url = f"{base_url}/vereine/Bayern/{ort_segment}"
	    all_links_text = []

	    try:
	        # One session reuses the underlying connection for all page requests.
	        with requests.Session() as session:
	            response = session.get(initial_url, timeout=timeout)
	            response.raise_for_status()
	            soup = BeautifulSoup(response.content, 'html.parser')

	            # Determine the last page from the 8th pagination item.
	            # NOTE(review): when that item is missing, 10 pages are requested;
	            # presumably a guess for short paginations - confirm against the site.
	            last_page = 10
	            link_element = soup.select_one('li.page-item:nth-child(8) > a:nth-child(1)')
	            if link_element and 'href' in link_element.attrs:
	                href = link_element['href']
	                # Tolerate a trailing slash before reading the page number.
	                last_page = int(href.rstrip('/').split('/')[-1])

	            # Loop through all pages and collect link texts.
	            for page_number in range(1, last_page + 1):
	                page_url = f"{initial_url}/p/{page_number}"
	                response = session.get(page_url, timeout=timeout)
	                response.raise_for_status()
	                soup = BeautifulSoup(response.content, 'html.parser')
	                target_div = soup.select_one('div.row-cols-1:nth-child(4)')

	                if target_div:
	                    all_links_text.extend(
	                        a.text for a in target_div.find_all('a', href=True)
	                    )
	                else:
	                    print(f"Target div not found on page {page_number}")

	    except Exception as e:
	        # Errors are reported to the caller instead of being raised.
	        return str(e), []

	    # Keep every second entry.
	    # NOTE(review): presumably each club contributes two links - confirm.
	    return all_links_text[0::2]

	def process_ort(ort):
	    """Gradio callback: return whatever ``list_of_clubs`` yields for ``ort``."""
	    return list_of_clubs(ort)

	# Build the UI: one single-line text input for the town name and one text
	# output that shows the value returned by process_ort.
	iface = gr.Interface(
	    process_ort,
	    gr.Textbox(lines=1, placeholder="Ort eingeben..."),
	    gr.Textbox(),
	    title="vereine",
	    description="VereineFinder",
	)

	# Start serving the app.
	iface.launch()