import streamlit as st
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Tokenizer and model hyperparameters
vocab_size = 10000
max_length = 120
embedding_dim = 16
trunc_type = 'post'
pad_type = 'post'
oov_tok = '<oov>'
training_size = 20000

# Download the sarcasm headlines dataset
file_url = "https://storage.googleapis.com/wdd-2-node.appspot.com/x1.json"
response = requests.get(file_url)
response.raise_for_status()  # fail fast if the download did not succeed

with open("/tmp/headlines.json", "wb") as f:
    f.write(response.content)

# Load the JSON records into a DataFrame
data = pd.read_json("/tmp/headlines.json")

# Each record has a headline and a binary sarcasm label
headlines = list(data['headline'])
labels = list(data['is_sarcastic'])

# Split into training and test sets
train_sentences = headlines[:training_size]
test_sentences = headlines[training_size:]

train_labels = labels[:training_size]
test_labels = labels[training_size:]

# Fit the tokenizer on the training sentences only, so vocabulary from the
# test set does not leak into the model
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentences)

word_index = tokenizer.word_index

# Convert sentences to integer sequences padded/truncated to a uniform length
train_sequences = tokenizer.texts_to_sequences(train_sentences)
train_padded_sequences = pad_sequences(train_sequences, maxlen=max_length, padding=pad_type, truncating=trunc_type)

test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded_sequences = pad_sequences(test_sequences, maxlen=max_length, padding=pad_type, truncating=trunc_type)

# Convert everything to NumPy arrays for training
train_padded_sequences = np.array(train_padded_sequences)
train_labels = np.array(train_labels)
test_padded_sequences = np.array(test_padded_sequences)
test_labels = np.array(test_labels)

# A small embedding + pooling classifier with a sigmoid output for the
# binary sarcasm label
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
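
# Train the classifier before serving predictions; without this step the
# model would score headlines with its random initial weights. The epoch
# count below is an assumed value for this sketch, not a tuned one. In a
# long-running Streamlit app, the build/train steps would typically be
# wrapped in a function cached with st.cache_resource so script reruns
# do not retrain the model.
model.fit(
    train_padded_sequences, train_labels,
    epochs=10,
    validation_data=(test_padded_sequences, test_labels),
    verbose=0
)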

st.title("News Headline Sarcasm Detection")

def predict_sarcasm(new_sentence):
    # Tokenize and pad the input exactly as the training data was prepared,
    # then return the model's sarcasm probability
    new_sequence = tokenizer.texts_to_sequences([new_sentence])
    new_padded_sequence = pad_sequences(new_sequence, maxlen=max_length, padding=pad_type, truncating=trunc_type)
    prediction = model.predict(new_padded_sequence)[0][0]
    return prediction

example_headlines = [
    "Local School Celebrates Opening of New Science Lab",
    "Aliens Land on Earth, Admit They're Just Here for the Memes",
    "City Council Approves Funding for Public Transportation Expansion"
]

st.write("No headline ideas? Copy and paste any of the following to test:")
for headline in example_headlines:
    st.write(f"- {headline}")

input_text = st.text_input("Enter a news headline:")
if input_text:
    prediction = predict_sarcasm(input_text)
    # The model outputs the probability that the headline is sarcastic;
    # the trailing two spaces before \n force a markdown line break in st.write
    if prediction >= 0.5:
        st.write(f"This headline is sarcastic.  \nsarcasm score: {prediction*100:.2f}%")
    else:
        st.write(f"This headline is not sarcastic.  \nsarcasm score: {prediction*100:.2f}%")