# is445_demo / app.py
# hyzhang00's picture
# Update app.py
# 7ded404 verified
import streamlit as st
import pandas as pd
import altair as alt
# Heading rendered at the top of the Streamlit page.
PAGE_TITLE = 'UFO Data Visualization Analysis Report'
st.title(PAGE_TITLE)
@st.cache_data
def load_data():
    """Download the UFO sightings CSV and return it as a prepared DataFrame.

    The remote file is read without a header row, so column names are
    supplied here. On top of the raw columns the returned frame has:

    * ``datetime`` parsed with ``pd.to_datetime``;
    * ``year`` and ``month`` extracted from the parsed ``datetime``;
    * ``shape`` with missing values replaced by ``'unknown'`` and all
      values lower-cased.

    Returns:
        pandas.DataFrame: one row per record in the CSV.
    """
    source_url = 'https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv'
    column_names = [
        'datetime', 'city', 'state', 'country', 'shape',
        'duration_seconds', 'duration_reported', 'description',
        'date_posted', 'latitude', 'longitude',
    ]
    sightings = pd.read_csv(source_url, header=None, names=column_names)

    # Parse once and reuse the parsed series for the derived calendar fields.
    timestamps = pd.to_datetime(sightings['datetime'])
    sightings['datetime'] = timestamps
    sightings['year'] = timestamps.dt.year
    sightings['month'] = timestamps.dt.month

    # Missing shapes become 'unknown'; lower-casing merges case variants.
    shape_filled = sightings['shape'].fillna('unknown')
    sightings['shape'] = shape_filled.str.lower()
    return sightings
# Load the sightings table; the sections below all read from `df`.
df = load_data()

st.markdown("## 1. Temporal Trends in UFO Sightings")

# The slider bounds come from the data and default to the full span.
min_year = int(df['year'].min())
max_year = int(df['year'].max())
year_range = st.slider(
    "Select Year Range",
    min_value=min_year,
    max_value=max_year,
    value=(min_year, max_year)
)
time_agg = st.selectbox(
    "Select Time Aggregation",
    ['Yearly', 'Monthly']
)

# Rows whose year lies inside the selected range (both ends included).
in_selected_years = df['year'].between(year_range[0], year_range[1])

if time_agg == 'Yearly':
    time_counts = df[in_selected_years]\
        .groupby('year').size().reset_index(name='count')
    # format='d' prints years as plain integers; the default numeric axis
    # format inserts a thousands separator ("1,950").
    x_encoding = alt.X('year:Q', title='Year', axis=alt.Axis(format='d'))
    time_tooltip = alt.Tooltip('year:Q', title='Year', format='d')
else:
    # Collapse every sighting onto the first day of its month so that
    # grouping by 'date' yields one bucket per calendar month.
    df['date'] = pd.to_datetime(df[['year', 'month']].assign(day=1))
    time_counts = df[in_selected_years]\
        .groupby('date').size().reset_index(name='count')
    x_encoding = alt.X('date:T', title='Date')
    time_tooltip = alt.Tooltip('date:T', title='Month', format='%B %Y')

# Shared encodings for the three layered marks. The tooltip names the time
# bucket as well as the count, so a hovered point can be identified.
base_chart = alt.Chart(time_counts).encode(
    x=x_encoding,
    y=alt.Y('count:Q', title='Number of Sightings'),
    tooltip=[time_tooltip, 'count:Q']
)

# Area for the overall volume, line for the trend, points as hover targets.
temporal_viz = (
    base_chart.mark_area(opacity=0.3) +
    base_chart.mark_line(color='steelblue') +
    base_chart.mark_point(color='steelblue')
).properties(
    width=700,
    height=400
).interactive()

st.altair_chart(temporal_viz)
# Written commentary displayed beneath the temporal trends chart.
temporal_writeup = """**(1) Features Highlighted**
This visualization emphasizes the temporal evolution of UFO sightings from 1949 to 2013, highlighting both the overall trend and year-specific fluctuations in reporting frequency. The visualization reveals distinct patterns of increased reporting over time, with notable spikes in certain periods that could correlate with significant historical events or changes in reporting methods.
**(2) Design Choices**
I implemented several key design elements for optimal data representation:
+ A line chart with point markers was chosen for its effectiveness in showing continuous time-series data while maintaining precise year-specific values
+ Interactive tooltips were added to provide exact sighting counts
**(3) Potential Improvements**
Given more time, I would implement dual y-axes to show both sighting frequency and duration, and add the capability to filter by time periods and incorporate monthly/seasonal analysis options."""
st.markdown(temporal_writeup)
st.markdown("## 2. Analysis of UFO Shape Distribution")

# Every distinct shape is offered in the picker, in alphabetical order.
all_shapes = sorted(df['shape'].unique())

# Pre-select the ten most frequently reported shapes. value_counts() sorts
# by descending frequency, so head(10) is the actual "top 10"; slicing the
# alphabetical list instead would pick ten arbitrary shapes.
top_shapes = df['shape'].value_counts().head(10).index.tolist()

selected_shapes = st.multiselect(
    "Select UFO Shapes to Display",
    options=all_shapes,
    default=top_shapes
)

# Count reports per selected shape and give the columns stable names.
filtered_df = df[df['shape'].isin(selected_shapes)]
shape_counts = filtered_df['shape'].value_counts().reset_index()
shape_counts.columns = ['shape', 'count']

shape_viz = alt.Chart(shape_counts).mark_bar().encode(
    # sort='-x' orders the bars by descending report count.
    y=alt.Y('shape:N',
            sort='-x',
            title='UFO Shape'),
    x=alt.X('count:Q',
            title='Number of Reports'),
    color=alt.Color('count:Q', scale=alt.Scale(scheme='viridis')),
    tooltip=['shape:N', 'count:Q']
).properties(
    width=700,
    # 25 px per selected shape, but never shorter than 400 px.
    height=max(len(selected_shapes) * 25, 400)
).interactive()

st.altair_chart(shape_viz)
# Written commentary displayed beneath the shape distribution chart.
shape_writeup = """**(1) Features Highlighted**
This visualization focuses on the distribution of reported UFO shapes. The data shows clear preferences in how witnesses describe UFO shapes, with certain forms being consistently more common across reports. This analysis helps identify patterns in how people perceive and describe unidentified flying objects.
**(2) Design Choices**
The visualization employs several intentional design elements:
+ A horizontal bar chart format was chosen to accommodate long shape labels and enable easy comparison of quantities
+ Bars are sorted in descending order to immediately highlight the most common shapes
+ The chart focuses on the top 10 shapes to maintain clarity and prevent information overload
+ Interactive tooltips provide precise counts for each shape category.
**(3) Potential Improvements**
With additional time, I would expand this visualization by adding temporal analysis to show how shape distributions have changed over decades, incorporate geographical analysis to reveal regional patterns in shape reporting."""
st.markdown(shape_writeup)