Spaces:
Sleeping
Sleeping
hw5
Browse files- app.py +93 -61
- requirements.txt +2 -1
app.py
CHANGED
@@ -1,73 +1,105 @@
|
|
1 |
-
# INSTRUCTIONS:
|
2 |
-
# 1. Open a "Terminal" by: View --> Terminal OR just the "Terminal" through the hamburger menu
|
3 |
-
# 2. run in terminal with: streamlit run app.py
|
4 |
-
# 3. click the "Open in Browser" link that pops up OR click on "Ports" and copy the URL
|
5 |
-
# 4. Open a Simple Browser with View --> Command Palette --> Simple Browser: Show
|
6 |
-
# 5. use the URL from prior steps as input into this simple browser
|
7 |
-
|
8 |
-
|
9 |
import streamlit as st
|
|
|
10 |
import altair as alt
|
11 |
-
from vega_datasets import data
|
12 |
-
|
13 |
-
st.title('Streamlit App for IS445: ID26572')
|
14 |
|
15 |
-
|
|
|
16 |
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
color = alt.Color("weather:N", scale=scale)
|
24 |
-
|
25 |
-
# We create two selections:
|
26 |
-
# - a brush that is active on the top panel
|
27 |
-
# - a multi-click that is active on the bottom panel
|
28 |
-
brush = alt.selection_interval(encodings=["x"])
|
29 |
-
click = alt.selection_point(encodings=["color"])
|
30 |
-
|
31 |
-
# Top panel is scatter plot of temperature vs time
|
32 |
-
points = (
|
33 |
-
alt.Chart()
|
34 |
-
.mark_point()
|
35 |
-
.encode(
|
36 |
-
alt.X("monthdate(date):T", title="Date (Month Year)"),
|
37 |
-
alt.Y(
|
38 |
-
"temp_max:Q",
|
39 |
-
title="Maximum Daily Temperature (C)",
|
40 |
-
scale=alt.Scale(domain=[-5, 40]),
|
41 |
-
),
|
42 |
-
color=alt.condition(brush, color, alt.value("lightgray")),
|
43 |
-
size=alt.Size("precipitation:Q", scale=alt.Scale(range=[5, 200])),
|
44 |
)
|
45 |
-
|
46 |
-
|
47 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
)
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
)
|
65 |
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
-
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
with tab2:
|
73 |
-
st.altair_chart(chart, theme=None, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
import altair as alt
|
|
|
|
|
|
|
4 |
|
5 |
+
# Set page title
|
6 |
+
st.title(' UFO Data Visualization Analysis Report')
|
7 |
|
8 |
+
# Load and process data
|
9 |
+
@st.cache_data
|
10 |
+
def load_data():
|
11 |
+
columns = [
|
12 |
+
'datetime', 'city', 'state', 'country', 'shape',
|
13 |
+
'duration_seconds', 'duration_reported', 'description',
|
14 |
+
'date_posted', 'latitude', 'longitude'
|
15 |
+
]
|
16 |
|
17 |
+
df = pd.read_csv(
|
18 |
+
'https://github.com/UIUC-iSchool-DataViz/is445_data/raw/main/ufo-scrubbed-geocoded-time-standardized-00.csv',
|
19 |
+
header=None,
|
20 |
+
names=columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
)
|
22 |
+
# print(df)
|
23 |
+
|
24 |
+
df['datetime'] = pd.to_datetime(df['datetime'])
|
25 |
+
df['year'] = df['datetime'].dt.year
|
26 |
+
df['month'] = df['datetime'].dt.month
|
27 |
+
df['shape'] = df['shape'].fillna('unknown').str.lower()
|
28 |
+
return df
|
29 |
+
|
30 |
+
|
31 |
+
df = load_data()
|
32 |
+
|
33 |
+
# First visualization
|
34 |
+
st.markdown("## 1. Temporal Trends in UFO Sightings")
|
35 |
+
|
36 |
+
|
37 |
+
yearly_counts = df.groupby('year').size().reset_index(name='count')
|
38 |
+
yearly_viz = alt.Chart(yearly_counts).mark_line(
|
39 |
+
point=True
|
40 |
+
).encode(
|
41 |
+
x=alt.X('year:Q', title='Year'),
|
42 |
+
y=alt.Y('count:Q', title='Number of Sightings'),
|
43 |
+
tooltip=['year:Q', 'count:Q']
|
44 |
+
).properties(
|
45 |
+
width=700,
|
46 |
+
height=400
|
47 |
)
|
48 |
|
49 |
+
st.altair_chart(yearly_viz)
|
50 |
+
|
51 |
+
st.markdown("""
|
52 |
+
**(1) Features Highlighted**
|
53 |
+
|
54 |
+
This visualization emphasizes the temporal evolution of UFO sightings from 1949 to 2013, highlighting both the overall trend and year-specific fluctuations in reporting frequency. The visualization reveals distinct patterns of increased reporting over time, with notable spikes in certain periods that could correlate with significant historical events or changes in reporting methods.
|
55 |
+
|
56 |
+
**(2) Design Choices**
|
57 |
+
|
58 |
+
I implemented several key design elements for optimal data representation:
|
59 |
+
+ A line chart with point markers was chosen for its effectiveness in showing continuous time-series data while maintaining precise year-specific values
|
60 |
+
+ Interactive tooltips were added to provide exact sighting counts
|
61 |
+
|
62 |
+
**(3) Potential Improvements**
|
63 |
+
|
64 |
+
Given more time, I would implement dual y-axes to show both sighting frequency and duration, and add the capability to filter by time periods and incorporate monthly/seasonal analysis options.
|
65 |
+
""")
|
66 |
+
|
67 |
+
# Second visualization
|
68 |
+
st.markdown("## 2. Analysis of UFO Shape Distribution")
|
69 |
+
|
70 |
+
shape_counts = df['shape'].value_counts().reset_index()
|
71 |
+
shape_counts.columns = ['shape', 'count']
|
72 |
+
shape_viz = alt.Chart(shape_counts).mark_bar().encode(
|
73 |
+
y=alt.Y('shape:N',
|
74 |
+
sort='-x',
|
75 |
+
title='UFO Shape'),
|
76 |
+
x=alt.X('count:Q',
|
77 |
+
title='Number of Reports'),
|
78 |
+
color=alt.Color('count:Q',
|
79 |
+
legend=None),
|
80 |
+
tooltip=['shape:N', 'count:Q']
|
81 |
+
).properties(
|
82 |
+
width=700,
|
83 |
+
height=400
|
84 |
)
|
85 |
|
86 |
+
st.altair_chart(shape_viz)
|
87 |
+
|
88 |
+
|
89 |
+
st.markdown("""
|
90 |
+
**(1) Features Highlighted**
|
91 |
+
|
92 |
+
This visualization focuses on the distribution of reported UFO shapes. The data shows clear preferences in how witnesses describe UFO shapes, with certain forms being consistently more common across reports. This analysis helps identify patterns in how people perceive and describe unidentified flying objects.
|
93 |
+
|
94 |
+
**(2) Design Choices**
|
95 |
+
|
96 |
+
The visualization employs several intentional design elements:
|
97 |
+
+ A horizontal bar chart format was chosen to accommodate long shape labels and enable easy comparison of quantities
|
98 |
+
+ Bars are sorted in descending order to immediately highlight the most common shapes
|
99 |
+
+ The chart focuses on the top 10 shapes to maintain clarity and prevent information overload
|
100 |
+
+ Interactive tooltips provide precise counts for each shape category.
|
101 |
|
102 |
+
**(3) Potential Improvements**
|
103 |
|
104 |
+
With additional time, I would expand this visualization by adding temporal analysis to show how shape distributions have changed over decades, incorporate geographical analysis to reveal regional patterns in shape reporting.
|
105 |
+
""")
|
|
|
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
streamlit
|
2 |
altair
|
3 |
-
vega_datasets
|
|
|
|
1 |
streamlit
|
2 |
altair
|
3 |
+
vega_datasets
|
4 |
+
pandas
|