File size: 5,442 Bytes
29b5760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
import pandas as pd
import plotly.express as px
import pycountry
import plotly.graph_objects as go
import plotly.figure_factory as ff
import pycountry_convert as pc
import pandas as pd
import numpy as np

import pycountry
import pycountry_convert as pc
# Load the raw user records; the path is resolved relative to the working directory.
user = pd.read_csv('train_users_2.csv')

# Keep only rows whose age is below 100. Rows with a missing age are dropped as
# well, because NaN < 100 evaluates to False.
# .copy() detaches the filtered frame from the original so that the column
# assignments below do not raise pandas' SettingWithCopyWarning.
user = user[user['age'] < 100].copy()

# Derive sign-up year / month / 'YYYY-MM' by slicing the date string.
# NOTE(review): assumes date_account_created is formatted 'YYYY-MM-DD' - confirm.
user["year"] = user["date_account_created"].str[:4].astype(int)
user["month"] = user["date_account_created"].str[5:7].astype(int)
user['year-month'] = user['date_account_created'].str[:7]

# Users without a booking get the sentinel date '2020-13-31' so the integer
# conversions below succeed; it yields year_booking == 2020 and
# month_booking == 13, which downstream code can use to filter them out.
user['date_first_booking'] = user['date_first_booking'].fillna('2020-13-31')

user['month_booking'] = user['date_first_booking'].str[5:7].astype(int)
user['year_booking'] = user['date_first_booking'].str[:4].astype(int)
user['year-month_booking'] = user['date_first_booking'].str[:7]

# Upper-case the language codes used for the "Select Region" control.
user["language"] = user["language"].str.upper()

# `df` is an alias of `user` (same object), used by the dashboard code below.
df = user
# Stylesheet list kept for compatibility; nothing in the visible script reads it.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

# Page chrome: wide layout plus the dashboard title.
st.set_page_config(layout="wide")
st.title("Airbnb New User Bookings")

# One narrow column for the controls and two equally wide columns for charts.
column_width_ratios = [0.5, 2, 2]
col0, col1, col2 = st.columns(column_width_ratios)

with col0:
    # Region choices are the distinct values of the 'language' column.
    language_options = df['language'].unique()
    region = st.selectbox(label="Select Region", options=language_options, index=0)

    # Categorical column to break the first chart down by.
    breakdown_options = ['first_device_type', 'first_browser', 'affiliate_provider']
    column = st.selectbox(label="Select Column", options=breakdown_options, index=0)

    # Year slider spans the sign-up years in the data and starts at the latest one.
    earliest_year = df['year'].min()
    latest_year = df['year'].max()
    year = st.slider(
        label="Select Year",
        min_value=earliest_year,
        max_value=latest_year,
        value=latest_year,
    )
with col1:
    # ------------------------------------------------------------------
    # Chart 1: sign-ups vs. bookings per category of the selected column,
    # for accounts created in the selected year.
    # ------------------------------------------------------------------
    filtered_df = df[(df['year'] == year)]

    # "visit"   = accounts created in `year`, per category.
    # "booking" = those of them whose first booking is also in `year`.
    visit = filtered_df[column].value_counts().rename('visit')
    booking = (
        filtered_df.loc[filtered_df['year_booking'] == year, column]
        .value_counts()
        .rename('booking')
    )

    # Left join so categories with sign-ups but no bookings stay on the chart
    # with a booking count (and rate) of 0 instead of silently disappearing.
    counts = visit.to_frame().join(booking)
    counts['booking'] = counts['booking'].fillna(0).astype(int)

    # Name the category column after the selected column so that the axis
    # label matches what the user picked (not always 'first_device_type').
    counts = counts.rename_axis(column).reset_index()
    counts['rate'] = counts['booking'] / counts['visit']

    # Long format: one row per (category, visit|booking) for the bar chart.
    counts_t = counts.melt(id_vars=[column], value_vars=['visit', 'booking'])
    fig = px.bar(counts_t, x=column, y="value", color='variable')

    # Conversion rate is drawn against a secondary right-hand axis fixed to [0, 1].
    fig.update_layout(yaxis2=dict(overlaying='y', side='right', range=[0, 1]))
    fig.add_trace(go.Scatter(x=counts[column], y=counts['rate'], mode='lines+markers', name='Conversion Rate', yaxis='y2'))
    fig.update_layout(height=250, margin={'l': 20, 'b': 50, 't': 10, 'r': 10}, hovermode='closest')
    st.plotly_chart(fig)

    # ------------------------------------------------------------------
    # Chart 2: monthly sign-ups and first bookings for the selected region
    # (region == value of the 'language' column).
    # ------------------------------------------------------------------
    dff = df[df['language'] == region]

    # Sign-ups per 'YYYY-MM' of account creation.
    visit = dff['year-month'].value_counts().rename('visit')

    # First bookings per 'YYYY-MM'. year_booking == 2020 is the sentinel given
    # to users without a booking during data preparation, so it is excluded.
    booked = dff[dff['year_booking'] != 2020]
    booking = booked['year-month_booking'].value_counts().rename('booking')

    # Left join keeps sign-up months that had no bookings (booking = 0);
    # sorting the 'YYYY-MM' strings puts the months in chronological order.
    counts = visit.to_frame().join(booking).sort_index()
    counts['booking'] = counts['booking'].fillna(0).astype(int)
    counts = counts.rename_axis('year-month').reset_index()

    # NOTE: bookings are counted by booking month regardless of sign-up month,
    # so this ratio can exceed 1 and then falls outside the [0, 1] axis range.
    counts['rate'] = counts['booking'] / counts['visit']

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=counts['year-month'], y=counts['visit'], mode='lines', name='visit'))
    fig.add_trace(go.Scatter(x=counts['year-month'], y=counts['booking'], mode='lines', name='booking'))
    fig.update_layout(yaxis2=dict(overlaying='y', side='right', range=[0, 1]))
    fig.add_trace(go.Scatter(x=counts['year-month'], y=counts['rate'], mode='lines+markers', name='Conversion Rate', yaxis='y2'))
    fig.update_layout(height=250, margin={'l': 20, 'b': 30, 'r': 10, 't': 10})
    st.plotly_chart(fig)

with col2:
    # Shared look for the bold year label placed in the top-left of both figures.
    year_label = '<b>{}</b>'.format(year)
    year_annotation = dict(
        x=0, y=0.85,
        xanchor='left', yanchor='bottom',
        xref='paper', yref='paper',
        showarrow=False, align='left',
    )

    # --- Destination map for accounts created in the selected year ---
    # 'NDF' and 'other' are excluded before looking the codes up with pycountry.
    has_country_code = ~df["country_destination"].isin(["NDF", "other"])
    dff = df[has_country_code & (df['year'] == year)]

    # One row per destination with its number of users.
    counts = (
        dff['country_destination']
        .value_counts()
        .rename_axis('country_destination')
        .reset_index(name='count')
    )

    # alpha-3 code is used as the map location; continent name drives the colour.
    counts['country'] = counts['country_destination'].map(
        lambda code: pycountry.countries.get(alpha_2=code).alpha_3
    )
    counts['continent'] = counts['country_destination'].map(
        lambda code: pc.convert_continent_code_to_continent_name(
            pc.country_alpha2_to_continent_code(code)
        )
    )

    fig = px.scatter_geo(
        counts,
        locations="country",
        color="continent",
        size='count',
        hover_name=counts['country'],
    )
    fig.update_traces(customdata=counts['country'])
    fig.add_annotation(text=year_label, **year_annotation)
    fig.update_layout(
        height=250,
        margin={'l': 20, 'b': 30, 'r': 10, 't': 10},
        clickmode='event+select',
    )
    st.plotly_chart(fig)

    # --- Age distribution by gender for the chosen destination and year ---
    destination_options = df['country_destination'].unique()
    country_name = st.selectbox("Select Country", destination_options, index=0)

    # Same destination, same sign-up year, and a known gender.
    violin_rows = (
        (df['country_destination'] == country_name)
        & (df['year'] == year)
        & (df['gender'] != '-unknown-')
    )
    dff = df[violin_rows]

    fig = px.violin(dff, x="gender", y="age", box=True, color="gender", violinmode='overlay')
    title = year_label
    fig.add_annotation(text=title, **year_annotation)
    fig.update_layout(height=225, margin={'l': 20, 'b': 30, 'r': 10, 't': 10})
    st.plotly_chart(fig)