File size: 5,715 Bytes
7a7f593
a9d9e97
 
705057b
e988e3b
c4dea99
 
e988e3b
705057b
e2531ec
 
 
dcae8dc
 
 
 
 
 
 
 
e3b9f40
cd9e006
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dbb4724
 
 
db82ba3
e3b9f40
db82ba3
 
cd9e006
131128e
04a1fbe
8ff835f
 
 
 
 
0d4253b
8ff835f
 
 
dbb4724
 
 
 
 
 
 
 
 
8ff835f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e48f638
131128e
8ff835f
db82ba3
70c529a
 
 
 
 
 
 
 
 
 
634c585
131128e
1cd9c27
f8b0ab5
cd9e006
db46464
1a8aed3
cd9e006
 
e61cedc
cd9e006
 
1cd9c27
cd9e006
1cd9c27
cd9e006
886eb90
3118a9b
cd9e006
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import numpy as np
import pandas as pd
import streamlit as st
import json

st.set_page_config(layout="wide")

# load data
# Parse the recipe list from data.json. The context manager closes the file
# handle as soon as parsing finishes (or fails) instead of leaving it open.
with open('data.json', encoding='utf-8') as f:
    recipes_json = json.load(f)
recipes = pd.DataFrame(recipes_json)
# One row per (recipe, ingredient) pair; built from a fresh frame so that
# later changes to `ingredients` cannot touch `recipes`.
ingredients = pd.DataFrame(recipes_json).explode('ingredients')
# Distinct non-null ingredient values, in order of first appearance.
bar_list = ingredients.loc[ingredients['ingredients'].notnull(), 'ingredients'].unique().tolist()
# Home-bar checklist: every ingredient starts out as "not owned".
bar_dict = [{'ingredients': item, 'have': False} for item in bar_list]
# Naming the columns keeps 'ingredients' and 'have' present even when
# bar_list is empty, so later lookups of bar_df['have'] do not fail.
bar_df = pd.DataFrame(bar_dict, columns=['ingredients', 'have'])
def similarity(ratings, kind='user', epsilon=1e-9):
    """Return a normalised dot-product (cosine-style) similarity matrix.

    The pairwise dot products are computed, ``epsilon`` is added to every
    entry, and each entry ``(i, j)`` is then divided by the square roots of
    the diagonal entries ``(i, i)`` and ``(j, j)``.

    Args:
        ratings: 2-D array supporting ``.dot`` and ``.T`` (the script passes
            a NumPy array).
        kind: ``'user'`` compares rows (``ratings . ratings.T``);
            ``'item'`` compares columns (``ratings.T . ratings``).
        epsilon: Constant added to every dot product, which keeps the
            diagonal (and therefore the divisors) away from zero for
            all-zero rows or columns.

    Returns:
        A square matrix with one row and one column per compared row
        (``'user'``) or column (``'item'``) of ``ratings``.

    Raises:
        ValueError: If ``kind`` is neither ``'user'`` nor ``'item'``.
    """
    if kind == 'user':
        sim = ratings.dot(ratings.T) + epsilon
    elif kind == 'item':
        sim = ratings.T.dot(ratings) + epsilon
    else:
        # Previously an unknown kind fell through and failed later with an
        # UnboundLocalError on `sim`; fail early with a clear message instead.
        raise ValueError(f"kind must be 'user' or 'item', got {kind!r}")
    # Row vector of per-entity norms; dividing by it and by its transpose
    # scales entry (i, j) by norm_i * norm_j.
    norms = np.array([np.sqrt(np.diagonal(sim))])
    return sim / norms / norms.T
# Recipe-by-ingredient indicator table: one row per recipe name, one column
# per ingredient, holding 1 where the pair occurs in `ingredients` and 0
# everywhere else.
pivot = ingredients[['name', 'ingredients']].assign(count=1)
pivot = (
    pivot.set_index(['name', 'ingredients'])['count']
    .unstack()
    .reset_index()
    .rename_axis(None, axis=1)
    .fillna(0)
)
pivot_names = pivot.columns
pivot_np = np.array(pivot.set_index('name'))

# Recipe-to-recipe similarity, labelled with recipe names on both axes so a
# recipe's scores can be looked up by name.
recipe_labels = pivot['name'].values
recipe_similarity = pd.DataFrame(
    similarity(pivot_np, kind='user'),
    index=recipe_labels,
    columns=recipe_labels,
)

# manage session state
# Each default is written only when its key is not already in the session
# state, so existing values are left alone.
if 'bar_df' not in st.session_state:
    st.session_state['bar_df'] = bar_df
    st.session_state['bar_df_edited'] = st.session_state['bar_df'].copy()

if 'filter_bar' not in st.session_state:
    # With no home-bar restriction yet, every recipe passes this filter.
    st.session_state['filter_bar'] = recipes['name'].to_list()

# recipe finder section
# Each filter below produces a list of recipe names that pass it; an unused
# filter passes every recipe. The lists filter_name, filter_ingredient,
# filter_source and filter_type are combined further down in the script.
st.header("Recipe Finder")
with st.expander("Find recipes by name, ingredients, and type"):
    all_recipe_names = recipes['name'].to_list()

    # name search
    name_search = st.text_input('Search recipes by name')
    if not name_search:
        filter_name = all_recipe_names
    else:
        # Case-insensitive literal substring match. The previous version
        # lower-cased only the query, so capitalised recipe names never
        # matched; regex=False stops characters such as "(" or "+" in the
        # query from being parsed as a pattern, and na=False treats missing
        # names as non-matches.
        filter_1 = recipes['name'].str.contains(name_search, case=False, regex=False, na=False)
        filter_name = recipes.loc[filter_1, 'name'].to_list()

    # ingredient filter
    # dropna() keeps the missing-ingredient marker out of the option list.
    options = st.multiselect(
        'Select ingredients to filter by:', ingredients['ingredients'].dropna().unique())
    match_any = 'Recipe contains ANY of the specified ingredients'
    match_all = 'Recipe contains ALL of the specified ingredients'
    # Stored under its own name: `filter_type` is reserved for the list of
    # recipe names produced by the type filter at the end of this section.
    ingredient_mode = st.radio(
        "Specify type of ingredient filtering:",
        [match_any, match_all],
    )
    if not options:
        filter_ingredient = all_recipe_names
    else:
        filter_1 = ingredients['ingredients'].isin(options)
        if ingredient_mode == match_any:
            filter_ingredient = ingredients.loc[filter_1, 'name'].to_list()
        else:
            # Count DISTINCT selected ingredients per recipe, so a recipe that
            # lists the same ingredient twice is not over-counted, and do it
            # without adding a helper column to the shared `ingredients` frame.
            matched = ingredients.loc[filter_1].groupby('name')['ingredients'].nunique()
            filter_ingredient = matched[matched == len(options)].index.to_list()

    # source filter
    source_options = st.multiselect(
        'Filter by source:', recipes['source'].unique())
    if not source_options:
        filter_source = all_recipe_names
    else:
        filter_1 = recipes['source'].isin(source_options)
        filter_source = recipes.loc[filter_1, 'name'].to_list()

    # type filter
    type_options = st.multiselect(
        'Filter by type:', recipes['recipe_type'].unique())
    if not type_options:
        filter_type = all_recipe_names
    else:
        filter_1 = recipes['recipe_type'].isin(type_options)
        filter_type = recipes.loc[filter_1, 'name'].to_list()

with st.expander("Find recipes by what you can make with your bar at home"):
    # input home bar
    # Editable checklist: the 'ingredients' column is locked, 'have' can be ticked.
    edited_bar = st.data_editor(st.session_state.bar_df, disabled=["ingredients"], hide_index=True)
    st.session_state.bar_df_edited = edited_bar
    if edited_bar['have'].sum() != 0:
        # Attach the 'have' flag to every (recipe, ingredient) row and turn it
        # into 0/1 so it can be counted and summed per recipe.
        have_lookup = edited_bar.set_index('ingredients')
        ingredients_joined = ingredients.join(have_lookup, on='ingredients')
        ingredients_joined = ingredients_joined.replace({'have': {True: 1, False: 0}})
        pivot = (
            ingredients_joined.groupby('name')
            .agg(sum_needs_ingredients=('have', 'count'), sum_has_ingredients=('have', 'sum'))
            .reset_index()
        )
        needed = pivot['sum_needs_ingredients']
        owned = pivot['sum_has_ingredients']
        # Keep recipes with at least one ingredient where every one is ticked.
        filter_all = (owned > 0) & (needed > 0) & (needed == owned)
        st.session_state.filter_bar = pivot.loc[filter_all, 'name'].to_list()
    else:
        # Nothing ticked: do not restrict the recipe list by the home bar.
        st.session_state.filter_bar = recipes['name'].to_list()

with st.expander("Find recipes that are similar to one another"):
    # max_selections=1 means the result is a list holding at most one name.
    similar_select = st.multiselect("Select a recipe:", recipes['name'], max_selections=1)

# Recipes section
st.header("Recipes")
# A recipe is listed only if its name passes every filter, so intersect the
# per-filter name lists.
name_lists = (
    filter_name,
    filter_ingredient,
    filter_source,
    filter_type,
    st.session_state.filter_bar,
)
filter_all = list(set.intersection(*map(set, name_lists)))

if len(similar_select) == 0:
    st.dataframe(recipes[recipes['name'].isin(filter_all)], hide_index=True)
else:
    st.markdown(f'Recipes sorted with most similar to **{similar_select[0]}** at the top.')
    # Add the selected recipe's similarity column, sort on it (highest first),
    # then keep only the recipes that passed the filters.
    ranked = (
        recipes.set_index('name')
        .join(recipe_similarity[similar_select], on='name')
        .reset_index()
        .sort_values(by=similar_select, ascending=False)
        .query('name in @filter_all')
    )
    st.dataframe(
        ranked,
        column_config={similar_select[0]: st.column_config.NumberColumn('% similarity to selected')},
        hide_index=True)