Spaces:
Sleeping
Sleeping
import numpy as np | |
import pandas as pd | |
import streamlit as st | |
import json | |
st.set_page_config(layout="wide")

# load data
# Read the recipe file through a context manager so the handle is closed
# even if parsing fails; JSON text is UTF-8 by specification.
with open('data.json', encoding='utf-8') as f:
    recipes_json = json.load(f)

# One row per recipe.
recipes = pd.DataFrame(recipes_json)
# One row per (recipe, ingredient) pair; the recipe's index label is repeated
# for each exploded ingredient.
ingredients = recipes.explode('ingredients')

# Distinct, non-null ingredient names in first-seen order.
bar_list = ingredients.loc[ingredients['ingredients'].notnull(), 'ingredients'].unique().tolist()

# Home-bar table: every known ingredient starts out as "not owned".
bar_dict = [{'ingredients': item, 'have': False} for item in bar_list]
# Name the columns explicitly so the frame still has a 'have' column when
# the data contains no ingredients at all.
bar_df = pd.DataFrame(bar_dict, columns=['ingredients', 'have'])
def similarity(ratings, kind='user', epsilon=1e-9):
    """Return the pairwise cosine-similarity matrix of ``ratings``.

    Args:
        ratings: 2-D array-like supporting ``.dot`` and ``.T`` (for example
            a NumPy array).
        kind: ``'user'`` compares rows with one another (result is
            rows x rows); ``'item'`` compares columns (result is
            columns x columns).
        epsilon: Small constant added to every dot product so that an
            all-zero row/column does not cause a division by zero.

    Returns:
        A square matrix whose ``[i, j]`` entry is the cosine similarity
        between vector ``i`` and vector ``j``.

    Raises:
        ValueError: If ``kind`` is neither ``'user'`` nor ``'item'``.
    """
    if kind == 'user':
        sim = ratings.dot(ratings.T) + epsilon
    elif kind == 'item':
        sim = ratings.T.dot(ratings) + epsilon
    else:
        # Previously this fell through and failed later on an unbound name.
        raise ValueError(f"kind must be 'user' or 'item', got {kind!r}")
    # The diagonal holds each vector's squared length; shape it as a
    # (1, n) row so it broadcasts across columns, and its transpose
    # across rows.
    norms = np.array([np.sqrt(np.diagonal(sim))])
    return sim / norms / norms.T
# Build a recipe x ingredient indicator matrix (1 = recipe uses ingredient).
# De-duplicate (name, ingredient) pairs first: unstack() raises
# "Index contains duplicate entries" if the same pair appears twice, e.g.
# when a recipe lists an ingredient more than once.
pivot = ingredients[['name', 'ingredients']].drop_duplicates().assign(count=1)
pivot = pivot.set_index(['name', 'ingredients'])['count'].unstack().reset_index()
pivot.rename_axis(None, axis=1, inplace=True)
# Ingredients a recipe does not use come out of unstack() as NaN.
pivot.fillna(0, inplace=True)
pivot_names = pivot.columns
pivot_np = np.array(pivot.set_index('name'))

# Row-wise (recipe-to-recipe) cosine similarity, labelled by recipe name on
# both axes so a recipe's scores can be selected as a column.
recipe_similarity = pd.DataFrame(similarity(pivot_np, kind='user'))
recipe_similarity.columns = pivot['name'].values
recipe_similarity.index = pivot['name'].values
# manage session state
# Seed the per-session values only when they are missing, so reruns keep
# whatever is already stored.
if 'filter_bar' not in st.session_state:
    # Until a home bar is entered, every recipe passes the bar filter.
    st.session_state['filter_bar'] = recipes['name'].to_list()

if 'bar_df' not in st.session_state:
    st.session_state['bar_df'] = bar_df
    # Separate copy that receives the user's edits.
    st.session_state['bar_df_edited'] = bar_df.copy()
# recipe finder section
# Each filter below produces a list of recipe names that pass it; an unused
# filter passes every recipe. The lists are intersected further down.
st.header("Recipe Finder")
with st.expander("Find recipes by name, ingredients, and type"):
    # name search
    name_search = st.text_input('Search recipes by name')
    if name_search == "":
        filter_name = recipes['name'].to_list()
    else:
        # Literal, case-insensitive substring match. regex=False keeps
        # input such as "(" from raising a regex error, and na=False keeps
        # missing names from putting NaN into the boolean mask.
        name_mask = recipes['name'].str.contains(
            name_search, case=False, regex=False, na=False)
        filter_name = recipes.loc[name_mask, 'name'].to_list()

    # ingredient filter
    # Drop missing values so NaN is not offered as a selectable ingredient.
    options = st.multiselect(
        'Select ingredients to filter by:',
        ingredients['ingredients'].dropna().unique())
    any_label = 'Recipe contains ANY of the specified ingredients'
    all_label = 'Recipe contains ALL of the specified ingredients'
    ingredient_mode = st.radio(
        "Specify type of ingredient filtering:",
        [any_label, all_label]
    )
    if len(options) == 0:
        filter_ingredient = recipes['name'].to_list()
    else:
        # Rows of the exploded table whose ingredient was selected.
        matched = ingredients[ingredients['ingredients'].isin(options)]
        if ingredient_mode == any_label:
            filter_ingredient = matched['name'].to_list()
        else:
            # Count DISTINCT selected ingredients per recipe, so a recipe
            # that lists one ingredient twice is not miscounted, and do it
            # without adding a helper column to the shared table.
            distinct_hits = matched.groupby('name')['ingredients'].nunique()
            filter_ingredient = distinct_hits[distinct_hits == len(options)].index.to_list()

    # source filter
    source_options = st.multiselect(
        'Filter by source:', recipes['source'].unique())
    if len(source_options) == 0:
        filter_source = recipes['name'].to_list()
    else:
        source_mask = recipes['source'].isin(source_options)
        filter_source = recipes.loc[source_mask, 'name'].to_list()

    # type filter
    type_options = st.multiselect(
        'Filter by type:', recipes['recipe_type'].unique())
    if len(type_options) == 0:
        filter_type = recipes['name'].to_list()
    else:
        type_mask = recipes['recipe_type'].isin(type_options)
        filter_type = recipes.loc[type_mask, 'name'].to_list()
with st.expander("Find recipes by what you can make with your bar at home"):
    # input home bar
    # Editable table of ingredients; only the 'have' column can be changed.
    edited_bar = st.data_editor(
        st.session_state.bar_df,
        disabled=["ingredients"],
        hide_index=True,
    )
    st.session_state.bar_df_edited = edited_bar

    if edited_bar['have'].sum() == 0:
        # Nothing ticked yet: the bar filter lets every recipe through.
        st.session_state.filter_bar = recipes['name'].to_list()
    else:
        # Attach the have/don't-have flag to each (recipe, ingredient) row.
        have_lookup = edited_bar.set_index('ingredients')
        ingredients_joined = ingredients.join(have_lookup, on='ingredients')
        ingredients_joined.replace({'have': {True: 1, False: 0}}, inplace=True)

        # Per recipe: number of flagged ingredient rows and number owned.
        per_recipe = (
            ingredients_joined
            .groupby('name')
            .agg(
                sum_needs_ingredients=('have', 'count'),
                sum_has_ingredients=('have', 'sum'),
            )
            .reset_index()
        )
        needed = per_recipe['sum_needs_ingredients']
        owned = per_recipe['sum_has_ingredients']

        # Keep a recipe only if it needs at least one ingredient and every
        # ingredient it needs is owned.
        makeable = ((owned > 0) & (needed > 0)) & (needed == owned)
        st.session_state.filter_bar = per_recipe.loc[makeable, 'name'].to_list()
with st.expander("Find recipes that are similar to one another"):
    # At most one recipe may be chosen; the result is a list of 0 or 1 names.
    similar_select = st.multiselect("Select a recipe:", recipes['name'], max_selections=1)

# Recipes section
st.header("Recipes")

# A recipe is listed only if it passes every filter.
passing_names = set(filter_name)
for name_list in (filter_ingredient, filter_source, filter_type, st.session_state.filter_bar):
    passing_names = passing_names & set(name_list)
filter_all = list(passing_names)

if len(similar_select) > 0:
    st.markdown(f'Recipes sorted with most similar to **{similar_select[0]}** at the top.')
    # Column of similarity scores against the selected recipe, keyed by name.
    scores = recipe_similarity[similar_select]
    ranked = (
        recipes.set_index('name')
        .join(scores, on='name')
        .reset_index()
        .sort_values(by=similar_select, ascending=False)
        .query('name in @filter_all')
    )
    st.dataframe(
        ranked,
        column_config={similar_select[0]: st.column_config.NumberColumn('% similarity to selected')},
        hide_index=True)
else:
    visible = recipes[recipes['name'].isin(filter_all)]
    st.dataframe(visible, hide_index=True)