"""Streamlit recipe finder.

Loads recipes from ``data.json`` and lets the user narrow them down by name,
ingredients, source, recipe type and the contents of their home bar, and
optionally rank the result by ingredient similarity to a chosen recipe.

The script reads the ``name``, ``ingredients``, ``source`` and
``recipe_type`` fields of every record.  ``ingredients`` is exploded, so it is
presumably a list per recipe -- verify against the data file.
"""
import json

import numpy as np
import pandas as pd
import streamlit as st

st.set_page_config(layout="wide")

# Labels of the ingredient-matching radio; compared against the widget value.
ANY_INGREDIENT = 'Recipe contains ANY of the specified ingredients'
ALL_INGREDIENTS = 'Recipe contains ALL of the specified ingredients'

# load data
# The context manager closes the file handle even if parsing fails.
with open('data.json', encoding='utf-8') as f:
    recipes_json = json.load(f)

recipes = pd.DataFrame(recipes_json)
# One row per (recipe, ingredient) pair.
ingredients = pd.DataFrame(recipes_json).explode('ingredients')

# Every distinct, non-null ingredient; used for the home-bar checklist and
# for the ingredient multiselect.
bar_list = ingredients.loc[ingredients['ingredients'].notnull(), 'ingredients'].unique().tolist()
bar_dict = [{'ingredients': item, 'have': False} for item in bar_list]
# Explicit columns keep 'ingredients'/'have' present even when bar_list is empty.
bar_df = pd.DataFrame(bar_dict, columns=['ingredients', 'have'])


def similarity(ratings, kind='user', epsilon=1e-9):
    """Return the pairwise cosine-style similarity matrix for *ratings*.

    Args:
        ratings: 2-D numeric array.
        kind: ``'user'`` compares the rows of *ratings* with each other,
            ``'item'`` compares the columns.
        epsilon: Small constant added to every entry of the dot-product
            matrix so that all-zero vectors do not cause a division by zero.

    Returns:
        A square array: the dot-product matrix divided by the outer product
        of the square roots of its diagonal.

    Raises:
        ValueError: If *kind* is neither ``'user'`` nor ``'item'``.
    """
    if kind == 'user':
        sim = ratings.dot(ratings.T) + epsilon
    elif kind == 'item':
        sim = ratings.T.dot(ratings) + epsilon
    else:
        raise ValueError(f"kind must be 'user' or 'item', got {kind!r}")
    norms = np.array([np.sqrt(np.diagonal(sim))])
    return sim / norms / norms.T


# Recipe x ingredient indicator matrix.  Duplicate (name, ingredient) pairs
# are dropped first because unstack() cannot reshape a duplicated index.
pivot = ingredients[['name', 'ingredients']].drop_duplicates().copy()
pivot['count'] = 1
pivot = pivot.set_index(['name', 'ingredients'])['count'].unstack().reset_index()
pivot.rename_axis(None, axis=1, inplace=True)
pivot.fillna(0, inplace=True)
pivot_names = pivot.columns
pivot_np = np.array(pivot.set_index('name'))

# Recipe-to-recipe similarity, labelled by recipe name on both axes.
recipe_similarity = pd.DataFrame(
    similarity(pivot_np, kind='user'),
    index=pivot['name'].values,
    columns=pivot['name'].values,
)

# manage session state
if 'filter_bar' not in st.session_state:
    st.session_state.filter_bar = recipes['name'].to_list()
if 'bar_df' not in st.session_state:
    st.session_state.bar_df = bar_df
    st.session_state.bar_df_edited = st.session_state.bar_df.copy()

# recipe finder section
st.header("Recipe Finder")

with st.expander("Find recipes by name, ingredients, and type"):
    # name search
    name_search = st.text_input('Search recipes by name')
    if name_search == "":
        filter_name = recipes['name'].to_list()
    else:
        # Case-insensitive literal substring match: regex=False keeps input
        # such as "(" from raising, na=False treats missing names as no match.
        name_mask = recipes['name'].str.contains(
            name_search, case=False, regex=False, na=False)
        filter_name = recipes.loc[name_mask, 'name'].to_list()

    # ingredient filter
    options = st.multiselect(
        'Select ingredients to filter by:', bar_list)
    ingredient_match_mode = st.radio(
        "Specify type of ingredient filtering:",
        [ANY_INGREDIENT, ALL_INGREDIENTS],
    )
    if not options:
        filter_ingredient = recipes['name'].to_list()
    else:
        ingredient_mask = ingredients['ingredients'].isin(options)
        if ingredient_match_mode == ANY_INGREDIENT:
            filter_ingredient = ingredients.loc[ingredient_mask, 'name'].to_list()
        elif ingredient_match_mode == ALL_INGREDIENTS:
            # Count distinct selected ingredients per recipe so a recipe that
            # lists the same ingredient twice is not miscounted.
            matched = ingredients.loc[ingredient_mask].groupby('name')['ingredients'].nunique()
            filter_ingredient = matched[matched == len(options)].index.to_list()

    # source filter
    source_options = st.multiselect(
        'Filter by source:', recipes['source'].unique())
    if not source_options:
        filter_source = recipes['name'].to_list()
    else:
        source_mask = recipes['source'].isin(source_options)
        filter_source = recipes.loc[source_mask, 'name'].to_list()

    # type filter
    type_options = st.multiselect(
        'Filter by type:', recipes['recipe_type'].unique())
    if not type_options:
        filter_recipe_type = recipes['name'].to_list()
    else:
        type_mask = recipes['recipe_type'].isin(type_options)
        filter_recipe_type = recipes.loc[type_mask, 'name'].to_list()

with st.expander("Find recipes by what you can make with your bar at home"):
    # input home bar
    st.session_state.bar_df_edited = st.data_editor(
        st.session_state.bar_df, disabled=["ingredients"], hide_index=True)
    home_bar = st.session_state.bar_df_edited
    have_mask = home_bar['have'].eq(True)
    if not have_mask.any():
        # Nothing ticked: the home-bar filter is inactive.
        st.session_state.filter_bar = recipes['name'].to_list()
    else:
        owned_ingredients = set(home_bar.loc[have_mask, 'ingredients'])
        # Only rows with an actual ingredient count as a requirement; recipes
        # without any such row never pass this filter.
        needed = ingredients.loc[
            ingredients['ingredients'].notnull(), ['name', 'ingredients']]
        needed = needed.assign(
            owned=needed['ingredients'].isin(owned_ingredients).to_numpy())
        # A recipe can be made when every ingredient it needs is owned.
        can_make = needed.groupby('name')['owned'].all()
        st.session_state.filter_bar = can_make[can_make].index.to_list()

with st.expander("Find recipes that are similar to one another"):
    similar_select = st.multiselect(
        "Select a recipe:", recipes['name'], max_selections=1)

# Recipes section
st.header("Recipes")

# A recipe is shown only if it passes every active filter.
filter_all = list(
    set(filter_name)
    & set(filter_ingredient)
    & set(filter_source)
    & set(filter_recipe_type)
    & set(st.session_state.filter_bar)
)

if similar_select:
    st.markdown(f'Recipes sorted with most similar to **{similar_select[0]}** at the top.')
    # Attach the selected recipe's similarity column and rank by it.
    ranked = (
        recipes.set_index('name')
        .join(recipe_similarity[similar_select], on='name')
        .reset_index()
        .sort_values(by=similar_select, ascending=False)
    )
    st.dataframe(
        ranked[ranked['name'].isin(filter_all)],
        column_config={similar_select[0]: st.column_config.NumberColumn('% similarity to selected')},
        hide_index=True)
else:
    st.dataframe(recipes[recipes['name'].isin(filter_all)], hide_index=True)