daily log 11.19.20
How to scrape data from your favorite recipe blogs so you can sort by ingredients you have and how much time you want to spend!!
To get “ingredients” df (which is what we really want/need), we take the “original” df and simply do this:
WORKS FOR BOTH SITES (Half Baked Harvest and Minimalist Baker)!!
import ast

import pandas as pd
import requests
from bs4 import BeautifulSoup
# `wprm_print` page of the recipe to scrape. The commented-out line is the
# Half Baked Harvest equivalent; swap it in to run the same scraper there.
# url = "https://www.halfbakedharvest.com/wprm_print/73488"
url = "https://minimalistbaker.com/wprm_print/69445"
def _ingredient_part(item, css_class, default):
    """Return the text of the first ``<span class=css_class>`` inside *item*.

    Falls back to *default* when the ingredient has no such span.
    """
    span = item.find("span", css_class)
    return span.text if span is not None else default


def get_recipe_ingredients(url, timeout=30):
    """Scrape one ``wprm_print`` recipe page into a list of ingredient rows.

    Args:
        url: Address of the recipe's print page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the scrape forever.

    Returns:
        A list with one dict per ``li.wprm-recipe-ingredient`` element.
        Every dict repeats the recipe-level fields (``url``,
        ``recipe_name``, ``recipe_img``, ``rating``, ``vote_count``) next to
        the ingredient's own ``amount``, ``unit`` and ``name``. An
        ingredient part that is absent from the page is reported as
        ``'no amount'``, ``'no unit'`` or ``'no name'`` respectively.

    Raises:
        IndexError: If the page has no recipe name, image, rating average or
            rating count element.
        KeyError: If the recipe image tag has no ``src`` attribute.
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    r = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(r.content, 'html.parser')

    # Recipe-level fields: indexing [0] deliberately raises IndexError when
    # the element is missing, so callers can tell the page is not a recipe.
    recipe_name = soup.find_all('h2', 'wprm-recipe-name')[0].text.strip()
    recipe_img = soup.find_all('div', 'wprm-recipe-image')[0].find_all('img')[0]['src']
    rating = soup.find_all("span", 'wprm-recipe-rating-average')[0].text.strip()
    vote_count = soup.find_all("span", 'wprm-recipe-rating-count')[0].text.strip()

    all_ingredients = []
    for item in soup.find_all("li", "wprm-recipe-ingredient"):
        all_ingredients.append({
            'url': url,
            'recipe_name': recipe_name,
            'recipe_img': recipe_img,
            'amount': _ingredient_part(item, "wprm-recipe-ingredient-amount", 'no amount'),
            'unit': _ingredient_part(item, "wprm-recipe-ingredient-unit", 'no unit'),
            'name': _ingredient_part(item, "wprm-recipe-ingredient-name", 'no name'),
            'rating': rating,
            'vote_count': vote_count,
        })
    return all_ingredients
# Scrape the sample recipe. The returned list is not stored here
# (presumably it was displayed as notebook cell output).
get_recipe_ingredients(url)
FOR HBH (Half Baked Harvest)
# Flatten the per-recipe ingredient lists stored in the Half Baked Harvest
# links CSV into one row per ingredient and save the result.
df = pd.read_csv('halfbakedharvest_links_with_ingredients.csv')
# 'no data' is a placeholder value in this column (presumably written when a
# recipe could not be scraped); such rows have nothing to parse.
df = df[df['ingredients'] != 'no data']
test = df['ingredients']
# Each cell is the text form of a list of ingredient dicts. Parse it with
# ast.literal_eval rather than eval: it accepts only Python literals, so text
# that originated from scraped pages can never run as code.
ing_dict = [y for x in test for y in ast.literal_eval(x)]
ing_df = pd.DataFrame(ing_dict)
# The recipe id is the last path segment of the print URL
# (e.g. ".../wprm_print/73488" -> "73488"); computed column-wise instead of
# with a row-by-row apply.
ing_df['recipe_id'] = ing_df['url'].str.split('/').str[-1]
ing_df.to_csv('halfbakedharvest_ingredients_only_v2.csv', index=False)
FOR MB (Minimalist Baker)
# Turn the 'yoast_data' column of the Minimalist Baker CSV into a DataFrame
# with one row per parsed cell.
df = pd.read_csv('minimalist_baker_with_yoast.csv')
# 'no data' is a placeholder value in this column (presumably written when
# nothing could be scraped); such rows have nothing to parse.
df = df[df['yoast_data'] != 'no data']
test = df['yoast_data']
# Each cell is parsed as a Python literal. ast.literal_eval is used rather
# than eval so that text originating from scraped pages can never run as code.
# NOTE(review): assumes every cell is a plain literal (dict of strings,
# numbers, lists, ...) -- confirm against the CSV.
ing_dict = [ast.literal_eval(x) for x in test]
ing_df = pd.DataFrame(ing_dict)
CTCI (Cracking the Coding Interview) practice
/**
 * Replace every space in the first `length` characters of `string` with '%20'.
 *
 * Characters at index `length` and beyond (e.g. trailing padding) are dropped
 * from the result.
 *
 * @param {string} string - Text to convert; it is also logged to the console.
 * @param {number} [length=string.length] - Number of leading characters to
 *   convert. Values larger than the string are treated as the string's length.
 * @returns {string} The converted prefix.
 */
function URLify(string, length = string.length){
  console.log(string)
  // Never read past the end of the string: string[i] would be undefined there
  // and the literal text "undefined" would end up in the result.
  const end = Math.min(length, string.length)
  const pieces = []
  for(let i=0; i<end; i++){
    pieces.push(string[i] === ' ' ? '%20' : string[i])
  }
  return pieces.join('')
}
// Sample call: only the first 13 characters ("Mr John Smith") are converted;
// the trailing space is padding and is left out of the result.
var test = "Mr John Smith "
var length = 13
URLify(test, length)
// SPACE: O(N)
// TIME: O(N)
/**
 * Tell whether the characters of `string` (spaces ignored) can be rearranged
 * into a palindrome, i.e. whether at most one character occurs an odd number
 * of times. The comparison is case-sensitive.
 *
 * Each distinct character and its count is logged to the console.
 *
 * @param {string} string - Text to examine.
 * @returns {boolean} true when at most one character has an odd count.
 */
function palindromePermutation(string){
  // Tally every non-space character.
  const counts = {}
  for (let idx = 0; idx < string.length; idx += 1) {
    const ch = string[idx]
    if (ch === " ") {
      continue
    }
    counts[ch] = (counts[ch] || 0) + 1
  }

  // Count how many characters appear an odd number of times.
  let oddTotal = 0
  for (const [ch, tally] of Object.entries(counts)) {
    console.log(ch, tally)
    if (tally % 2 !== 0) {
      oddTotal += 1
    }
  }

  // A palindrome can have at most one unpaired (middle) character.
  return oddTotal <= 1
}
// Sample calls; the return values are discarded, so only the per-character
// counts logged inside the function are visible.
palindromePermutation('tact coa')  // returns true: only 'o' has an odd count
palindromePermutation('aaaatc')  // returns false: 't' and 'c' both have odd counts