daily log 10.19.20
CURRENT PROJECTS:
- CM – catcart (umjs-ecomm-catcart, personalposher, personalposherLIVE)
- FEM – rockpaper
- UJS – Firecode/Leetcode/System Design
CM REVIEW (from Friday):
- Stripe payments cannot be in an extension
- The user will have to create an account and a Stripe subscription on the web, get an “is subscribed” flag set on their record in MongoDB, and then sign in to the extension
- The user will log in to mongo via the extension
Next step: try building this with React and MongoDB
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import seaborn as sns
%matplotlib inline
# VISUALIZATION VARIABLES
# Font dictionaries; `titlefont` and `labelfont` are passed as `fontdict=` by
# make_stacked_bar_chart. The generic family name is 'sans-serif'
# ('san-serif' is not a family matplotlib knows, so it falls back with a
# "font family not found" warning).
font = {'family': 'sans-serif', 'size': 14, 'weight': 'normal'}
labelfont = {'family': 'sans-serif', 'size': 18}
labelfont_2 = {'family': 'serif', 'size': 12}
titlefont = {'family': 'serif', 'size': 30}
# VISUALIZATION COLORS
# Eight-step hex colour ramps, written as plain lists.
# (The previous `redgreen` expression lacked its outer quotes, so the adjacent
# string literals were concatenated without spaces and `.split(' ')` produced
# a single-element list instead of eight colours.)
gnbu = [
    '#F7FCF0', '#E0F3DB', '#CCEBC5', '#A8DDB5',
    '#7BCCC4', '#4EB3D3', '#2B8CBE', '#08589E',
]
redgreen = [
    '#D73027', '#F46D43', '#FDAE61', '#FEE08B',
    '#D9EF8B', '#A6D96A', '#66BD63', '#1A9850',
]
# NOTE(review): `rdylgn` is referenced by make_stacked_bar_chart when
# min_palette=True, but it is only defined in commented-out lines such as the
# one below.
# rdylgn = '"#D7191C" "#FDAE61" "#A6D96A" "#1A9641"'.replace('"','').split(' ')
# Alternative (disabled): sample the GnBu colormap instead of RdBu.
# new_colors = plt.cm.GnBu(np.linspace(0, 1, 25))
# 25 evenly spaced samples (positions 0..1) from matplotlib's RdBu colormap.
new_colors = plt.cm.RdBu(np.linspace(0, 1, 25))
# Font properties object for italic Times New Roman text.
# NOTE(review): in matplotlib `set_name` appears to be an alias of
# `set_family`, so the 'serif' family set on the first call is presumably
# replaced by 'Times New Roman' on the second — confirm that is intended.
multiplot_x = FontProperties()
multiplot_x.set_family('serif')
multiplot_x.set_name('Times New Roman')
multiplot_x.set_style('italic')
# Same contents as `labelfont_2` defined above.
labelfont2 = {'family': 'serif', 'size': 12}
# Disabled red-yellow-green palette variants (with a leading grey, as a
# dict keyed 0-4, and reversed). `rdylgn` is therefore undefined at runtime.
# rdylgn = '"#cccccc" "#D7191C" "#FDAE61" "#A6D96A" "#1A9641"'.replace('"','').split(' ')
# color_dict = dict({0: "#cccccc",1:'#D7191C',2:'#FDAE61',3: '#A6D96A',4: '#1A9641'})
# rdylgn = list(reversed(rdylgn))
import colorsys
def _get_colors(num_colors):
    """Return ``num_colors`` RGB tuples with hues spread evenly around the colour wheel.

    Colour ``k`` gets hue ``k / num_colors``. Lightness is drawn uniformly
    from [0.50, 0.60) and saturation from [0.90, 1.00) using NumPy's global
    random state, so the exact shades differ between calls unless the
    generator is seeded.

    Args:
        num_colors: Number of colours to generate (an integer).

    Returns:
        A list of ``(r, g, b)`` float tuples as produced by
        ``colorsys.hls_to_rgb``. The list is empty when ``num_colors <= 0``.
    """
    colors = []
    # Integer steps instead of a float-step np.arange: the latter can yield
    # one element too many because of rounding, and divides by zero when
    # num_colors == 0.
    for step in range(num_colors):
        hue = step / num_colors
        # Draw order (lightness, then saturation) matches the random stream
        # consumption of the original implementation.
        lightness = (50 + np.random.rand() * 10) / 100.
        saturation = (90 + np.random.rand() * 10) / 100.
        colors.append(colorsys.hls_to_rgb(hue, lightness, saturation))
    return colors
# Default 17-colour palette; make_stacked_bar_chart uses it when
# min_palette is False.
my_colors = _get_colors(17)
# Bare expression: echoes the palette when run as a notebook cell and has no
# effect in a plain script.
my_colors
# ===============================================================
# ===============================================================
# GRAPHING THE DATA
# ===============================================================
# NOTE(review): nothing in the visible code draws on this 20x20 inch,
# 500 dpi figure (make_stacked_bar_chart plots onto its own axes) — confirm
# it is still needed.
plt.figure(figsize=(20,20), dpi=500)
def _count_by_department(sm_df, _y):
    """Count rows per (``_y`` value, ``dept_binned``) pair, sorted by ascending row total."""
    counts = (
        sm_df.groupby([_y, 'dept_binned'])
        .size()
        .reset_index()  # the unnamed size column becomes column 0
        .pivot(columns='dept_binned', index=_y, values=0)
        .fillna(0)
    )
    # Smallest totals first, so the largest bar ends up at the top of the
    # horizontal chart.
    order = counts.sum(axis=1).sort_values(ascending=True).index
    return counts.loc[order]


def make_stacked_bar_chart(sm_df, _y, _title, _xlabel, _ylabel, min_palette=False):
    """Draw a horizontal stacked bar chart of row counts per ``_y`` value, split by department bin.

    Each bar is one distinct value of ``sm_df[_y]``; its segments are the
    row counts for each value of ``sm_df['dept_binned']``. Segments with a
    count above 5 are labelled with that count in white text.

    Side effects: sets the global seaborn palette and calls ``plt.show()``.

    Args:
        sm_df: DataFrame containing the ``_y`` column and a ``dept_binned``
            column.
        _y: Name of the column whose distinct values become the bars.
        _title: Chart title.
        _xlabel: X-axis label.
        _ylabel: Y-axis label.
        min_palette: If true, use a tail slice of the module-level ``rdylgn``
            palette instead of ``my_colors``.

    Returns:
        None.
    """
    sm_df_g = _count_by_department(sm_df, _y)

    if min_palette:
        # NOTE(review): `rdylgn` is only defined in commented-out lines in the
        # visible file, so this branch raises NameError until it is restored.
        palette = rdylgn[len(sm_df_g.columns) - 1:]
    else:
        palette = my_colors
    sns.set_palette(sns.color_palette(palette))

    # DataFrame.plot creates its own figure when no `ax` is given, so no
    # separate plt.figure() call is made (it only left an empty figure behind).
    ax = sm_df_g.plot.barh(stacked=True, figsize=(16, 9))
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Running right-hand edge of each bar as segments are stacked column by
    # column; used to place the count label near the end of its segment.
    running_totals = [0] * len(sm_df_g)
    for col in sm_df_g.columns:
        for i, v in enumerate(sm_df_g[col]):
            running_totals[i] += v
            if v > 5:
                v_text = str(round(v))
                # Shift left by the label's character count (in data units)
                # so the text sits inside the segment.
                ax.text(running_totals[i] - len(v_text), i, v_text + " ",
                        color='white', va='center', fontweight='bold',
                        fontsize=10)

    plt.title(_title, fontdict=titlefont, y=1.1)
    plt.xlabel(_xlabel, fontdict=labelfont, labelpad=20)
    plt.ylabel(_ylabel, fontdict=labelfont, labelpad=20)
    ax.tick_params(axis='y', labelsize=18)
    for tick in ax.get_yticklabels():
        tick.set_fontname("Georgia")
    plt.show()
# ===============================================================
# ===============================================================
# PREPARING THE DATA
# ===============================================================
# Source records, plus a US zip-code table used for coordinate lookups.
df = pd.read_csv('VG_P10_ZIPS_V3.csv', encoding='latin-1')
us_zips = pd.read_csv('us-zip-code-latitude-and-longitude.csv', sep=';')

# Lookup tables keyed by the zip code rendered as a string.
# NOTE(review): if pandas parses 'Zip' as an integer column, leading zeros are
# lost in these keys (e.g. '2134' rather than '02134') — confirm against the
# zero-padding done in get_lat.
zip_keys = us_zips['Zip'].astype(str)
zip_lat = dict(zip(zip_keys, us_zips['Latitude']))
zip_lon = dict(zip(zip_keys, us_zips['Longitude']))
# Earlier variant, keyed by the raw (unconverted) zip values:
# zip_lat = dict(zip(us_zips['Zip'], us_zips['Latitude']))
# zip_lon = dict(zip(us_zips['Zip'], us_zips['Longitude']))
def get_city(city_str):
    """Return the part of ``city_str`` before the first comma.

    Args:
        city_str: A string such as ``"City, ST 12345"``.

    Returns:
        The text before the first comma (the whole string if there is no
        comma), or ``'no data'`` when ``city_str`` is not a string.
    """
    try:
        return city_str.split(',')[0]
    except AttributeError:
        # Non-string cells (e.g. None or a float NaN) have no .split().
        return 'no data'
def get_state(city_str):
    """Return the first whitespace-separated token after the first comma.

    For ``"City, ST 12345"`` this is ``"ST"``. Splitting on any run of
    whitespace keeps the result correct when the space after the comma is
    missing or doubled.

    Args:
        city_str: A string such as ``"City, ST 12345"``.

    Returns:
        The token following the first comma, or ``'no data'`` when
        ``city_str`` is not a string, has no comma, or has nothing after it.
    """
    try:
        return city_str.split(',')[1].split()[0]
    except (AttributeError, IndexError):
        # AttributeError: not a string; IndexError: no comma / empty remainder.
        return 'no data'
def get_lat(zip_code):
    """Look up the latitude for ``zip_code`` in the module-level ``zip_lat`` table.

    The zip code is converted to a string and left-padded with zeros to five
    characters before the lookup.

    Args:
        zip_code: Zip code as a string or number.

    Returns:
        The value stored in ``zip_lat`` for the padded key, or ``'no data'``
        when the key is absent (the padded and raw values are printed in that
        case).
    """
    zc = str(zip_code).zfill(5)
    # zc = zip_code
    try:
        return zip_lat[zc]
    except KeyError:
        # Report the miss so unmatched zip codes are visible in the output.
        print(zc, zip_code)
        return 'no data'
def get_lon(zip_code):
    """Look up the longitude for ``zip_code`` in the module-level ``zip_lon`` table.

    The zip code is converted to a string and left-padded with zeros to five
    characters before the lookup, the same normalisation get_lat applies, so
    a zip code resolves to both coordinates or to neither.

    Args:
        zip_code: Zip code as a string or number.

    Returns:
        The value stored in ``zip_lon`` for the padded key, or ``'no data'``
        when the key is absent.
    """
    zc = str(zip_code).zfill(5)
    # zc = zip_code
    try:
        return zip_lon[zc]
    except KeyError:
        return 'no data'
# Derive city, state and coordinates from the formatted address / zip columns.
# Each helper reads a single column, so apply it to that column directly
# instead of building a row object per record with DataFrame.apply(axis=1).
df['city'] = df['City, State Zip Code (Formatted)'].apply(get_city)
df['state'] = df['City, State Zip Code (Formatted)'].apply(get_state)
df['lat'] = df['Zip Code (Formatted)'].apply(get_lat)
df['lon'] = df['Zip Code (Formatted)'].apply(get_lon)

# Mapping from the 'currently' column to the 'kendra' column of the
# department-bins file; get_dept_binned looks departments up in it.
dept_bins_df = pd.read_csv('P7_TIME_ALLOCATION/VG_TIME__DATA_DEPT_NAMES_AND_BINS_V3.csv')
dept_binned = dict(zip(dept_bins_df['currently'], dept_bins_df['kendra']))

# Whitespace-stripped department name used as the lookup key.
df['Dept'] = df['Org Level 1'].str.strip()
def get_dept_binned(dept):
    """Return the bin for ``dept`` from the module-level ``dept_binned`` mapping.

    Args:
        dept: Department name used as the lookup key.

    Returns:
        The mapped value, or ``None`` when ``dept`` is not in the mapping
        (the unmatched name is printed in that case).
    """
    try:
        return dept_binned[dept]
    except KeyError:
        # Surface unmapped departments so the bins file can be extended.
        print(dept)
        return None
# Attach the binned department label (None where the department is unmapped).
df['dept_binned'] = df['Dept'].apply(get_dept_binned)
# Rows without a zip code cannot be counted or plotted.
df = df.dropna(subset=['Zip Code (Formatted)'])
# df = og_df_new.copy()
COLUMN = 'Zip Code (Formatted)'
NUM = 10
# Keep only rows whose COLUMN value occurs more than NUM times.
column_counts = dict(df[COLUMN].value_counts())
# Vectorized lookup of each row's count instead of a row-wise apply.
df['threshold'] = df[COLUMN].map(column_counts) > NUM
df = df[df['threshold']]
# Bare expression: echoes the remaining counts when run as a notebook cell.
df[COLUMN].value_counts()
# Disabled experiments kept from the original cell:
# make_multi_bar_graph(df, COLUMN, 'ipf_binned')
# df['engineer_dept'] = ['yes' if 'Engineer'
# in x else 'no' for x in df['dept']]

# Restrict to rows whose parsed state is California and chart them.
sm_df = df.loc[df['state'].eq('CA')]
make_stacked_bar_chart(
    sm_df,
    COLUMN,
    _title='Zip Code Distribution (in CA), By Department',
    _xlabel='Total Number of Teammates',
    _ylabel='Zip Code',
)
# plt.gcf().set_size_inches(20, 10)
# plt.show()
# Bare expression: echoes the column index when run as a notebook cell.
df.columns