daily log 10.19.20

2 minute read

CURRENT PROJECTS:

  1. CM – catcart (umjs-ecomm-catcart, personalposher, personalposherLIVE)
  2. FEM – rockpaper
  3. UJS – Firecode/Leetcode/System Design

CM REVIEW (from Friday):

  1. Stripe payments cannot be in an extension
  2. The user will have to create an account and a Stripe subscription on the web, get an “is subscribed” flag set in the MongoDB database, and then sign in to the extension
  3. The user will log in to mongo via the extension

Now we’re going to try doing this with React and MongoDB



import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import seaborn as sns
%matplotlib inline  

# VISUALIZATION VARIABLES
# Font dictionaries in the shape matplotlib accepts for `fontdict=` arguments.
# 'sans-serif' is the generic family name; a misspelled family only produces
# a font-lookup warning and a fallback font.
font = {'family': 'sans-serif', 'size': 14, 'weight': 'normal'}
labelfont = {'family': 'sans-serif', 'size': 18}
labelfont_2 = {'family': 'serif', 'size': 12}
titlefont = {'family': 'serif', 'size': 30}

# VISUALIZATION COLORS
# Eight-step hex colour ramps, one list entry per colour.
gnbu = [
    '#F7FCF0', '#E0F3DB', '#CCEBC5', '#A8DDB5',
    '#7BCCC4', '#4EB3D3', '#2B8CBE', '#08589E',
]
# Written as an explicit list: adjacent string literals are concatenated by
# Python, so splitting them on spaces would give one fused string, not 8 items.
redgreen = [
    '#D73027', '#F46D43', '#FDAE61', '#FEE08B',
    '#D9EF8B', '#A6D96A', '#66BD63', '#1A9850',
]
# NOTE(review): `rdylgn` is referenced by make_stacked_bar_chart when
# min_palette=True, but it only exists in this file as commented-out code.
# rdylgn = '"#D7191C" "#FDAE61" "#A6D96A" "#1A9641"'.replace('"','').split(' ')

# new_colors = plt.cm.GnBu(np.linspace(0, 1, 25))
# Colours sampled from matplotlib's RdBu colormap at 25 evenly spaced
# positions between 0 and 1.
new_colors = plt.cm.RdBu(np.linspace(0, 1, 25))

# Italic font properties object.
# NOTE(review): not referenced elsewhere in the visible part of this file.
multiplot_x = FontProperties()
multiplot_x.set_family('serif')
# NOTE(review): matplotlib documents set_name as an alias of set_family, so
# this call presumably replaces the 'serif' family set on the line above;
# confirm which of the two is intended.
multiplot_x.set_name('Times New Roman')
multiplot_x.set_style('italic')

# Same contents as `labelfont_2` defined earlier in this file.
labelfont2 = {'family': 'serif', 'size': 12}
# rdylgn = '"#cccccc" "#D7191C" "#FDAE61" "#A6D96A" "#1A9641"'.replace('"','').split(' ')
# color_dict = dict({0: "#cccccc",1:'#D7191C',2:'#FDAE61',3: '#A6D96A',4: '#1A9641'})
# rdylgn = list(reversed(rdylgn))


import colorsys

def _get_colors(num_colors):
    """Return ``num_colors`` RGB tuples whose hues are evenly spaced.

    Each colour gets a random lightness in [0.50, 0.60) and a random
    saturation in [0.90, 1.00), drawn from NumPy's global random state, so
    repeated calls differ unless the caller seeds ``np.random`` first.

    Args:
        num_colors: Number of colours wanted. Zero or a negative value
            yields an empty list.

    Returns:
        A list of ``(r, g, b)`` tuples with components in the 0-1 range.
    """
    count = int(num_colors)
    if count <= 0:
        return []

    colors = []
    # linspace yields exactly `count` hues; stepping a float arange by
    # 360 / count can produce one element too many through rounding.
    for hue in np.linspace(0., 1., count, endpoint=False):
        lightness = (50 + np.random.rand() * 10) / 100.
        saturation = (90 + np.random.rand() * 10) / 100.
        colors.append(colorsys.hls_to_rgb(float(hue), lightness, saturation))
    return colors

# Module-level palette of 17 colours; make_stacked_bar_chart reads it when
# called with min_palette=False.
my_colors = _get_colors(17)
# Bare expression: presumably here to echo the palette as notebook cell
# output; it has no effect when the file runs as a plain script.
my_colors

# ===============================================================
# ===============================================================
# GRAPHING THE DATA
# ===============================================================

# NOTE(review): this opens a 20x20 inch, 500 dpi figure as soon as the module
# or cell runs. Nothing in the visible code draws on it explicitly, and the
# function below opens its own figures, so it appears to stay empty; confirm
# whether it is still needed.
plt.figure(figsize=(20,20), dpi=500)
def make_stacked_bar_chart(sm_df, _y, _title, _xlabel, _ylabel, min_palette=False):
    """Show a horizontal stacked bar chart of row counts per ``_y`` value.

    Rows of ``sm_df`` are counted per (``_y``, 'dept_binned') pair. Every
    distinct ``_y`` value becomes one bar, split into one segment per
    'dept_binned' value, and bars are ordered by their total count.

    Args:
        sm_df: DataFrame holding the ``_y`` column and a 'dept_binned' column.
        _y: Name of the column whose distinct values become the bars.
        _title: Chart title.
        _xlabel: Label for the x axis.
        _ylabel: Label for the y axis.
        min_palette: When True, segment colours come from the tail of the
            module-level ``rdylgn`` list; otherwise from ``my_colors``.

    Returns:
        None. The chart is displayed through ``plt.show()``.
    """
    counts = (
        sm_df.groupby([_y, 'dept_binned'])
        .size()
        .reset_index()
        .pivot(columns='dept_binned', index=_y, values=0)
        .fillna(0)
    )

    # Order the bars by their overall count, smallest total first.
    totals = counts.sum(axis=1)
    counts = counts.loc[totals.sort_values(ascending=True).index]

    if min_palette:
        # NOTE(review): `rdylgn` exists only as commented-out code in the
        # visible part of this file, so this branch raises NameError unless
        # it is defined elsewhere.
        palette = rdylgn[len(counts.columns) - 1:]
    else:
        palette = my_colors
    # set_palette changes the global colour cycle; its return value is not
    # used.
    sns.set_palette(sns.color_palette(palette))

    # No separate plt.figure() call here: the pandas plot call is given its
    # own figsize and supplies the axes used below, so an extra figure would
    # only be left empty.
    ax = counts.plot.barh(stacked=True, figsize=(16, 9))
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # running[i] is the right-hand edge of the segments drawn so far for bar i.
    running = [0] * len(counts)
    for col in counts.columns:
        for i, v in enumerate(counts[col]):
            running[i] += v
            if v > 5:
                # Only segments with more than 5 rows get a label. Counts are
                # whole numbers, so int() keeps a trailing '.0' out of the text.
                v_text = str(int(round(v)))
                # The label is shifted left of the segment edge by its
                # character count, measured in data units (a placement
                # heuristic, not a real text width).
                ax.text(running[i] - len(v_text), i, v_text + "  ", color='white',
                        va='center', fontweight='bold', fontsize='10')

    ax.set_title(_title, fontdict=titlefont, y=1.1)
    ax.set_xlabel(_xlabel, fontdict=labelfont, labelpad=20)
    ax.set_ylabel(_ylabel, fontdict=labelfont, labelpad=20)
    ax.tick_params(axis='y', labelsize=18)
    for tick in ax.get_yticklabels():
        tick.set_fontname("Georgia")
    plt.show()
    
# ===============================================================
# ===============================================================
# PREPARING THE DATA
# ===============================================================












# pandas is used from here on but is not imported by the import lines at the
# top of this file, so it is imported here (re-importing is harmless).
import pandas as pd

df = pd.read_csv('VG_P10_ZIPS_V3.csv', encoding='latin-1')
us_zips = pd.read_csv('us-zip-code-latitude-and-longitude.csv', sep=';')

# ZIP -> latitude / longitude lookup tables keyed by the ZIP rendered as text.
# NOTE(review): if pandas parses 'Zip' as an integer column, leading zeros are
# not part of these keys (e.g. '1234' rather than '01234'); confirm against
# the CSV.
zip_lat = dict(zip(us_zips['Zip'].astype(str), us_zips['Latitude']))
zip_lon = dict(zip(us_zips['Zip'].astype(str), us_zips['Longitude']))

# zip_lat = dict(zip(us_zips['Zip'], us_zips['Latitude']))
# zip_lon = dict(zip(us_zips['Zip'], us_zips['Longitude']))
def get_city(city_str):
    """Return the text before the first comma of a "City, ST ZIP" string.

    Args:
        city_str: The combined city/state/ZIP text. Values that are not
            strings (for example NaN or None) are tolerated.

    Returns:
        The city portion, or 'no data' when ``city_str`` is not a string.
    """
    try:
        return city_str.split(',')[0]
    except (AttributeError, TypeError):
        # Not a str: floats/None have no .split, and bytes reject a str
        # separator.
        return 'no data'
        
def get_state(city_str):
    """Return the first word after the first comma of a "City, ST ZIP" string.

    Splitting on any run of whitespace means the result does not depend on
    exactly one space following the comma.

    Args:
        city_str: The combined city/state/ZIP text. Values that are not
            strings (for example NaN or None) are tolerated.

    Returns:
        The state token, or 'no data' when ``city_str`` is not a string, has
        no comma, or has nothing but whitespace after the comma.
    """
    try:
        return city_str.split(',')[1].split()[0]
    except (AttributeError, TypeError, IndexError):
        return 'no data'

        
def get_lat(zip_code):
    """Look up the latitude for ``zip_code`` in the module-level ``zip_lat``.

    The ZIP is turned into text and tried first left-padded with zeros to
    five characters, then as the unpadded text.

    Args:
        zip_code: ZIP code as a string or number.

    Returns:
        The stored latitude, or 'no data' when neither form of the key is
        present (the padded key and the original value are printed in that
        case).
    """
    # Whole-number floats such as 94103.0 are rendered without the '.0' so
    # they can match keys like '94103'.
    if isinstance(zip_code, float) and zip_code.is_integer():
        raw = str(int(zip_code))
    else:
        raw = str(zip_code)
    padded = raw.zfill(5)

    for key in (padded, raw):
        if key in zip_lat:
            return zip_lat[key]

    print(padded, zip_code)
    return 'no data'


def get_lon(zip_code):
    """Look up the longitude for ``zip_code`` in the module-level ``zip_lon``.

    The ZIP is turned into text and tried first left-padded with zeros to
    five characters (the same padding get_lat applies), then as the unpadded
    text.

    Args:
        zip_code: ZIP code as a string or number.

    Returns:
        The stored longitude, or 'no data' when neither form of the key is
        present.
    """
    # Whole-number floats such as 94103.0 are rendered without the '.0' so
    # they can match keys like '94103'.
    if isinstance(zip_code, float) and zip_code.is_integer():
        raw = str(int(zip_code))
    else:
        raw = str(zip_code)

    for key in (raw.zfill(5), raw):
        if key in zip_lon:
            return zip_lon[key]
    return 'no data'
    
# Derive city/state and coordinates one column at a time. Series.map calls
# the helper once per value without building a row object for every record.
df['city'] = df['City, State Zip Code (Formatted)'].map(get_city)
df['state'] = df['City, State Zip Code (Formatted)'].map(get_state)
df['lat'] = df['Zip Code (Formatted)'].map(get_lat)
df['lon'] = df['Zip Code (Formatted)'].map(get_lon)

# Department-name lookup: 'currently' values map to their 'kendra' values.
# The name `dept_binned` first holds the CSV as a DataFrame and is then
# rebound to the plain dict that get_dept_binned reads.
dept_binned = pd.read_csv('P7_TIME_ALLOCATION/VG_TIME__DATA_DEPT_NAMES_AND_BINS_V3.csv')
dept_binned = dict(zip(dept_binned['currently'], dept_binned['kendra']))

# Department name with surrounding whitespace removed.
df['Dept'] = df['Org Level 1'].str.strip()

def get_dept_binned(dept):
    """Return the entry for ``dept`` in the module-level ``dept_binned`` dict.

    Args:
        dept: Department name to look up.

    Returns:
        The mapped value, or None when ``dept`` has no entry or cannot be
        used as a dictionary key; the unmatched value is printed so it can
        be added to the mapping.
    """
    try:
        return dept_binned[dept]
    except (KeyError, TypeError):
        # KeyError: no entry (NaN lands here too); TypeError: unhashable value.
        print(dept)
        return None
# Bin each department name; names without an entry come back as None.
df['dept_binned'] = df['Dept'].map(get_dept_binned)

df = df.dropna(subset=['Zip Code (Formatted)'])
# df = og_df_new.copy()
COLUMN = 'Zip Code (Formatted)'
NUM = 10

# Flag rows whose COLUMN value occurs more than NUM times, then keep only
# those rows.
column_counts = dict(df[COLUMN].value_counts())
df['threshold'] = df[COLUMN].map(column_counts) > NUM

df = df[df['threshold']]
df[COLUMN].value_counts()
# make_multi_bar_graph(df, COLUMN, 'ipf_binned')

# df['engineer_dept'] = ['yes' if 'Engineer' 
#                        in x else 'no' for x in df['dept']]
# Chart only the rows whose parsed state is 'CA'.
sm_df = df[df['state'] == 'CA']
make_stacked_bar_chart(sm_df, COLUMN, 'Zip Code Distribution (in CA), By Department', 'Total Number of Teammates','Zip Code')
# plt.gcf().set_size_inches(20, 10)
# plt.show()
df.columns


Tags:

Updated: