Hierarchical heatmap

def hierarchy_heatmap(pfile):
    """Save one clustered heatmap per test group.

    Reads ``pfile`` as CSV indexed by its ``id`` column, takes the row labels
    of the module-level ``gp`` table whose ``test`` column equals 1 or 2, and
    draws a seaborn clustermap of the matching rows of the CSV (rows with
    missing values are dropped first).  The figures are written to
    ``gp1_hmap`` and ``gp2_hmap`` at 300 dpi.

    Args:
        pfile: Anything ``pandas.read_csv`` accepts; the data must contain an
            ``id`` column.
    """
    df_h = pd.read_csv(pfile).set_index('id')

    # NOTE(review): assumes gp's index holds the same labels as the CSV's
    # ``id`` column -- confirm against where ``gp`` is built.
    gp_re = gp.sort_values('group', ascending=False)
    gp1_re = gp_re[gp_re['test'] == 1].index.values
    gp2_re = gp_re[gp_re['test'] == 2].index.values

    # Index with the labels computed above.  The earlier code computed them
    # and then indexed with ``gp1``/``gp2``, names this function never
    # defines.
    for labels, out_name in ((gp1_re, 'gp1_hmap'), (gp2_re, 'gp2_hmap')):
        sns.clustermap(df_h.loc[labels].dropna(), cmap='coolwarm', robust=True)
        plt.savefig(out_name, dpi=300)
        plt.clf()

Image map

def color_table(df_given,group,title,test):
    """Render the ``group`` rows of a table as a colour-coded image and save it.

    The selected rows are cast to ``int`` and drawn with ``imshow`` using a
    discrete colormap: values in [0, 1) are red, [1, 2) dodgerblue,
    [2, 3) orange and [3, 4) mediumspringgreen.  The figure is written to
    ``'fig_table' + str(test) + '_' + title`` at 300 dpi.

    Args:
        df_given: DataFrame whose rows are selected with ``.loc[group]``.
        group: Row labels to display.
        title: Plot title; also the last part of the output file name.
        test: Tag inserted into the output file name (converted with ``str``).
    """
    selected = df_given.loc[group]
    cells = selected.values.astype(int)
    n_rows, n_cols = selected.shape[0], selected.shape[1]

    levels = [0, 1, 2, 3, 4]
    colors = ['red', 'dodgerblue', 'orange', 'mediumspringgreen']
    cmap, norm = matplotlib.colors.from_levels_and_colors(levels, colors)

    _, ax = plt.subplots()
    ax.imshow(cells, interpolation='none', cmap=cmap, norm=norm)
    ax.set_title(title + ' ( ' + str(n_rows) + ' / ' + str(n_cols) + ' )')
    ax.set_xlabel('Variants')
    ax.set_ylabel('Samples')

    # Ticks sit on cell edges (-0.5, 9.5, ...) so the grid lines fall between
    # cells; the labels count whole cells (0, 10, ...).
    plt.xticks(np.arange(-0.5, n_cols + 0.5, step=10),
               labels=range(0, n_cols + 1, 10), fontsize=6)
    plt.yticks(np.arange(-0.5, n_rows + 0.5, step=5),
               labels=range(0, n_rows + 1, 5), fontsize=6)
    plt.grid()
    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=0.5)

    # The keyword is ``bbox_inches``; the earlier spelling ``bbox_inces`` meant
    # the tight bounding box was never applied (and newer Matplotlib rejects
    # unknown savefig keywords).
    plt.savefig('fig_table' + str(test) + '_' + title,
                bbox_inches='tight', pad_inches=0, dpi=300)
    plt.clf()

Distribution

def dist_plot(all_geno,all_group,df_gene,title):
    """Draw a grid of small bar charts, one per entry of ``all_group``.

    ``all_geno`` and ``all_group`` are combined into a DataFrame (one row per
    label).  Each row is plotted as bars in its own panel of an 8-column
    grid; the panel title is built from the first two ``'_'``-separated parts
    of the label, the x label is ``df_gene.loc[label]``, and each bar is
    annotated with its height.  Panels left over at the end of the grid get
    zero-height bars with tick labels hidden.  The figure is saved as
    ``'fig_variant_' + title`` at 300 dpi and then shown.

    Args:
        all_geno: Sequence of per-label value lists (the bar heights).
        all_group: Labels, one per entry in ``all_geno``.
        df_gene: Object indexed with ``.loc[label]`` to get the x label.
        title: Suffix for the output file name.
    """
    counts = pd.DataFrame(all_geno, index=all_group)
    n_grid_rows = (len(all_group) - 1) // 8 + 1
    fig, ax = plt.subplots(n_grid_rows, 8)
    panels = list(ax.flat)

    bar_positions = list(range(len(counts.columns)))
    palette = ('dodgerblue', 'orange', 'mediumspringgreen',
               'dodgerblue', 'orange', 'mediumspringgreen')

    for idx, name in enumerate(counts.index):
        panel = panels[idx]
        bars = panel.bar(bar_positions, counts.loc[name],
                         color=palette, label=name)

        name_parts = name.split('_')
        panel.set_title(name_parts[0] + '_' + name_parts[1],
                        fontsize=6, fontweight="bold",
                        position=(0.5, 1.0 + 0.1))
        panel.set_xlabel(df_gene.loc[name], fontsize=6, labelpad=0.5)
        panel.tick_params(which='both', labelleft=False,
                          bottom=False, labelbottom=False)

        # Write each bar's height just above it.
        for bar in bars:
            bar_top = bar.get_height()
            panel.annotate(f'{bar_top}',
                           xy=(bar.get_x() + bar.get_width() / 2, bar_top),
                           xytext=(0, 3),  # 3 points vertical offset
                           textcoords="offset points",
                           fontsize=5,
                           ha='center', va='bottom')

    # Fill the unused trailing panels with empty bars and no tick labels.
    for panel in panels[len(counts.index):]:
        panel.bar([0, 1, 2], [0, 0, 0])
        panel.tick_params(which='both', labelleft=False, bottom=False,
                          labelbottom=False, labelsize=0)

    fig.set_constrained_layout_pads(w_pad=2./72., h_pad=2./72.,
                                    hspace=0., wspace=0.)
    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=0.5)
    plt.subplots_adjust(hspace=1.6)
    plt.savefig('fig_variant_' + title, dpi=300)

    plt.show()

Filtering

def _counts_123(tally):
    """Return ``[n1, n2, n3]``: the entries of a value_counts result for 1, 2, 3 (0 if absent)."""
    return [tally[code] if code in tally else 0 for code in (1, 2, 3)]


def filter_123(group,subgroup,title):
    """Keep the rows of ``df123`` (transposed) that pass a count threshold, then plot them.

    Works on ``df123.loc[group].T`` (module-level ``df123``), so each row
    examined here is one column of ``df123``.  A row is kept when it has at
    least two distinct values, the count of value 2 reaches the threshold,
    and the count of value 1 or of value 3 reaches it as well.  The threshold
    is 5% of ``len(subgroup)``, but never less than 2.  In a kept row, values
    1 and 3 are swapped when 1 is the rarer of the two, so 1 always marks the
    more frequent one.

    For every kept row the counts of 1/2/3 are collected over the whole
    ``group`` and, separately, over ``subgroup`` and over the rest of
    ``group``.  The results are passed to ``color_table`` and ``dist_plot``.

    Args:
        group: Row labels of ``df123`` to include.
        subgroup: Labels (a subset of ``group``) counted separately; also
            sets the threshold.
        title: Title / file-name suffix forwarded to the plotting helpers.

    Returns:
        DataFrame with ``group`` as rows and the kept entries as columns,
        relabelled ``'<gene>/<label>'`` using ``df0['gene']``.
    """
    df_group = df123.loc[group].T
    nonsubgroup = np.setdiff1d(group, subgroup)
    df_gene = df0['gene']

    # Same for every row, so compute it once.
    thres = max(2, len(subgroup) * 0.05)

    all_group = []
    all_geno = []
    all_geno4 = []

    for i in df_group.index:
        vc0 = df_group.loc[i].value_counts()
        if len(vc0.index.values) < 2:
            continue

        n1, n2, n3 = _counts_123(vc0)
        if not (n2 >= thres and (n1 >= thres or n3 >= thres)):
            continue

        if n1 < n3:
            # Swap 1 <-> 3, using 4 as a temporary placeholder.
            df_group.loc[i] = (
                df_group.loc[i].replace(1, 4).replace(3, 1).replace(4, 3)
            )

        # Counts are taken after the optional swap.
        whole = _counts_123(df_group.loc[i].value_counts())
        inside = _counts_123(df_group.loc[i, subgroup].value_counts())
        outside = _counts_123(df_group.loc[i, nonsubgroup].value_counts())

        all_geno.append(whole)
        all_geno4.append(inside + outside)

        # Record the row label itself.  ``value_counts().name`` is no longer
        # the row's name on pandas >= 2.0 (it is 'count'), so reading the
        # label from there breaks the ``.loc[all_group]`` lookup below.
        all_group.append(i)

    df_fgeno = df_group.loc[all_group].T
    df_fgeno.columns = [df_gene.loc[col] + '/' + col for col in df_fgeno.columns]

    color_table(df_fgeno, group, title, 5)
    dist_plot(all_geno, all_group, df_gene, title + '_all')
    dist_plot(all_geno4, all_group, df_gene, title)

    return df_fgeno

Box plot

def box_plot(df_plot,fnm):
    """Draw a box plot of ``df_plot`` with the points overlaid, and save it.

    The y axis runs from 0 to the data maximum rounded up to a multiple of
    10, with ticks every 2.5.  The title is ``kind + fnm`` and the image is
    written to ``'./image/' + kind + '_chr' + fnm`` (``kind`` is a
    module-level name).

    Args:
        df_plot: Data passed as ``data=`` to seaborn's boxplot and stripplot.
        fnm: String appended to the title and to the output file name.
    """
    plt.figure(num=None, figsize=(10, 6), dpi=300, facecolor='w', edgecolor='k')

    outlier_style = {'marker': 'o', 'markersize': 2}
    # Data maximum rounded up to the next multiple of 10.
    y_top = math.ceil(df_plot.max().max() / 10) * 10

    box_axes = sns.boxplot(
        data=df_plot,
        flierprops=outlier_style,
        medianprops={'color': 'red'},
        color='k',
        fliersize=2,
        notch=False,
        linewidth=0.8,
    )
    plt.setp(box_axes.artists, edgecolor='k', facecolor='w')
    sns.stripplot(data=df_plot, jitter=0.2, size=2, alpha=0.5, color='orange')

    plt.ylim([0, y_top])
    tick_values = np.arange(0, y_top + 1, 2.5).tolist()
    plt.yticks(tick_values)
    plt.title(kind + fnm)
    plt.grid(ls='--', alpha=0.6)
    plt.tight_layout()
    plt.savefig('./image/' + kind + '_chr' + fnm)
    plt.clf()