o
    ؾg                     @   s  d dl Zd dlmZ d dlZd dlmZ ddlm	Z	m
Z
mZ ddlmZmZmZmZ ddlmZ dyd	d
Zdd Zdd Zdd Zdd ZdzddZ			d{d d!Z		d|d"d#Z	%	d}d&d'Z		d~d(d)Zd*d+ Z		dd.d/Zd0d1 Zd2d3 Z 	6	7	dd8d9Z!		dd<d=Z"		dd>d?Z#dd@dAZ$	$	ddFdGZ%	E	ddJdKZ&	L	ddMdNZ'	O	ddPdQZ(	S	ddTdUZ)dVdW Z*dddddddXdYdZgdd$d[d\d]d%dddfd^d_Z+d`da Z,dbdc Z-ddde Z.dfdg Z/dhdi Z0djdk Z1dldm Z2dndo Z3	L	r	ddwdxZ4dS )    N)
csr_matrix)AnnData   )extract
rename_net
filt_min_n)get_filterbyexpr_inputsget_min_sample_sizeget_cpm_cutoffget_cpm)compute_es_per_rankFc                 C   sT   | sz	dd l m} W |S  ty   tdw zdd l}W |S  ty)   tdw )Nr   zKmatplotlib is not installed. Please install it with: pip install matplotlib)matplotlib.pyplotpyplot	ExceptionImportError
matplotlib)
return_mplpltmpl r   \/var/www/html/backend_erp/backend_erp_env/lib/python3.10/site-packages/decoupler/plotting.pycheck_if_matplotlib   s   
r   c                  C   &   zdd l } W | S  ty   tdw )Nr   zEseaborn is not installed. Please install it with: pip install seaborn)seabornr   r   )snsr   r   r   check_if_seaborn      
r   c                  C   r   )Nr   zKadjustText is not installed. Please install it with: pip install adjustText)
adjustTextr   r   )atr   r   r   check_if_adjustText"   r   r   c                  C   sN   zdd l } W n ty   tdw ddlm} || j|dk r%td| S )Nr   zCigraph is not installed. Please install it with: pip install igraph)Versionz0.10.0z^igraph version needs to be at least 0.10.0. Please install it with: pip install igraph==0.10.0)igraphr   r   packaging.versionr    __version__)igr    r   r   r   check_if_igraph*   s   r%   c                 C   s>   |d ur|d ur| d ur| j |dd d S tdtdd S )Ntight)bbox_inchesz fig is None, cannot save figure.zax is None, cannot save figure.)savefig
ValueError)figaxsaver   r   r   	save_plot5   s   r-   c                 C   sX   |d u rt j}|d u rt j}| d t |k }t | d t |k }| j||@  } | S )NpvalslogFCs)npinfabsloc)df
sign_limit
lFCs_limitmsk_signmsk_lFCsr   r   r   filter_limits@   s   r9      sourcetargetweight皙?      ?#D62728#1F77B4gray   r:   d   c           #   
   C   s6  t  }t }| j|j}| j|j}| j||f } |j||f }t|	 }	d}|du r9|jdd||d\}}|durt	||||d}tj
t|d ddd }t|d dk }|du rdtd	||d
 |k d}| j|g j|d< t|j|g j |d< |tjt|dd  }t|||d}|r| }nd}|jjdddd||d|d |d|| ni| j|g jj|didd}t|j|g j |d< |tjt|dd  }t|||d}||d< ||j|d |
k|d |	k@ df< ||j|d |
 k|d |	k@ df< |jjdddd|d |d| |j|	ddd |j|
ddd |j|
 ddd |t|d |
k|d |	k@  jddd}|jd| }|d g }t|d |d |jD ]\} }!}"||| |!|" qjt |dkr|j!|t"ddd|d t#||| |r|S dS )a  
    Plot logFC and p-values. If name and net are provided, it does the same for the targets of a selected source.

    Parameters
    ----------
    logFCs : DataFrame
        Data-frame of logFCs (contrasts x features).
    pvals : DataFrame
        Data-frame of p-values (contrasts x features).
    contrast : str
        Name of the contrast (row) to plot.
    name : str, None
        Name of the source to plot. If None, plot classic volcano (without subsetting targets).
    net : DataFrame, None
        Network dataframe. If None, plot classic volcano (without subsetting targets).
    top : int
        Number of top differentially expressed features.
    source : str
        Column name in net with source nodes.
    target : str
        Column name in net with target nodes.
    weight : str, None
        Column name in net with weights. If none, set to None.
    sign_thr : float
        Significance threshold for p-values.
    lFCs_thr : float
        Significance threshold for logFCs.
    sign_limit : float
        Limit of p-values to plot in -log10.
    lFCs_limit : float
        Limit of logFCs to plot in absolute value.
    color_pos: str
        Color to plot significant positive genes.
    color_neg: str
        Color to plot significant negative genes.
    color_null: str
        Color to plot rest of the genes.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    Nr   figsizedpir;   r<   r=   r=   )ddof   r   z%If net is given, name cannot be None.r;   r<   r/   r.   axisr5   r6   coolwarmF)xyccmapvminvmaxsharexr+   z	{0} | {1}rP   rQ   rR   rV   r+   z{0}--blackrQ   	linestylecolorrP   r[   r\   	ascending-log10(pvals)-
arrowstyler\   
arrowpropsr+   )$r   r   indexintersectioncolumnsr3   r0   log10subplotsr   stdr2   anyr)   	set_indexTpdisnullr9   plotscatter	set_titleformatrenameaxhlineaxvlinesort_valuesiloc
set_ylabelzipappendtextlenadjust_textdictr-   )#r/   r.   contrastnamenettopr;   r<   r=   sign_thrlFCs_thrr5   r6   	color_pos	color_neg
color_nullrG   rH   r+   
return_figr,   r   r   rf   rh   r*   max_nhas_negr4   rT   signstextsrP   rQ   sr   r   r   plot_volcanoP   sh   :"$,
 r   c                 C   s  t  }t }t| }|  }|| |d< t||  |d< t|||d}|
|d< |d |k|d |k@ }|d | k|d |k@ }||j|df< |	|j|df< d}|du rc|jdd||d\}}|jj	dddd|d	 |
d
 |j|ddd |j|ddd |j| ddd |||B  jddd}|jd| }|d g }t|d |d |jD ]\}}}||||| qt|dkr|j|tddd|d t||| |r|S dS )a  
    Plot logFC and p-values from a long formated data-frame.

    Parameters
    ----------
    data : pd.DataFrame
        Results of DEA in long format.
    x : str
        Column name of data storing the logFCs.
    y : str
        Columns name of data storing the p-values.
    top : int
        Number of top differentially expressed features to show.
    sign_thr : float
        Significance threshold for p-values.
    lFCs_thr : float
        Significance threshold for logFCs.
    sign_limit : float
        Limit of p-values to plot in -log10.
    lFCs_limit : float
        Limit of logFCs to plot in absolute value.
    color_pos: str
        Color to plot significant positive genes.
    color_neg: str
        Color to plot significant negative genes.
    color_null: str
        Color to plot rest of the genes.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    r/   r.   rN   r=   Nr   rF   FrW   TrX   rY   rZ   r]   r^   r`   r   ra   rb   rd   )r   r   r0   ri   copyr9   r3   rj   rq   rr   set_axisbelowrv   rw   rx   ry   rz   r{   rf   r|   r}   r~   r   r   r-   )datarP   rQ   r   r   r   r5   r6   r   r   r   rG   rH   r+   r   r,   r   r   r4   up_mskdw_mskr*   r   r   r   r   r   r   plot_volcano_df   s@   /

 r   
   r:   r:   c                 C   s  t  }t }| |  } | }||| |k ||g }tj| |gddd} | | dk| | dk@ | | dk | | dk @ B }d| d< d| j|df< d}|
du r\|jdd||	d	\}}
| jj	||d|
d
 |

  |
d |
jdddd |
jdddd |
| t| | | |   |j}| j|ddf } g }t| | | | | jD ]\}}}||
||| qt|dkr|j|tddd|
d t||
| |r|S dS )a  
    Plot the weight and statistic of the target genes of a given source.

    Parameters
    ----------
    data : pd.DataFrame
        Results of DEA in long format.
    stat : str
        Column name of data storing feature statistics.
    source_name : str
        Name of source to plot.
    net : DataFrame
        Network dataframe..
    source : str
        Column name in net with source nodes.
    target : str
        Column name in net with target nodes.
    weight : str, None
        Column name in net with weights.
    top : int
        Number of features to show labels.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    r   inner)rM   joinr   rA   r\   r@   NrF   )rP   rQ   rR   r+   TrX   rY   rZ   r]   ra   rb   rd   )r   r   r   rm   ro   concatr3   rj   rq   rr   gridr   rv   rw   rs   r0   r2   rx   tailrf   r{   r|   r}   r~   r   r   r-   )r   statsource_namer   r;   r<   r=   r   rG   rH   r+   r   r,   r   r   wposr*   	top_namesr   rP   rQ   r   r   r   r   plot_targets8  s:   )0

" r   c                 C   s   t  }t }t| |d\}}}t||jd }t|tr$| 	 }n|	 }d}|du r9|j
dd||d\}}|r@t|}|j||||	d |durU|j|ddd |jd	d
d |dure|| |durn|| t||| |
rx|S dS )a	  
    Plot distribution of features' values per sample.

    Parameters
    ----------
    mat : list, DataFrame or AnnData
        List of [features, matrix], dataframe (samples x features) or an AnnData instance.
    thr : float
        Threshold to plot horizontal line.
    log : bool
        Whether to log1p the data or not.
    use_raw : bool
        Use raw attribute of mat if present.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    title : str
        Text to write as title of the plot.
    ylabel : str
        Text to write as ylabel of the plot.
    color : str
        Color to plot the violins.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    )use_rawr   NrF   )rP   rQ   r+   r\   rX   rY   rZ   rP   Z   rM   rotation)r   r   r   r0   repeatshape
isinstancer   toarrayflattenrj   log1p
violinplotrv   tick_paramsrs   rz   r-   )matthrlogr   rG   rH   r+   titleylabelr\   r   r,   r   r   mrrR   rP   rQ   r*   r   r   r   plot_violins  s0   '



r   c                 C   sV   | d u r|  } |d u r| }|d u r| }| |kr| } ||kr&|  }| ||fS N)minmaxmean)rT   vcenterrU   valuesr   r   r   
set_limits  s   
r      rO   c                 C   s  t  }t }tdd}tt|  rtd| j|g }d|j_d|j	_|j
ddtt|jd f j| j|diddd}|rNd	\}}nd
\}}d}|
du rc|jdd||	d\}}
|j||||
d |rtdd |
jd D }|
d |
d ntdd |
jd D }|
jddd |
d |
d t||||d \}}}|jj|||d}||}|||}t|
jd |D ]	\}}|| q|j j!||d}|"g  |
# j$||
d t%||
| |r|S dS )a`  
    Plot barplots showing the top absolute value activities.

    Parameters
    ----------
    acts : DataFrame
        Activities obtained from any method.
    contrast : str
        Name of the contrast (row) to plot.
    top : int
        Number of top features to plot.
    vertical : bool
        Whether to plot verticaly or horizontaly.
    cmap : str
        Colormap to use.
    vmin : float, None
        The value representing the lower limit of the color scale.
    vcenter : float, None
        The value representing the center of the color scale.
    vmax : float, None
        The value representing the upper limit of the color scale.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    Tr   z&Input acts contains non finite values.Nr   actsr   rL   )r   rf   )rf   r   rF   )r   rP   rQ   r+   c                 S      g | ]}|  qS r   )	get_width.0barr   r   r   
<listcomp>@      z plot_barplot.<locals>.<listcomp>Activity c                 S   r   r   )
get_heightr   r   r   r   r   D  r   rP   r   r   rT   r   rU   )rS   norm)r+   )&r   r   r0   rl   isfiniter)   r3   rf   r   rh   ry   argsortr2   r   rn   r   reset_indexru   rx   rj   barplotarray
containers
set_xlabelrz   r   r   colorsTwoSlopeNormget_cmapr{   set_facecolorcmScalarMappable	set_array
get_figurecolorbarr-   )r   r   r   verticalrS   rT   r   rU   rG   rH   r+   r   r,   r   r   r   r4   rP   rQ   r*   sizesdivnormcmap_f
div_colorsr   r\   smr   r   r   plot_barplot  sZ   )








r   c                 C   sx   t |tu r	|g}|d u rt| jd dg}d g}||fS g }| | j}t|}|D ]}||k}|| q,||fS )Nr   T)typestrr0   fullr   r   uniquer|   )r4   groupbymskscatssubcatmskr   r   r   
build_msks^  s   

r   c                 C   sJ   |d ur	|  | |d u r| }| | |d u r| }| | d S r   )rs   upperr   rz   )r+   r   xlabelr   rP   rQ   r   r   r   write_labelsr  s   

r   aurocauprcTrY   c                    s  t  }t }t| |\}}d}g }t||D ]f\}}| | jddgddjdd jdddd }|| j|| j|d j du rQ|j	d	d	||d
\}  
d  jdd  jd	|d |r| fddttD }|| q|dur|r jdddd |rtt  t  g}tt  t  g} ||  || |r|j|tddd d t |
|||| t| | |r|S dS )a  
    Plot scatter plot of metrics across two different axes.

    Parameters
    ----------
    df : DataFrame
        Performance metrics per method, obtained by running run_benchmark.
    x : str
        Name of the metric to plot in the x axis.
    y : str
        Name of the metric to plot in the y axis.
    groupby : str
        Metrics can be gruped by an extra categorical column.
    show_text : bool
        Whether to plot text labels.
    show_legend : bool
        Whether to plot the legend.
    mirror_xy : bool
        Whether to make x and y axis have the same values.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    title : str
        Text to write as title of the plot.
    xlabel : str
        Text to write as xlabel of the plot.
    ylabel : str
        Text to write as ylabel of the plot.
    color : str
        Color to plot the dots.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    NmethodmetricTobservednumeric_onlyscorerf   rh   r   r   rF   r   zorder)r   labelc                    s*   g | ]} j | | | d dqS )rK   r   )r}   )r   ir+   s_valsx_valsy_valsr   r   r     s   * z(plot_metrics_scatter.<locals>.<listcomp>center leftr   r?   Fr3   bbox_to_anchorframeonra   rB   rb   rd   )r   r   r   r{   r   r   r   pivotr   rj   r   r   rr   ranger~   extendlegendr0   r   
get_xticks
get_yticksr   set_xlimset_ylimr   r   r   r-   )r4   rP   rQ   r   	show_textshow_legend	mirror_xyrG   rH   r+   r   r   r   r\   r   r,   r   r   r   r   r*   r   r   r   r   r}   r   min_nr   r  r   plot_metrics_scatter~  sT   0




 
""r     r      c
                 C   s  t  }
t| | j}tt|j| }t| | j}t| d j}|D ][}|D ]V}| | | |k| | |k@  jd dk}|r|D ]<}|dksN|dkrctj	|||tj
tj
gg||dddgd}ntj	||d|tj
tj
gg||ddddgd}t| |g} qDq+q'|
j||||dddd	\}}| }t|jD ])}|| }|t|k r|| }t| | | |k |||d
d
d
||d	 q|d q|t|d   \}}|j||ddd
d t|||	 |r|S dS )a  
    Extension of the function plot_metrics_scatter to group metrics by two categories at the same time.

    Parameters
    ----------
    df : DataFrame
        Performance metrics per method, obtained by running run_benchmark.
    col : str
        Name of the group column to group by. Each of its categories will become a subplot.
    x : str
        Name of the metric to plot in the x axis.
    y : str
        Name of the metric to plot in the y axis.
    groupby : str
        Name of the group column to additionaly group by. Each of its categories will appear in the legend.
    n_cols : int
        Number of columns per row.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    r   r   r   r   ci)rh   r   T)rG   rH   tight_layoutrV   shareyF)rP   rQ   r   r  r  r  r+   r   offr   r  r  r  N)r   r0   r   r   intceilsizer   ro   	DataFramenanr   rj   ravelr  r~   r  rM   get_legend_handles_labelsr  r-   )r4   colrP   rQ   r   n_colsrG   rH   r   r,   r   r   n_rowsgroupbysmetricsr   grpbyis_emptyr   tmpr*   axesr   r+   handleslabelsr   r   r   plot_metrics_scatter_cols  sD   #&*r0  c                 K   s  t  }t }|dvrtd| | d |k } d}|du r'|jdd||d\}}|d |jdd	 t|tu r||d
kr|| jd
|gddj	dd
 jd
dd dj}|jdd
d|| ||t| |  d| | dur{|jdddd n%|du r| d
gj	dddj}|jdd
d| ||d| ntd|jddd |dur|| |du rd}|| |du r| }|| t|||
 |	r|S dS )ao  
    Plot boxplots showing the distribution of scores between methods for a metric.

    Parameters
    ----------
    df : DataFrame
        Performance metrics per method, obtained by running run_benchmark.
    metric : str
        Name of metric to plot, must be either "mcauroc", "mcauprc", "rank" or "nrank".
    groupby : str
        Metrics can be gruped by an extra categorical column.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    title : str
        Text to write as title of the plot.
    xlabel : str
        Text to write as xlabel of the plot.
    ylabel : str
        Text to write as ylabel of the plot.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.
    kwargs : dict
        Other keyword arguments are passed through to seaborn.boxplot().

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    )mcaurocmcauprcranknrankrecallzGArgument metric must be either "mcauroc", "mcauprc", "rank" or "nrank".r   Nr   rF   Tr   r   r   r   r   r   )rP   rQ   huer   r+   order	hue_orderr  r  Fr  )rP   rQ   r   r+   r7  z6Argument groupby must be a string and not be "method".rP   r   r   Methodsr   )r   r   r)   rj   r   r   r   r   r   r   r   r   rx   rf   boxplotr0   sortr   
get_legendr  r   rs   r   r   rz   r-   )r4   r   r   rG   rH   r+   r   r   r   r   r,   kwargsr   r   r*   r7  r   r   r   plot_metrics_boxplot7  sl   '




r>  c              
   K   s  t  }t }	| j }
t|
d |
d< t|
d |
d< t|tu r+|dur+tdt|tu r{|	j	dt
|||dd\}}| }t||D ]1\}}|jdd	 |d |jddd|||
dd
| |jddd|d |d |d qHn>d}|du r|	j	dd||d\}}|jdd	 |d |jddd|||
dd
| |jddd|d |d |d t||| |r|S dS )ae  
    Quality Control plot to assess the quality of the obtained pseudobulk samples.

    Parameters
    ----------
    adata : AnnData
        AnnData obtained after running ``decoupler.get_pseudobulk``.
    groupby : str, list
        Name of the ``.obs`` column to group by. Can also be a list of columns.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.
    kwargs : dict
        Other keyword arguments are passed through to seaborn.scatterplot().

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    psbulk_n_cellspsbulk_countsNz1If a grupby is a list of columns ax must be None.r   TrG   rH   r  r   r   )rP   rQ   r6  r+   r   r   r  r  F)r3   r  r	  r   zLog10 number of cellsLog10 total sum of countsrF   r   )r   r   obsr   r0   ri   r   listr)   rj   r~   r#  r{   r   r   scatterplotr  r   rz   r-   )adatar   rG   rH   r+   r   r,   r=  r   r   r4   r*   r-  grpr   r   r   plot_psbulk_samples  s>   


	


rH     ffffff?viridisr:   r  c              
   K   s8  t  }t }t| |\}}}|du rtj|dd}t||||}|d8 }|d8 }t||}t||}tj||kdd}tj|dd}tj||dk < d}|du rZ|j	dd|	|
d\}}|j
dt|||dtd	d
dd|d| |j|d ddd |jt|ddd |d |d t||| |r|S dS )a  
    Plot to help determining the thresholds of the ``decoupler.filter_by_expr`` function.

    Parameters
    ----------
    adata : AnnData
        AnnData obtained after running ``decoupler.get_pseudobulk``.
    obs : DataFrame, None
        If provided, metadata dataframe, only needed if ``adata`` is not an ``AnnData``.
    group : str, None
        Name of the ``.obs`` column to group by. If None, it assumes that all samples belong to one group.
    lib_size : int, float, None
        Library size. If None, default to the sum of reads per sample.
    min_count : int
        Minimum count requiered per gene for at least some samples.
    min_total_count : int
        Minimum total count required per gene across all samples.
    large_n : int
        Number of samples per group that is considered to be "large".
    min_prop : float
        Minimum proportion of samples in the smallest group that express the gene.
    cmap : str
        Colormap to use.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.
    kwargs : dict
        Other keyword arguments are passed through to ``sns.histplot``.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    Nr   rL   g+=r         ?rF   Tg      ?zNumber of features)shrinkr   )FT)rP   rQ   rS   cbarcbar_kwsdiscreter+   r?   rB   rX   )rQ   rR   lsrP   rR   rR  rB  zNumber of samplesr   )r   r   r   r0   sumr	   r
   r   r"  rj   histplotri   r   rv   rw   r   rz   r-   )rF  rC  grouplib_size	min_countmin_total_countlarge_nmin_proprS   rG   rH   r+   r   r,   r=  r   r   rQ   	var_namesmin_sample_size
cpm_cutoffcpmsample_sizetotal_countsr*   r   r   r   plot_filter_by_expr  s<   ,




rb  皙?rK   c	                 K   s   t  }
d}t| trl| j }dt|v r$| jd }t|tjr#|j}nt	|t
j||kdd}d}|du rB|
jdd||d\}}|j|fdd	d
|	}|j|ddd |d |d t||| |rj|S dS t	|)a6  
    Plot to help determining the thresholds of the ``decoupler.filter_by_prop`` function.

    Parameters
    ----------
    adata : AnnData
        AnnData obtained after running ``decoupler.get_pseudobulk``. It requieres ``.layer['psbulk_props']``.
    min_prop : float
        Minimum proportion of cells that express a gene in a sample.
    min_smpls : int
        Minimum number of samples with bigger or equal proportion of cells with expression than ``min_prop``.
    cmap : str
        Colormap to use.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.
    kwargs : dict
        Other keyword arguments are passed through to ``matplotlib.pyplot.hist``.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    zadata must be an AnnData object that contains the layer 'psbulk_props'. Please check the
            function decoupler.get_pseudobulk.psbulk_propsr   rL   Nr   rF   TrB   )r   r\   rY   rX   rS  z#Number of samples where >= min_propzNumber of genes)r   r   r   layerskeysrD  ro   r!  r   r)   r0   rT  rj   histrw   r   rz   r-   )rF  r[  	min_smplsrS   rG   rH   r+   r   r,   r=  r   msg
layer_keyspropsnsmplsr*   _r   r   r   plot_filter_by_propC  s.   "




rn  RdBu_rc           &      C   s,  t  }t dd}G dd d|jj}| jjd}| | j}t|}|| }|| }t|||dd}t	||dd	}||d
 |k }t
| }|| }|| }t|j}t||d }|j}| }d||  }t|tj|tj|t|\}}}t|}|| }|dkr||||k  }n||||k  }|jdddg di|d|d\}}| }|d } | d | j||ddd | j|| ddd | jdddd | d | | |d } | d | g  | g  | j|| ddddd |d } | d | g  | g  tt |d}!tt!|d}"||!d|"d}#| j"|tj#ddf d|#|d | $d|jd  |d } | d | j%||ddd  ||dk }$|$jdkrh|d! }%n|$d! d }%| j|%ddd | &d" | d# |j'ddd$ t(|| |
 |	r||fS dS )%a)  
    Plot the running score of GSEA.

    Parameters
    ----------
    df : DataFrame
        Long format DataFrame with features to be ranked. Assumes features are indexes.
    stat : str
        Name of the column containing the ranking metric.
    net : DataFrame
        Network in long format.
    set_name : str
        Name of the feature set to plot.
    source : str
        Column name in net with source nodes.
    target : str
        Column name in net with target nodes.
    cmap : str
        Colormap to use.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure
        Returns Figure object.
    le_c: ndarray
        List of leading edge features. If ES is positive, these are the top ranked features. If ES is negative, these are
        the bottom ranked features.
    Tr   c                       s(   e Zd Zd fdd	ZdddZ  ZS )	z-plot_running_score.<locals>.MidpointNormalizeNFc                    s   || _ t ||| d S r   )r   super__init__)selfrT   rU   r   clip	__class__r   r   rq    s   z6plot_running_score.<locals>.MidpointNormalize.__init__c                 S   s0   | j | j| jgg d}}tjt|||S )N)r   r?   r   )rT   r   rU   r0   mamasked_arrayinterp)rr  valuers  rP   rQ   r   r   r   __call__  s   z6plot_running_score.<locals>.MidpointNormalize.__call__)NNNFr   )__name__
__module____qualname__rq  rz  __classcell__r   r   rt  r   MidpointNormalize  s    r  UNrI   r   )r  r;   r<   rM  r  r   height_ratios)r  r?   r?   rK   )gridspec_kwrG   rV   rH           z#88c544rK   )r\   	linewidthrX   r[   r\   zEnrichment Scorer?   )r  r\   b   r   )
rasterizedr   rS      z#C9D3DB)y1y2r\   RankzRanked metric)wspacehspace))r   r   	Normalizerf   r   astyper0   r   r   r   r   aranger   isinrT  r   float32int64boolsignrj   r#  marginsrq   rw   rv   rz   rs   set_yticklabels
set_yticksvlines
percentiler   r   
pcolormeshnewaxisr  fill_betweenr   subplots_adjustr-   )&r4   r   r   set_namer;   r<   rS   rG   rH   r   r,   r   r   r  rR   r   r   snetidxrnksset_msk
n_features	nf_in_setdecmx_valuejesr  set_rnksle_cr*   r-  r+   rT   rU   midnormnon_zero_rnkszero_rnkr   r   r   plot_running_score  s   (


*
"












r  r  r  c                 K   s   t  }| | j }| | j}t|}|| }|| }d}|	du r-|jdd||d\}}	|	jd	|||d| |	| |	| |	j	dd |durU|	j
|d|d |dur^|	| t||	| |
rh|S dS )
aN  
    Plot results of enrichment analysis as bars.

    Parameters
    ----------
    df : DataFrame
        Results of enrichment analysis.
    x : str
        Column name of ``df`` to use as continous value.
    y : str
        Column name of ``df`` to use as labels.
    color : str
        Color to plot the bars.
    thr : float, None
        If specified, x value where to draw a vertical dashed line.
    thr_color : str
        Color of the vertical dashed line.
    title : str, None
        Text to write as title of the plot.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.
    kwargs : dict
        Other keyword arguments are passed through to ``matplotlib.pyplot.barh``.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    Nr   rF   )rQ   widthr\   g?)rQ   rX   r  r   )r   r   r   r0   r   rj   barhr   rz   r  rw   rs   r-   )r4   rP   rQ   r\   r   	thr_colorr   rG   rH   r+   r   r,   r=  r   x_valy_valr  r*   r   r   r   plot_barplot_df  s*   )




r  	viridis_rr  r:   c              	      s`  t   | | j}|dur| | j}n| jj}| | j}| | j}t|}|| }|| }|| }|| } jdd||	d\}}
|  jd  d }|
jdd |
j|||||d}|
	d	 |

| |jd
dd fddd\}}|
j|||ddddd |j|dddddd}|jj|dd |
jddd |dur|
| t||
| |r|S dS )a
  
    Plot results of enrichment analysis as dots.

    Parameters
    ----------
    df : DataFrame
        Results of enrichment analysis.
    x : str
        Column name of ``df`` to use as continous value.
    y : str
        Column name of ``df`` to use as labels.
    c : str
        Column name of ``df`` to use for coloring.
    s : str
        Column name of ``df`` to use for dot size.
    scale : int
        Parameter to control the size of the dots.
    cmap : str
        Colormap to use.
    title : str, None
        Text to write as title of the plot.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    ax : Axes, None
        A matplotlib axes object. If None returns new figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    Nr   rF   lines.markersizerK   rP   rL   )rP   rQ   rR   r   rS   Tr   r  z{x:.2f}c                    s   t |  jd   S )Nr  )r0   sqrtrcParams)r   r   scaler   r   <lambda>  s    zplot_dotplot.<locals>.<lambda>)propnumfmtfuncF)rM  g?z
upper leftrM  )r   r	  r  r3   labelspacingg      ?r   r   )rM  rc  right)rN  aspectorientationanchorlocationleft)r3   皙?)rP   rQ   )r   r   rf   r0   r   rj   r  r   rr   r   r   legend_elementsr  r   r+   rs   r  r-   )r4   rP   rQ   rR   r   r  rS   r   rG   rH   r+   r   r,   r  r  c_valsr  idxsr*   nsrr   r.  r/  clbr   r  r   plot_dotplotf  sn   )







r  c                    sF  d}t | drt | dst | dst | dsJ ||d us"J d|| j v s-J d| j| }	||	jv s;J d|d urG|	|	j| }	|d urS|	|	j| }	|d usc|d	u sc|d uscJ d
|d uri|sw|d urq|d usw|r{|d ur{td|d urt | dsJ d|| j v sJ dt	| j| t
jr| j| }
|	|
fS |dddddddd  fddt| j| jd D }t
j| j| | jj|d}
|	|
fS |d urt | dsJ d|| j v sJ dt
j| j| | jj| jjd}
|	|
fS |rt | dsJ dt
j| j| jj| jjd}
|	|
fS )NzXdata sould be an AnnData/MuData object with an .uns, and .obsm, .X, or .layers attributeunsobsmXre  zuns_key must be specifiedzuns_key not found in data.unsz8stat_col must be one of the columns in the anova resultsTz2one of obsm_key, use_X, or layer must be specifiedz9only one of obsm_key, use_X, or layer should be specifiedz)the data does not have an .obsm attributezobsm_key not found in data.obsmX_r   pcaPCmofaFactorumapUMAPc                    s   g | ]
}d   d| qS )z{0}{1}r   )rt   )r   rP   column_namer   r   r         z-_check_assoc_plot_intputs.<locals>.<listcomp>r   rf   rh   z*the data does not have a .layers attributezlayer not found in data.layersz%the data does not have a .X attribute)hasattrr  rf  rh   variabler  factorr)   r  r   ro   r!  replacer  r   rC  rf   re  varr  )r   associationscolsuns_keyobsm_keyuse_Xlayerstat_colri  statsr   rh   r   r  r   _check_assoc_plot_intputs  sF   0
 (
$"
r  p_adjScoresStatsPurplesBrBGSet2c           $   	   C   s  zddl }ddlm} ddlm} ddlm} W n ty#   tdw t| |||||||\}}|d 	 }|j
dd|djjdd|f }d|j_d|j_|dv rYt| }|jdd|jd| f  }|jdd|jf  }|j||d	d
dd}|j|	d
 dd |  |d |||j |jt|
 k}|j|dd|t|
d}|j|dd |j|j|dd	dd}|j|	d dd |  |d |D ]}t| j| j}|j |||d}|j|ddd q|!||j |d | } | jdddd | "  |s|dur|#  | j$ddd}!|d d
d
d!d"d|!j%d#}"|!&|" | j$ddd}!|d d
d
d!d"d|!j%d#}"|!&|" | j'}#|#(|d  |#)|d
  |#*| t+|#d$| |rj|#S dS )%a2	  
    Create a composite plot displaying association results between scores (bottom) and summary statistics
    (top) using a clustermap.

    Requires PyComplexHeatmap to be installed.

    Parameters:
    ------------
    data : AnnData or MuData
        The input data containing the association results in .uns[uns_key] and the underlying data in
        .obsm[obsm_key], .X or .layers[layer].
    uns_key : str, optional
        Key in `data.uns` where the association statistics are stored.
    associations : list, optional
        List of association names to be plotted. If None, all associations will be plotted (default is None).
    cols : list, optional
        List of columns to be plotted (i.e. columns of .obsm/rows of .var). If None, all columns will be
        plotted (default is None).
    obsm_key : str, optional
        Key of `data.obsm` used to plot the bottom clustermap. Either `obsm_key`, `use_X`, or `layer` must be specified.
    use_X : bool, optional
        Boolean indicating whether to use the data in `.X` for the bottom clustermap. Either `obsm_key`, `use_X`,
        or `layer` must be specified.
    layer : str, optional
        Key of `data.layers` used to plot the bottom clustermap. Either `obsm_key`, `use_X`, or `layer` must be specified.
    stat_col : str, optional
        Name of the summary statistic column in `data.uns[uns_key]` to be shown in the top clustermap (default is 'p_adj').
    titles : list, optional
        A list of two strings representing the titles for the ClusterMap for scores and statistics,
        respectively (default is ['Scores', 'Stats']).
    sign_thr : float
        Significance threshold for p-values.
    n_factors : int
        Number of factors (score features) to plot.
    cmap_stats : str
        Colormap to use for stats values.
    cmap_scores : str
        Colormap to use for scores values.
    cmap_cats : str
        Colormap to use for ``obs_annotation_cols`` categories.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    r   N)	RectanglezImarsilea is not installed. Please install it using "pip install marsilea"r  r  r   )pvalr  r  r   h1)rS   r  heightr   center)r   alignr  *z	{0} < {1})markerr   r  )r   g?h2)rS   r   r  rc  )padr   r  rowrK   )sidestack_by
stack_size)
board_nameax_name)r   r   Fz.1)filleclw	transformT),marsileamarsilea.plotterplottermatplotlib.patchesr  r   r   r   r  r   r
  rn   r3   rf   r   rh   r0   ri   r   Heatmap	add_titleadd_legendsadd_dendrogram	add_rightLabelsr   
MarkerMeshrt   r   	add_layerrD  rC  Colors
add_bottomrendercloseget_ax	transAxes
add_artistfigureset_figwidthset_figheightset_dpir-   )$r   r  r  r  obs_annotation_colsr  r  r  r  titlesr   	n_factors
cmap_statscmap_scores	cmap_catsrG   rH   r   r,   rv  mpr  r   r  r   	col_orderr  sign_mskr  cat_colr   
cat_colorsrR   haxborderr*   r   r   r   plot_associations  sn   ;  




r,  c                 C   sF   t t | j|jg}dd t|D }t || j d }||fS )Nc                 S   s   i | ]\}}||qS r   r   )r   r   kr   r   r   
<dictcomp>  s    z"get_dict_types.<locals>.<dictcomp>r   )r0   r   hstackrh   	enumerater  )actrC  vsv_dicttypesr   r   r   get_dict_types  s   r5  c                 C   sL   g }|j D ]}|j|df |j|df }}| | | | g}|| q|S )Nr;   r<   )rf   r3   r|   )r3  r   edgesr   r;   r<   edger   r   r   net_to_edgelist  s   
r8  c                 C   sp   t  }t| |\}}t||}|j|dd}|d j|jd< ||jd< t| |jd< t	
|dd|jd< |S )	NT)r6  directedr=   r   r   circlesquarer   )r%   r5  r8  Graphr   r  r2  rD  rf  r0   where)r1  rC  r   r$   r3  r4  r6  gr   r   r   get_g  s   

r?  c                 C   sf   t dd}| j }|rtt|}|jj| |d}|S t|}t|}|jj||d}|S )NTr   )rT   rU   )	r   r   r#  r0   r   r2   r   r  r   )r4   r   r   rP   rU   r   rT   r   r   r   get_norm  s   



r@  c                 C   s   t | trt|j| g}|S t | trt|j| }|S t | trD|r5tt|j	d  d |  }|S t|j	d  d |  }|S t
dt| )Nr   zZn_sources needs to be a list of source names or an         integer number, {0} was passed.)r   r   r0   r  rh   rD  r  r   r2   r   r)   rt   r   )	n_sourcesr1  by_abss_idxr   r   r   get_source_idxs  s   

	

rD  c                    s   t | trt|d t| g}|S t | tr%t|d t| }|S t | tr^ fddt|d |d D |d< |rFt|d |d< |j	ddgddgd	j
dgdd
| jj}|S tdt| )Nr<   c                    s0   g | ]\}}| j v r jd  | | nd qS r   )rh   ry   )r   tr   rC  r   r   r     s   0 z#get_target_idxs.<locals>.<listcomp>r=   prodr;   TFr^   r   zZn_targets needs to be a list of target names or an         integer number, {0} was passed.)r   r   r0   r  r  rD  r  r{   r2   rx   r   headrf   r   r)   rt   r   )	n_targetsrC  r   rB  t_idxr   rG  r   get_target_idxs  s.   


$
rL  c                 C   s   t |j| jkr|jjdksJ dt|| |}| jd d |f } |jt |d t	| j
t	  }d|j
vr?d|d< t||||}|j| }|jd d t |j
t	|d t	f }|jt |d t	|j
t	d d f }| ||fS )Nr   z,obs and act need to have the same row index.r;   r=   r<   )r0   allrf   r   rD  ry   r3   r  r  r   rh   r   rL  )r1  rC  r   rA  rJ  rB  rC  rK  r   r   r   get_obs_act_net  s   &(

,,
rN  c                    s   t dd}|j } |v rS|v rS|j  |jg }	t| jd D ](\}
}| jd |
 rA|	||| jd  q'|	 ||| jd  q'd}n fdd| jd D }	d}|	| jd	< |S )
NTr   r   r   r   c                    s   g | ]
}|d kr
 nqS )r  r   )r   typs_cmapt_cmapr   r   r   
  r  zadd_colors.<locals>.<listcomp>Fr\   )r   	colormapsrf  r   r0  r2  r|   r   )r>  r1  rC  s_normt_normrQ  rR  r   cmapsr\   r   r-  is_cmapr   rP  r   
add_colors  s   


rX  darkreddarkblueEnrichment scoreGene expressionkkr   r      c           %         s  t dd}t dd}t }|du rE|du rE| d  d}tjtd|jfdg|d	}| d
  d}tjtd|jfdg|d	}t	||| |||\}}}t
||
d}t
||
d}t|||} fdd|jd D |jd< t|||||||	}|j||dd} |jjddg dd}!| |!ddddf }"| |!d }#| |!d }$|j||"|||d |r| j|jj||d|#d|d | j|jj||	d|$d|d n
|#d |$d t| d| |r| S dS )a  
    Plot results of enrichment analysis as network.

    Parameters
    ----------
    net : DataFrame
        Network dataframe with ``source`` and ``target`` columns (``weight`` is optional).
    obs : None, DataFrame
        Input of enrichment analysis, needs to be a one row dataframe with targets as features. Used to filter net.
    act : None, DataFrame
        Ouput of enrichment analysis, needs to be a one row dataframe with sources as features. Used to filter net.
    n_sources : str, list, int
        Number of top sources to plot or list of source names.
    n_targets : str, list, int
        Number of top targets to plot or list of target names.
    by_abs : bool
        Whether to consider the absolute value when sorting for ``n_sources`` and ``n_targets``.
    node_size : float
        Size of the nodes in the plot.
    label_size : int
        Size of the labels in the plot.
    s_cmap : str
        Color or colormap to use to color sources.
    t_cmap : str
        Color or colormap to use to color targets.
    vcenter : bool
        Whether to center colors around 0.
    c_pos_w : str
        Color for edges with positive weights. If no weights are available, they are set to positive by default.
    c_neg_w : str
        Color for edges with negative weights.
    s_label : str
        Label to place in the source colorbar.
    t_label : str
        Label to place in the target colorbar.
    layout : str
        Layout to use to order the nodes. Check ``igraph`` documentation for more options.
    figsize : tuple
        Figure size.
    dpi : int
        DPI resolution of figure.
    return_fig : bool
        Whether to return a Figure object or not.
    save : str, None
        Path to where to save the plot. Infer the filetype if ending on {``.pdf``, ``.png``, ``.svg``}.

    Returns
    -------
    fig : Figure, None
        If return_fig, returns Figure object.
    Fr   TNr;   r  r   0r  r<   )r   c                    s   g | ]
}|d kr
n qS rE  r   )r   r   c_neg_wc_pos_wr   r   r   [  r  z plot_network.<locals>.<listcomp>r=   r\   rA  r:   r  )r   r   r   r   r  )r  r  )r  r   )r  r  )r<   layoutvertex_sizevertex_label_size)r   rS   
horizontal)caxr  r   r  )r   r%   r   r  ro   r!  r0   onesr   rN  r@  r?  r  rX  r  gridspecGridSpecadd_subplotrq   r   r   r   rM   r-   )%r   rC  r1  rA  rJ  rB  	node_size
label_sizerQ  rR  r   rc  rb  s_labelt_labelrd  rG   rH   r   r,   r   r   r$   sourcestargetsfactfobsfnetrT  rU  r>  rW  r*   gsax1ax2ax3r   ra  r   plot_network  sF   
8
  

rz  )F)NN)NNr:   r;   r<   r=   r>   r?   NNr@   rA   rB   rC   rE   NFN)r:   r>   r?   NNr@   rA   rB   rC   rE   NFN)	r;   r<   r=   r   r   rE   NFN)NFFrC   rE   NNNrA   FN)r   FrO   Nr   NrC   rE   NFN)r   r   NTTTr   rE   NNNNrY   FN)r   r   Nr  r  rE   FN)	Nr   rE   NNNNFN)r   rE   NFN)NNNr   rI  r   rJ  rK  rL  rE   NFN)rc  rK   rK  rL  rE   NFN)r;   r<   ro  r   rE   FN)	rB   NrY   Nr  rE   NFN)r:   r  Nr  rE   NFN)NNr:   r   Tr?   r:   ro  rK  FrY  rZ  r[  r\  r]  r^  r_  FN)5numpyr0   scipy.sparser   pandasro   anndatar   prer   r   r   utils_anndatar   r	   r
   r   method_gsear   r   r   r   r%   r-   r9   r   r   r   r   r   r   r   r   r  r0  r>  rH  rb  rn  r  r  r  r  r,  r5  r8  r?  r@  rD  rL  rN  rX  rz  r   r   r   r   <module>   s    


 
b
W
N
m
h
Q

mE
Z
C
 
G
n
+
 	