Source code for acdc_py.get_opt

from ._get_opt import _GS_clustering, _GS_metric_search_data, _GS_metric_value, _GS_params, _SA_clustering, _SA_metric_search_data, _SA_metric_value, _SA_params

### ---------- EXPORT LIST ----------
# An empty __all__ means a star-import of this module exports no names; the
# functions defined below are reached by explicit import or attribute access.
# NOTE(review): presumably intentional so the package controls what it
# re-exports - confirm against the package __init__.
__all__ = []

# @-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-
# ------------------------------------------------------------------------------
# -------------------------- ** OPTIMIZATION FUNCS ** --------------------------
# ------------------------------------------------------------------------------
# @-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-@-

def SA_clustering(
    adata,
    dist_slot=None,
    use_reduction=True,
    reduction_slot="X_pca",
    opt_metric="sil_mean",
    opt_metric_dir="max",
    cluster_labels=None,
    cluster_name=None,
    n_clusts=None,
    seed=0,
    approx_size=None,
    key_added="clusters",
    knn_slot="knn",
    verbose=True,
    njobs=1,
):
    """\
    Cluster the data with the parameters that optimize a chosen metric over
    the results of the SA function.

    The acdc.SA function must have been run beforehand so that
    adata.uns['SA_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing a distance object in adata.obsp.
    dist_slot : default: None
        Slot in adata.obsp holding a pre-generated distance matrix computed
        across all cells, used to construct the NN. With None, the distance
        matrix is computed automatically as a correlation distance and
        stored in "corr_dist".
    use_reduction : default: True
        True clusters on a reduction (highly recommended - accurate and much
        faster); False clusters on the direct matrix.
    reduction_slot : default: "X_pca"
        Which reduction slot to use when use_reduction is True.
    opt_metric : default: "sil_mean"
        The metric whose optimum selects the clustering parameters.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    cluster_labels : default: None
        A column in adata.obs with cluster labels that contains the cluster
        to subcluster. The cluster itself is chosen with cluster_name.
    cluster_name : default: None
        The cluster from cluster_labels to subcluster. With None, the whole
        dataset is clustered.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and compute the optimal clustering among them.
    seed : default: 0
        Random seed to use.
    approx_size : default: None
        When set to a positive integer, the search is performed on a
        subsample of this size and the results are diffused to the rest of
        the data. This approximates the optimal solution when the dataset is
        too large for the time or memory available.
    key_added : default: "clusters"
        Slot in adata.obs where the resulting clusters are stored.
    knn_slot : default: "knn"
        Slot in adata.uns holding the KNN array used to compute the
        neighbors graph.
    verbose : default: True
        True prints additional output; False suppresses it.
    njobs : default: 1
        Parallelization option that allows users to speed up runtime.

    Returns
    -------
    Nothing is returned. Fields are added to the input adata so that the
    clustering is stored in adata.obs[key_added].
    """
    # Arguments are forwarded positionally, so this order has to mirror the
    # parameter order of the private implementation.
    _SA_clustering(
        adata,
        dist_slot,
        use_reduction,
        reduction_slot,
        opt_metric,
        opt_metric_dir,
        cluster_labels,
        cluster_name,
        n_clusts,
        seed,
        approx_size,
        key_added,
        knn_slot,
        verbose,
        njobs,
    )
def SA_params(
    adata,
    opt_metric="sil_mean",
    opt_metric_dir="max",
    n_clusts=None,
):
    """\
    Look up the clustering parameters that optimize a chosen metric over the
    results of the SA function.

    The acdc.SA function must have been run beforehand so that
    adata.uns['SA_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing the results of the acdc.SA function in
        adata.uns['SA_results_dict'].
    opt_metric : default: "sil_mean"
        The metric whose optimum selects the clustering parameters.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and retrieve the optimal parameters among them.

    Returns
    -------
    A dictionary with keys opt_res and opt_knn and the corresponding values
    that produce the requested clustering solution.
    """
    optimal_params = _SA_params(adata, opt_metric, opt_metric_dir, n_clusts)
    return optimal_params
def SA_metric_value(
    adata,
    opt_metric="sil_mean",
    opt_metric_dir="max",
    n_clusts=None,
):
    """\
    Look up the best value a chosen metric reaches over the results of the
    SA function.

    This is the value of the metric under the clustering parameters that
    optimize it. The acdc.SA function must have been run beforehand so that
    adata.uns['SA_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing the results of the acdc.SA function in
        adata.uns['SA_results_dict'].
    opt_metric : default: "sil_mean"
        The metric whose optimum is reported.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and report the optimum among them.

    Returns
    -------
    The value of the metric when optimized.
    """
    best_value = _SA_metric_value(adata, opt_metric, opt_metric_dir, n_clusts)
    return best_value
def SA_metric_search_data(
    adata,
    opt_metric="sil_mean",
    opt_metric_dir="max",
    n_clusts=None,
):
    """\
    Look up the optimal clustering parameters together with all of their
    associated statistics, using the results of the SA function.

    The parameters are the ones that optimize the chosen metric. The acdc.SA
    function must have been run beforehand so that
    adata.uns['SA_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing the results of the acdc.SA function in
        adata.uns['SA_results_dict'].
    opt_metric : default: "sil_mean"
        The metric whose optimum selects the clustering parameters.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and retrieve the optimal parameters among them.

    Returns
    -------
    A pandas series containing the resolution and knn that produce the
    requested clustering solution along with all other metrics.
    """
    search_row = _SA_metric_search_data(
        adata, opt_metric, opt_metric_dir, n_clusts
    )
    return search_row
def GS_clustering(
    adata,
    dist_slot=None,
    use_reduction=True,
    reduction_slot="X_pca",
    opt_metric="sil_mean",
    opt_metric_dir="max",
    cluster_labels=None,
    cluster_name=None,
    n_clusts=None,
    seed=0,
    approx_size=None,
    key_added="clusters",
    knn_slot="knn",
    verbose=True,
    njobs=1,
):
    """\
    Cluster the data with the parameters that optimize a chosen metric over
    the results of the GS function.

    The acdc.GS function must have been run beforehand so that
    adata.uns['GS_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing a distance object in adata.obsp.
    dist_slot : default: None
        Slot in adata.obsp holding a pre-generated distance matrix computed
        across all cells, used to construct the NN. With None, the distance
        matrix is computed automatically as a correlation distance and
        stored in "corr_dist".
    use_reduction : default: True
        True clusters on a reduction (highly recommended - accurate and much
        faster); False clusters on the direct matrix.
    reduction_slot : default: "X_pca"
        Which reduction slot to use when use_reduction is True.
    opt_metric : default: "sil_mean"
        The metric whose optimum selects the clustering parameters.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    cluster_labels : default: None
        A column in adata.obs with cluster labels that contains the cluster
        to subcluster. The cluster itself is chosen with cluster_name.
    cluster_name : default: None
        The cluster from cluster_labels to subcluster. With None, the whole
        dataset is clustered.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and compute the optimal clustering among them.
    seed : default: 0
        Random seed to use.
    approx_size : default: None
        When set to a positive integer, GS is performed on a subsample of
        this size instead of the entire dataset and the results are
        diffused. This approximates the optimal solution when the dataset is
        too large to perform GS on due to time or memory constraints.
    key_added : default: "clusters"
        Slot in adata.obs where the resulting clusters are stored.
    knn_slot : default: "knn"
        Slot in adata.uns holding the KNN array used to compute the
        neighbors graph.
    verbose : default: True
        True prints additional output; False suppresses it.
    njobs : default: 1
        Parallelization option that allows users to speed up runtime.

    Returns
    -------
    Nothing is returned. Fields are added to the input adata so that the
    clustering is stored in adata.obs[key_added].
    """
    # Arguments are forwarded positionally, so this order has to mirror the
    # parameter order of the private implementation.
    _GS_clustering(
        adata,
        dist_slot,
        use_reduction,
        reduction_slot,
        opt_metric,
        opt_metric_dir,
        cluster_labels,
        cluster_name,
        n_clusts,
        seed,
        approx_size,
        key_added,
        knn_slot,
        verbose,
        njobs,
    )
def GS_params(
    adata,
    opt_metric="sil_mean",
    opt_metric_dir="max",
    n_clusts=None,
):
    """\
    Look up the clustering parameters that optimize a chosen metric over the
    results of the GS function.

    The acdc.GS function must have been run beforehand so that
    adata.uns['GS_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing the results of the acdc.GS function in
        adata.uns['GS_results_dict'].
    opt_metric : default: "sil_mean"
        The metric whose optimum selects the clustering parameters.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and retrieve the optimal parameters among them.

    Returns
    -------
    A dictionary with keys opt_res and opt_knn and the corresponding values
    that produce the requested clustering solution.
    """
    optimal_params = _GS_params(adata, opt_metric, opt_metric_dir, n_clusts)
    return optimal_params
def GS_metric_value(
    adata,
    opt_metric="sil_mean",
    opt_metric_dir="max",
    n_clusts=None,
):
    """\
    Look up the best value a chosen metric reaches over the results of the
    GS function.

    This is the value of the metric under the clustering parameters that
    optimize it. The acdc.GS function must have been run beforehand so that
    adata.uns['GS_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing the results of the acdc.GS function in
        adata.uns['GS_results_dict'].
    opt_metric : default: "sil_mean"
        The metric whose optimum is reported.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and report the optimum among them.

    Returns
    -------
    The value of the metric when optimized.
    """
    best_value = _GS_metric_value(adata, opt_metric, opt_metric_dir, n_clusts)
    return best_value
def GS_metric_search_data(
    adata,
    opt_metric="sil_mean",
    opt_metric_dir="max",
    n_clusts=None,
):
    """\
    Look up the optimal clustering parameters together with all of their
    associated statistics, using the results of the GS function.

    The parameters are the ones that optimize the chosen metric. The acdc.GS
    function must have been run beforehand so that
    adata.uns['GS_results_dict'] exists.

    Parameters
    ----------
    adata
        An anndata object containing the results of the acdc.GS function in
        adata.uns['GS_results_dict'].
    opt_metric : default: "sil_mean"
        The metric whose optimum selects the clustering parameters.
    opt_metric_dir : default: "max"
        "max" if larger values of opt_metric are better, "min" if smaller
        values are better.
    n_clusts : default: None
        If not None, restrict the search space to solutions with exactly
        this many clusters and retrieve the optimal parameters among them.

    Returns
    -------
    A pandas series containing the resolution and knn that produce the
    requested clustering solution along with all other metrics.
    """
    search_row = _GS_metric_search_data(
        adata, opt_metric, opt_metric_dir, n_clusts
    )
    return search_row