# -*- coding: utf-8 -*- """ Tools to compute persistence diagrams Persistent homology from ripser and gudhi library Confidence sets from arxiv:1303.7117 """ import numpy as np from scipy.spatial.distance import directed_hausdorff import matplotlib.pyplot as plt from tqdm import trange import ripser from persim import plot_diagrams import gudhi from decorators import multi_input def hausdorff(data1, data2, homdim, coeff): """Hausdorff metric between two persistence diagrams""" dgm1 = (ripser.ripser(data1,maxdim=homdim,coeff=coeff))['dgms'] dgm2 = (ripser.ripser(data2,maxdim=homdim,coeff=coeff))['dgms'] distance = directed_hausdorff(dgm1[homdim], dgm2[homdim])[0] return distance @multi_input def confidence(X, alpha=0.05, Nsubsamples=20, homdim=1, coeff=2): """ Compute the confidence interval of the persistence diagram of a dataset Computation done by subsampling as in arxiv:1303.7117 Parameters ---------- X: dataframe(n_datapoints, n_features): Dataframe containing the data alpha : float between 0 and 1, optional, default 0.05 1-alpha is the confidence Nsubsamples : int, optional, default 20 The number of subsamples homdim : int, optional, default 1 The dimension of the homology coeff : int prime, optional, default 2 The coefficient basis """ N = X.shape[0] distances = np.zeros(Nsubsamples) iterator = trange(0, Nsubsamples, position=0, leave=True) iterator.set_description("Computing confidence interval") for i in iterator: subsample = X.iloc[np.random.choice(N, N, replace=True)] distances[i] = hausdorff(X, subsample, homdim, coeff) distances.sort() confidence = np.sqrt(2) * 2 * distances[int(alpha*Nsubsamples)] return confidence @multi_input def persistence(X, homdim=1, coeff=2, threshold=float('inf'), show_largest_homology=0, distance_matrix=False, Nsubsamples=0, alpha=0.05, cycle=None, save_path=None, ax=None): """ Plot the persistence diagram of a dataset using ripser Also prints the five largest homology components Parameters ---------- X: dataframe(n_datapoints, n_features): Dataframe containing the data homdim : int, optional, default 1 The dimension of the homology coeff : int prime, optional, default 2 The coefficient basis threshold : float, optional, default infinity The maximum distance in the filtration show_largest_homology: int, optional, default 0 Print this many of the largest homology components distance_matrix : bool, optional, default False When true X will be interepreted as a distance matrix Nsubsamples : int, optional, default 0 The number of subsamples used in computing the confidence interval Does not compute the confidence interval when this is 0 alpha : float between 0 and 1, optional, default 0.05 1-alpha is the confidence cycle : int, optional, default None If given highlight the homology component in the plot corresponding to this cycle id save_path : str, optional, default None When given save the plot here """ custom_ax = ax is not None if ax is None: ax = plt result = ripser.ripser(X, maxdim=homdim, coeff=coeff, do_cocycles=True, distance_matrix=distance_matrix, thresh=threshold) diagrams = result['dgms'] plot_diagrams(diagrams, show=False, ax=ax) if (Nsubsamples>0): conf = confidence(X, alpha, Nsubsamples, homdim, 2) line_length = 10000 plt.plot([0, line_length], [conf, line_length + conf], color='green', linestyle='dashed',linewidth=2) if cycle is not None: dgm1 = diagrams[1] ax.scatter(dgm1[cycle, 0], dgm1[cycle, 1], 20, 'k', 'x') if save_path is not None: path = save_path + 'Z' + str(coeff) if (Nsubsamples>0): path += '_confidence' + str(1-alpha) path += '.png' plt.savefig(path) if not custom_ax: plt.show() if show_largest_homology != 0: dgm = diagrams[homdim] largest_indices = np.argsort(dgm[:, 0] - dgm[:, 1]) largest_components = dgm[largest_indices[:show_largest_homology]] print(f"Largest {homdim}-homology components:") print(largest_components) return @multi_input def persistence_witness(X, number_of_landmarks=100, max_alpha_square=0.0, homdim=1): """ Plot the persistence diagram of a dataset using gudhi Uses a witness complex allowing it to be used on larger datasets Parameters ---------- X: dataframe(n_datapoints, n_features): Dataframe containing the data number_of_landmarks : int, optional, default 100 The number of landmarks in the witness complex max_alpha_square : double, optional, default 0.0 Maximal squared relaxation parameter homdim : int, optional, default 1 The dimension of the homology """ print("Sampling landmarks...", end=" ") witnesses = X.to_numpy() landmarks = gudhi.pick_n_random_points( points=witnesses, nb_points=number_of_landmarks ) print("done") message = ( "EuclideanStrongWitnessComplex with max_edge_length=" + repr(max_alpha_square) + " - Number of landmarks=" + repr(number_of_landmarks) ) print(message) witness_complex = gudhi.EuclideanStrongWitnessComplex( witnesses=witnesses, landmarks=landmarks ) simplex_tree = witness_complex.create_simplex_tree( max_alpha_square=max_alpha_square, limit_dimension=homdim ) message = "Number of simplices=" + repr(simplex_tree.num_simplices()) print(message) diag = simplex_tree.persistence() print("betti_numbers()=") print(simplex_tree.betti_numbers()) gudhi.plot_persistence_diagram(diag, band=0.0) plt.show() return