Topology in neuroscience
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

169 lines
6.0 KiB

# -*- coding: utf-8 -*-
"""
Tools to compute persistence diagrams
Persistent homology from ripser and gudhi library
Confidence sets from arxiv:1303.7117
"""
import numpy as np
from scipy.spatial.distance import directed_hausdorff
import matplotlib.pyplot as plt
from tqdm import trange
import ripser
from persim import plot_diagrams
import gudhi
from decorators import multi_input
def hausdorff(data1, data2, homdim, coeff):
"""Hausdorff metric between two persistence diagrams"""
dgm1 = (ripser.ripser(data1,maxdim=homdim,coeff=coeff))['dgms']
dgm2 = (ripser.ripser(data2,maxdim=homdim,coeff=coeff))['dgms']
distance = directed_hausdorff(dgm1[homdim], dgm2[homdim])[0]
return distance
@multi_input
def confidence(X, alpha=0.05, Nsubsamples=20, homdim=1, coeff=2):
"""
Compute the confidence interval of the persistence diagram of a dataset
Computation done by subsampling as in arxiv:1303.7117
Parameters
----------
X: dataframe(n_datapoints, n_features):
Dataframe containing the data
alpha : float between 0 and 1, optional, default 0.05
1-alpha is the confidence
Nsubsamples : int, optional, default 20
The number of subsamples
homdim : int, optional, default 1
The dimension of the homology
coeff : int prime, optional, default 2
The coefficient basis
"""
N = X.shape[0]
distances = np.zeros(Nsubsamples)
iterator = trange(0, Nsubsamples, position=0, leave=True)
iterator.set_description("Computing confidence interval")
for i in iterator:
subsample = X.iloc[np.random.choice(N, N, replace=True)]
distances[i] = hausdorff(X, subsample, homdim, coeff)
distances.sort()
confidence = np.sqrt(2) * 2 * distances[int(alpha*Nsubsamples)]
return confidence
@multi_input
def persistence(X, homdim=1, coeff=2, threshold=float('inf'),
show_largest_homology=0, distance_matrix=False, Nsubsamples=0,
alpha=0.05, cycle=None, save_path=None):
"""
Plot the persistence diagram of a dataset using ripser
Also prints the five largest homology components
Parameters
----------
X: dataframe(n_datapoints, n_features):
Dataframe containing the data
homdim : int, optional, default 1
The dimension of the homology
coeff : int prime, optional, default 2
The coefficient basis
threshold : float, optional, default infinity
The maximum distance in the filtration
show_largest_homology: int, optional, default 0
Print this many of the largest homology components
distance_matrix : bool, optional, default False
When true X will be interepreted as a distance matrix
Nsubsamples : int, optional, default 0
The number of subsamples used in computing the confidence interval
Does not compute the confidence interval when this is 0
alpha : float between 0 and 1, optional, default 0.05
1-alpha is the confidence
cycle : int, optional, default None
If given highlight the homology component in the plot corresponding to
this cycle id
save_path : str, optional, default None
When given save the plot here
"""
result = ripser.ripser(X, maxdim=homdim, coeff=coeff, do_cocycles=True,
distance_matrix=distance_matrix, thresh=threshold)
diagrams = result['dgms']
plot_diagrams(diagrams, show=False)
if (Nsubsamples>0):
conf = confidence(X, alpha, Nsubsamples, homdim, 2)
line_length = 10000
plt.plot([0, line_length], [conf, line_length + conf], color='green',
linestyle='dashed',linewidth=2)
if cycle is not None:
dgm1 = diagrams[1]
plt.scatter(dgm1[cycle, 0], dgm1[cycle, 1], 20, 'k', 'x')
if save_path is not None:
path = save_path + 'Z' + str(coeff)
if (Nsubsamples>0):
path += '_confidence' + str(1-alpha)
path += '.png'
plt.savefig(path)
plt.show()
if show_largest_homology != 0:
dgm = diagrams[homdim]
largest_indices = np.argsort(dgm[:, 0] - dgm[:, 1])
largest_components = dgm[largest_indices[:show_largest_homology]]
print(f"Largest {homdim}-homology components:")
print(largest_components)
return
@multi_input
def persistence_witness(X, number_of_landmarks=100, max_alpha_square=0.0,
homdim=1):
"""
Plot the persistence diagram of a dataset using gudhi
Uses a witness complex allowing it to be used on larger datasets
Parameters
----------
X: dataframe(n_datapoints, n_features):
Dataframe containing the data
number_of_landmarks : int, optional, default 100
The number of landmarks in the witness complex
max_alpha_square : double, optional, default 0.0
Maximal squared relaxation parameter
homdim : int, optional, default 1
The dimension of the homology
"""
print("Sampling landmarks...", end=" ")
witnesses = X.to_numpy()
landmarks = gudhi.pick_n_random_points(
points=witnesses, nb_points=number_of_landmarks
)
print("done")
message = (
"EuclideanStrongWitnessComplex with max_edge_length="
+ repr(max_alpha_square)
+ " - Number of landmarks="
+ repr(number_of_landmarks)
)
print(message)
witness_complex = gudhi.EuclideanStrongWitnessComplex(
witnesses=witnesses, landmarks=landmarks
)
simplex_tree = witness_complex.create_simplex_tree(
max_alpha_square=max_alpha_square,
limit_dimension=homdim
)
message = "Number of simplices=" + repr(simplex_tree.num_simplices())
print(message)
diag = simplex_tree.persistence()
print("betti_numbers()=")
print(simplex_tree.betti_numbers())
gudhi.plot_persistence_diagram(diag, band=0.0)
plt.show()
return