
Clean up the repo

master
Lev committed 2 years ago
parent · commit ab3c64ff03
  1. AnalyzeExp.ipynb (+78)
  2. Decoded_Orientation.pdf (BIN)
  3. NoisyCircle.ipynb (+358)
  4. TestInteractivity.ipynb (+311)
  5. TuningCurvesFull.ipynb (+560)
  6. model/plotting_utils.py (renamed)
  7. model/sym_model.py (renamed)
  8. model/utils.py (renamed)
  9. numerical/__init__.py (renamed)
  10. numerical/decoding.py (+189)
  11. numerical/decorators.py (renamed)
  12. numerical/dimension.py (renamed)
  13. numerical/example.py (renamed)
  14. numerical/gratings.py (renamed)
  15. numerical/load.py (renamed)
  16. numerical/noisereduction.py (renamed)
  17. numerical/persistence.py (+169)
  18. numerical/plotting.py (renamed)

78
AnalyzeExp.ipynb

@@ -0,0 +1,78 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "from load import pkl_load"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "Can't get attribute 'simple_cell_model' on <module '__main__'>",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
      "Input \u001B[0;32mIn [2]\u001B[0m, in \u001B[0;36m<cell line: 1>\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m model_center_fp \u001B[38;5;241m=\u001B[39m \u001B[43mpkl_load\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43msimple_cell_center_fp\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n",
      "File \u001B[0;32m~/dev/amgen/load.py:6\u001B[0m, in \u001B[0;36mpkl_load\u001B[0;34m(filename)\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mpkl_load\u001B[39m(filename):\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mopen\u001B[39m(filename, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mrb\u001B[39m\u001B[38;5;124m'\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m f:\n\u001B[0;32m----> 6\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mpickle\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mload\u001B[49m\u001B[43m(\u001B[49m\u001B[43mf\u001B[49m\u001B[43m)\u001B[49m\n",
      "\u001B[0;31mAttributeError\u001B[0m: Can't get attribute 'simple_cell_model' on <module '__main__'>"
     ]
    }
   ],
   "source": [
    "model_center_fp = pkl_load('simple_cell_center_fp')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
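
The AttributeError in the second cell above is the usual pickle namespace issue: the object in 'simple_cell_center_fp' was written from a session where the class simple_cell_model lived in __main__, so pickle.load looks the class up on __main__ and fails. A minimal fix sketch, assuming the class can be imported from sym_model (the defining module is not shown in this commit):

# Hypothetical fix sketch, not part of the commit.
# Binding simple_cell_model at the notebook's top level (which is __main__)
# lets pickle.load resolve the reference stored in the pickle file.
from sym_model import simple_cell_model  # assumed location of the pickled class
from load import pkl_load

model_center_fp = pkl_load('simple_cell_center_fp')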

BIN
Decoded_Orientation.pdf

Binary file not shown.

358
NoisyCircle.ipynb

File diff suppressed because one or more lines are too long

311
TestInteractivity.ipynb

File diff suppressed because one or more lines are too long

560
TuningCurvesFull.ipynb

File diff suppressed because one or more lines are too long

0
plotting_utils.py → model/plotting_utils.py

0
sym_model.py → model/sym_model.py

0
utils.py → model/utils.py

0
model/__init__.py → numerical/__init__.py

189
numerical/decoding.py

@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
"""
Use cohomology to decode datasets with circular parameters
Persistent homology from arxiv:1908.02518
Homological decoding from DOI:10.1007/s00454-011-9344-x and arxiv:1711.07205
"""
import math
import numpy as np
from scipy.optimize import least_squares
import pandas as pd
from tqdm import trange
import ripser

from persistence import persistence

EPSILON = 0.0000000000001


def shortest_cycle(graph, node2, node1):
    """
    Returns the shortest cycle going through an edge
    Used for computing weights in decode

    Parameters
    ----------
    graph: ndarray (n_nodes, n_nodes)
        A matrix containing the weights of the edges in the graph
    node1: int
        The index of the first node of the edge
    node2: int
        The index of the second node of the edge

    Returns
    -------
    cycle: list of ints
        A list of indices representing the nodes of the cycle in order
    """
    N = graph.shape[0]
    distances = np.inf * np.ones(N)
    distances[node2] = 0
    prev_nodes = np.zeros(N)
    prev_nodes[:] = np.nan
    prev_nodes[node2] = node1
    # Bellman-Ford style relaxation until a path from node2 back to node1 is found
    while math.isnan(prev_nodes[node1]):
        distances_buffer = distances.copy()
        for j in range(N):
            possible_path_lengths = distances_buffer + graph[:, j]
            if np.min(possible_path_lengths) < distances[j]:
                prev_nodes[j] = np.argmin(possible_path_lengths)
                distances[j] = np.min(possible_path_lengths)
    prev_nodes = prev_nodes.astype(int)
    # Walk the predecessors back from node1 to node2, then close the cycle
    cycle = [node1]
    while cycle[0] != node2:
        cycle.insert(0, prev_nodes[cycle[0]])
    cycle.insert(0, node1)
    return cycle

def cohomological_parameterization(X, cocycle_number=1, coeff=2, weighted=False):
    """
    Compute an angular parametrization on the data set corresponding to a given
    1-cycle

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    cocycle_number: int, optional, default 1
        An integer specifying the 1-cycle used
        The n-th most stable 1-cycle is used, where n = cocycle_number
    coeff: int prime, optional, default 2
        The coefficient basis in which we compute the cohomology
    weighted: bool, optional, default False
        When true use a weighted graph for smoother parameterization
        as proposed in arxiv:1711.07205

    Returns
    -------
    decoding: dataframe(n_datapoints)
        The parameterization of the dataset consisting of a number between
        0 and 1 for each datapoint, to be interpreted modulo 1
    """
    # Get the cocycle
    result = ripser.ripser(X, maxdim=1, coeff=coeff, do_cocycles=True)
    diagrams = result['dgms']
    cocycles = result['cocycles']
    D = result['dperm2all']
    dgm1 = diagrams[1]
    idx = np.argsort(dgm1[:, 1] - dgm1[:, 0])[-cocycle_number]
    cocycle = cocycles[1][idx]
    persistence(X, homdim=1, coeff=coeff, show_largest_homology=0,
                Nsubsamples=0, save_path=None, cycle=idx)
    thresh = dgm1[idx, 1] - EPSILON

    # Compute connectivity
    N = X.shape[0]
    connectivity = np.zeros([N, N])
    for i in range(N):
        for j in range(i):
            if D[i, j] <= thresh:
                connectivity[i, j] = 1

    # Lift cocycle values to representatives centred around zero
    cocycle_array = np.zeros([N, N])
    for i in range(cocycle.shape[0]):
        cocycle_array[cocycle[i, 0], cocycle[i, 1]] = (
            ((cocycle[i, 2] + coeff / 2) % coeff) - coeff / 2
        )

    # Weights
    if weighted:
        def real_cocycle(x):
            real_cocycle = (
                connectivity * (cocycle_array + np.subtract.outer(x, x))
            )
            return np.ravel(real_cocycle)

        # Compute graph
        x0 = np.zeros(N)
        res = least_squares(real_cocycle, x0)
        real_cocycle_array = res.fun
        real_cocycle_array = real_cocycle_array.reshape(N, N)
        real_cocycle_array = real_cocycle_array - np.transpose(real_cocycle_array)
        graph = np.array(real_cocycle_array > 0).astype(float)
        graph[graph == 0] = np.inf
        graph = (D + EPSILON) * graph  # Add epsilon to avoid NaNs

        # Compute weights
        cycle_counts = np.zeros([N, N])
        iterator = trange(0, N, position=0, leave=True)
        iterator.set_description("Computing weights for decoding")
        for i in iterator:
            for j in range(N):
                if graph[i, j] != np.inf:
                    cycle = shortest_cycle(graph, j, i)
                    for k in range(len(cycle) - 1):
                        cycle_counts[cycle[k], cycle[k + 1]] += 1
        weights = cycle_counts / (D + EPSILON) ** 2
        weights = np.sqrt(weights)
    else:
        weights = np.outer(np.ones(N), np.ones(N))

    def real_cocycle(x):
        real_cocycle = (
            weights * connectivity * (cocycle_array + np.subtract.outer(x, x))
        )
        return np.ravel(real_cocycle)

    # Smooth cocycle
    print("Decoding...", end=" ")
    x0 = np.zeros(N)
    res = least_squares(real_cocycle, x0)
    decoding = res.x
    decoding = np.mod(decoding, 1)
    print("done")
    decoding = pd.DataFrame(decoding, columns=["decoding"])
    decoding = decoding.set_index(X.index)
    return decoding

def remove_feature(X, decoding, shift=0, cut_amplitude=1.0):
    """
    Removes a decoded feature from a dataset by making a cut at a fixed value
    of the decoding

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    decoding : dataframe(n_datapoints)
        The decoded feature, assumed to be angular with periodicity 1
    shift : float between 0 and 1, optional, default 0
        The location of the cut
    cut_amplitude : float, optional, default 1
        Amplitude of the cut

    Returns
    -------
    reduced_data : dataframe(n_datapoints, n_features)
        The data with a cut proportional to the decoding added to every feature
    """
    cuts = np.zeros(X.shape)
    decoding = decoding.to_numpy()[:, 0]
    for i in range(X.shape[1]):
        column = X.iloc[:, i]
        effective_amplitude = cut_amplitude * (np.max(column) - np.min(column))
        cuts[:, i] = effective_amplitude * ((decoding - shift) % 1)
    reduced_data = X + cuts
    return reduced_data
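
A minimal usage sketch for the decoding entry points added above (not part of the commit), assuming a pandas DataFrame sampled from a noisy circle and that the code is run from inside numerical/ so the flat imports resolve:

# Hypothetical usage sketch: decode the angle on a noisy circle, then cut
# the decoded feature out of the data.
import numpy as np
import pandas as pd

from decoding import cohomological_parameterization, remove_feature

# Synthetic test data: 300 noisy points on a circle in the plane
rng = np.random.default_rng(0)
theta = rng.uniform(0, 2 * np.pi, 300)
points = np.stack([np.cos(theta), np.sin(theta)], axis=1)
points += 0.05 * rng.standard_normal(points.shape)
X = pd.DataFrame(points)

# Angular parameterization from the most persistent 1-cycle (values mod 1)
decoding = cohomological_parameterization(X, cocycle_number=1, coeff=2)

# Remove the decoded circular feature before looking for further structure
X_reduced = remove_feature(X, decoding, shift=0.0)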

0
model/decorators.py → numerical/decorators.py

0
model/dimension.py → numerical/dimension.py

0
model/example.py → numerical/example.py

0
model/gratings.py → numerical/gratings.py

0
model/load.py → numerical/load.py

0
model/noisereduction.py → numerical/noisereduction.py

169
numerical/persistence.py

@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-
"""
Tools to compute persistence diagrams
Persistent homology from ripser and gudhi library
Confidence sets from arxiv:1303.7117
"""
import numpy as np
from scipy.spatial.distance import directed_hausdorff
import matplotlib.pyplot as plt
from tqdm import trange
import ripser
from persim import plot_diagrams
import gudhi

from decorators import multi_input


def hausdorff(data1, data2, homdim, coeff):
    """Hausdorff metric between two persistence diagrams"""
    dgm1 = (ripser.ripser(data1, maxdim=homdim, coeff=coeff))['dgms']
    dgm2 = (ripser.ripser(data2, maxdim=homdim, coeff=coeff))['dgms']
    distance = directed_hausdorff(dgm1[homdim], dgm2[homdim])[0]
    return distance


@multi_input
def confidence(X, alpha=0.05, Nsubsamples=20, homdim=1, coeff=2):
    """
    Compute the confidence interval of the persistence diagram of a dataset
    Computation done by subsampling as in arxiv:1303.7117

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    alpha : float between 0 and 1, optional, default 0.05
        1-alpha is the confidence
    Nsubsamples : int, optional, default 20
        The number of subsamples
    homdim : int, optional, default 1
        The dimension of the homology
    coeff : int prime, optional, default 2
        The coefficient basis
    """
    N = X.shape[0]
    distances = np.zeros(Nsubsamples)
    iterator = trange(0, Nsubsamples, position=0, leave=True)
    iterator.set_description("Computing confidence interval")
    for i in iterator:
        subsample = X.iloc[np.random.choice(N, N, replace=True)]
        distances[i] = hausdorff(X, subsample, homdim, coeff)
    distances.sort()
    confidence = np.sqrt(2) * 2 * distances[int(alpha * Nsubsamples)]
    return confidence

@multi_input
def persistence(X, homdim=1, coeff=2, threshold=float('inf'),
                show_largest_homology=0, distance_matrix=False, Nsubsamples=0,
                alpha=0.05, cycle=None, save_path=None):
    """
    Plot the persistence diagram of a dataset using ripser
    Optionally prints the largest homology components

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    homdim : int, optional, default 1
        The dimension of the homology
    coeff : int prime, optional, default 2
        The coefficient basis
    threshold : float, optional, default infinity
        The maximum distance in the filtration
    show_largest_homology: int, optional, default 0
        Print this many of the largest homology components
    distance_matrix : bool, optional, default False
        When true X will be interpreted as a distance matrix
    Nsubsamples : int, optional, default 0
        The number of subsamples used in computing the confidence interval
        Does not compute the confidence interval when this is 0
    alpha : float between 0 and 1, optional, default 0.05
        1-alpha is the confidence
    cycle : int, optional, default None
        If given highlight the homology component in the plot corresponding to
        this cycle id
    save_path : str, optional, default None
        When given save the plot here
    """
    result = ripser.ripser(X, maxdim=homdim, coeff=coeff, do_cocycles=True,
                           distance_matrix=distance_matrix, thresh=threshold)
    diagrams = result['dgms']
    plot_diagrams(diagrams, show=False)
    if Nsubsamples > 0:
        # Confidence band (computed with Z/2 coefficients) drawn parallel to the diagonal
        conf = confidence(X, alpha, Nsubsamples, homdim, 2)
        line_length = 10000
        plt.plot([0, line_length], [conf, line_length + conf], color='green',
                 linestyle='dashed', linewidth=2)
    if cycle is not None:
        dgm1 = diagrams[1]
        plt.scatter(dgm1[cycle, 0], dgm1[cycle, 1], 20, 'k', 'x')
    if save_path is not None:
        path = save_path + 'Z' + str(coeff)
        if Nsubsamples > 0:
            path += '_confidence' + str(1 - alpha)
        path += '.png'
        plt.savefig(path)
    plt.show()
    if show_largest_homology != 0:
        dgm = diagrams[homdim]
        largest_indices = np.argsort(dgm[:, 0] - dgm[:, 1])
        largest_components = dgm[largest_indices[:show_largest_homology]]
        print(f"Largest {homdim}-homology components:")
        print(largest_components)
    return

@multi_input
def persistence_witness(X, number_of_landmarks=100, max_alpha_square=0.0,
                        homdim=1):
    """
    Plot the persistence diagram of a dataset using gudhi
    Uses a witness complex allowing it to be used on larger datasets

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    number_of_landmarks : int, optional, default 100
        The number of landmarks in the witness complex
    max_alpha_square : double, optional, default 0.0
        Maximal squared relaxation parameter
    homdim : int, optional, default 1
        The dimension of the homology
    """
    print("Sampling landmarks...", end=" ")
    witnesses = X.to_numpy()
    landmarks = gudhi.pick_n_random_points(
        points=witnesses, nb_points=number_of_landmarks
    )
    print("done")
    message = (
        "EuclideanStrongWitnessComplex with max_alpha_square="
        + repr(max_alpha_square)
        + " - Number of landmarks="
        + repr(number_of_landmarks)
    )
    print(message)
    witness_complex = gudhi.EuclideanStrongWitnessComplex(
        witnesses=witnesses, landmarks=landmarks
    )
    simplex_tree = witness_complex.create_simplex_tree(
        max_alpha_square=max_alpha_square,
        limit_dimension=homdim
    )
    message = "Number of simplices=" + repr(simplex_tree.num_simplices())
    print(message)
    diag = simplex_tree.persistence()
    print("betti_numbers()=")
    print(simplex_tree.betti_numbers())
    gudhi.plot_persistence_diagram(diag, band=0.0)
    plt.show()
    return
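
A usage sketch for the plotting entry points above (not part of the commit), under the same assumptions as the decoding example: a DataFrame input, flat imports from within numerical/, and that the multi_input decorator passes a single dataframe through unchanged:

# Hypothetical usage sketch: persistence diagram of a noisy circle with a
# subsample-based confidence band, plus the witness-complex variant.
import numpy as np
import pandas as pd

from persistence import persistence, persistence_witness

rng = np.random.default_rng(1)
theta = rng.uniform(0, 2 * np.pi, 400)
circle = np.stack([np.cos(theta), np.sin(theta)], axis=1)
circle += 0.1 * rng.standard_normal(circle.shape)
X = pd.DataFrame(circle)

# Ripser-based diagram with a confidence band estimated from 20 subsamples
persistence(X, homdim=1, coeff=2, Nsubsamples=20, alpha=0.05)

# Witness-complex diagram (gudhi backend) for larger datasets
persistence_witness(X, number_of_landmarks=50, max_alpha_square=1.0, homdim=1)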

0
model/plotting.py → numerical/plotting.py
