|
|
|
@ -1,4 +1,4 @@
|
|
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
""" |
|
|
|
|
Use cohomology to decode datasets with circular parameters |
|
|
|
|
|
|
|
|
@ -14,7 +14,7 @@ from tqdm import trange
|
|
|
|
|
|
|
|
|
|
import ripser |
|
|
|
|
|
|
|
|
|
from persistence import persistence |
|
|
|
|
from .persistence import persistence |
|
|
|
|
|
|
|
|
|
EPSILON = 0.0000000000001 |
|
|
|
|
|
|
|
|
@ -48,18 +48,19 @@ def shortest_cycle(graph, node2, node1):
|
|
|
|
|
while (math.isnan(prev_nodes[node1])): |
|
|
|
|
distances_buffer = distances |
|
|
|
|
for j in range(N): |
|
|
|
|
possible_path_lengths = distances_buffer + graph[:,j] |
|
|
|
|
possible_path_lengths = distances_buffer + graph[:, j] |
|
|
|
|
if (np.min(possible_path_lengths) < distances[j]): |
|
|
|
|
prev_nodes[j] = np.argmin(possible_path_lengths) |
|
|
|
|
distances[j] = np.min(possible_path_lengths) |
|
|
|
|
prev_nodes = prev_nodes.astype(int) |
|
|
|
|
cycle = [node1] |
|
|
|
|
while (cycle[0] != node2): |
|
|
|
|
cycle.insert(0,prev_nodes[cycle[0]]) |
|
|
|
|
cycle.insert(0,node1) |
|
|
|
|
cycle.insert(0, prev_nodes[cycle[0]]) |
|
|
|
|
cycle.insert(0, node1) |
|
|
|
|
return cycle |
|
|
|
|
|
|
|
|
|
def cohomological_parameterization(X ,cocycle_number=1, coeff=2,weighted=False): |
|
|
|
|
|
|
|
|
|
def cohomological_parameterization(X, cocycle_number=1, coeff=2, weighted=False): |
|
|
|
|
""" |
|
|
|
|
Compute an angular parametrization on the data set corresponding to a given |
|
|
|
|
1-cycle |
|
|
|
@ -89,67 +90,67 @@ def cohomological_parameterization(X ,cocycle_number=1, coeff=2,weighted=False):
|
|
|
|
|
cocycles = result['cocycles'] |
|
|
|
|
D = result['dperm2all'] |
|
|
|
|
dgm1 = diagrams[1] |
|
|
|
|
idx = np.argsort(dgm1[:, 1] - dgm1[:, 0])[-cocycle_number] |
|
|
|
|
idx = np.argsort(dgm1[:, 1] - dgm1[:, 0])[-cocycle_number] |
|
|
|
|
cocycle = cocycles[1][idx] |
|
|
|
|
persistence(X, homdim=1, coeff=coeff, show_largest_homology=0, |
|
|
|
|
Nsubsamples=0, save_path=None, cycle=idx) |
|
|
|
|
thresh = dgm1[idx, 1]-EPSILON |
|
|
|
|
|
|
|
|
|
thresh = dgm1[idx, 1] - EPSILON |
|
|
|
|
|
|
|
|
|
# Compute connectivity |
|
|
|
|
N = X.shape[0] |
|
|
|
|
connectivity = np.zeros([N,N]) |
|
|
|
|
connectivity = np.zeros([N, N]) |
|
|
|
|
for i in range(N): |
|
|
|
|
for j in range(i): |
|
|
|
|
if D[i, j] <= thresh: |
|
|
|
|
connectivity[i,j] = 1 |
|
|
|
|
cocycle_array = np.zeros([N,N]) |
|
|
|
|
|
|
|
|
|
connectivity[i, j] = 1 |
|
|
|
|
cocycle_array = np.zeros([N, N]) |
|
|
|
|
|
|
|
|
|
# Lift cocycle |
|
|
|
|
for i in range(cocycle.shape[0]): |
|
|
|
|
cocycle_array[cocycle[i,0],cocycle[i,1]] = ( |
|
|
|
|
((cocycle[i,2] + coeff/2) % coeff) - coeff/2 |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
cocycle_array[cocycle[i, 0], cocycle[i, 1]] = ( |
|
|
|
|
((cocycle[i, 2] + coeff / 2) % coeff) - coeff / 2 |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# Weights |
|
|
|
|
if (weighted): |
|
|
|
|
def real_cocycle(x): |
|
|
|
|
real_cocycle =( |
|
|
|
|
connectivity * (cocycle_array + np.subtract.outer(x, x)) |
|
|
|
|
) |
|
|
|
|
real_cocycle = ( |
|
|
|
|
connectivity * (cocycle_array + np.subtract.outer(x, x)) |
|
|
|
|
) |
|
|
|
|
return np.ravel(real_cocycle) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Compute graph |
|
|
|
|
x0 = np.zeros(N) |
|
|
|
|
res = least_squares(real_cocycle, x0) |
|
|
|
|
real_cocyle_array = res.fun |
|
|
|
|
real_cocyle_array = real_cocyle_array.reshape(N,N) |
|
|
|
|
real_cocyle_array = real_cocyle_array.reshape(N, N) |
|
|
|
|
real_cocyle_array = real_cocyle_array - np.transpose(real_cocyle_array) |
|
|
|
|
graph = np.array(real_cocyle_array>0).astype(float) |
|
|
|
|
graph[graph==0] = np.inf |
|
|
|
|
graph = (D + EPSILON) * graph # Add epsilon to avoid NaNs |
|
|
|
|
|
|
|
|
|
graph = np.array(real_cocyle_array > 0).astype(float) |
|
|
|
|
graph[graph == 0] = np.inf |
|
|
|
|
graph = (D + EPSILON) * graph # Add epsilon to avoid NaNs |
|
|
|
|
|
|
|
|
|
# Compute weights |
|
|
|
|
cycle_counts = np.zeros([N,N]) |
|
|
|
|
cycle_counts = np.zeros([N, N]) |
|
|
|
|
iterator = trange(0, N, position=0, leave=True) |
|
|
|
|
iterator.set_description("Computing weights for decoding") |
|
|
|
|
for i in iterator: |
|
|
|
|
for j in range(N): |
|
|
|
|
if (graph[i,j] != np.inf): |
|
|
|
|
if (graph[i, j] != np.inf): |
|
|
|
|
cycle = shortest_cycle(graph, j, i) |
|
|
|
|
for k in range(len(cycle)-1): |
|
|
|
|
cycle_counts[cycle[k], cycle[k+1]] += 1 |
|
|
|
|
|
|
|
|
|
weights = cycle_counts / (D + EPSILON)**2 |
|
|
|
|
for k in range(len(cycle) - 1): |
|
|
|
|
cycle_counts[cycle[k], cycle[k + 1]] += 1 |
|
|
|
|
|
|
|
|
|
weights = cycle_counts / (D + EPSILON) ** 2 |
|
|
|
|
weights = np.sqrt(weights) |
|
|
|
|
else: |
|
|
|
|
weights = np.outer(np.ones(N),np.ones(N)) |
|
|
|
|
|
|
|
|
|
weights = np.outer(np.ones(N), np.ones(N)) |
|
|
|
|
|
|
|
|
|
def real_cocycle(x): |
|
|
|
|
real_cocycle =( |
|
|
|
|
weights * connectivity * (cocycle_array + np.subtract.outer(x, x)) |
|
|
|
|
) |
|
|
|
|
real_cocycle = ( |
|
|
|
|
weights * connectivity * (cocycle_array + np.subtract.outer(x, x)) |
|
|
|
|
) |
|
|
|
|
return np.ravel(real_cocycle) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Smooth cocycle |
|
|
|
|
print("Decoding...", end=" ") |
|
|
|
|
x0 = np.zeros(N) |
|
|
|
@ -157,7 +158,7 @@ def cohomological_parameterization(X ,cocycle_number=1, coeff=2,weighted=False):
|
|
|
|
|
decoding = res.x |
|
|
|
|
decoding = np.mod(decoding, 1) |
|
|
|
|
print("done") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
decoding = pd.DataFrame(decoding, columns=["decoding"]) |
|
|
|
|
decoding = decoding.set_index(X.index) |
|
|
|
|
return decoding |
|
|
|
@ -180,9 +181,9 @@ def remove_feature(X, decoding, shift=0, cut_amplitude=1.0):
|
|
|
|
|
Amplitude of the cut |
|
|
|
|
""" |
|
|
|
|
cuts = np.zeros(X.shape) |
|
|
|
|
decoding = decoding.to_numpy()[:,0] |
|
|
|
|
decoding = decoding.to_numpy()[:, 0] |
|
|
|
|
for i in range(X.shape[1]): |
|
|
|
|
effective_amplitude = cut_amplitude * (np.max(X[i]) - np.min(X[i])) |
|
|
|
|
cuts[:,i] = effective_amplitude * ((decoding - shift) % 1) |
|
|
|
|
cuts[:, i] = effective_amplitude * ((decoding - shift) % 1) |
|
|
|
|
reduced_data = X + cuts |
|
|
|
|
return reduced_data |
|
|
|
|
return reduced_data |
|
|
|
|