Lev
2 years ago
18 changed files with 436 additions and 1229 deletions
@@ -0,0 +1,78 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "from load import pkl_load"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "Can't get attribute 'simple_cell_model' on <module '__main__'>",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
      "Input \u001B[0;32mIn [2]\u001B[0m, in \u001B[0;36m<cell line: 1>\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m model_center_fp \u001B[38;5;241m=\u001B[39m \u001B[43mpkl_load\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43msimple_cell_center_fp\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n",
      "File \u001B[0;32m~/dev/amgen/load.py:6\u001B[0m, in \u001B[0;36mpkl_load\u001B[0;34m(filename)\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mpkl_load\u001B[39m(filename):\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mopen\u001B[39m(filename, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mrb\u001B[39m\u001B[38;5;124m'\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m f:\n\u001B[0;32m----> 6\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mpickle\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mload\u001B[49m\u001B[43m(\u001B[49m\u001B[43mf\u001B[49m\u001B[43m)\u001B[49m\n",
      "\u001B[0;31mAttributeError\u001B[0m: Can't get attribute 'simple_cell_model' on <module '__main__'>"
     ]
    }
   ],
   "source": [
    "model_center_fp = pkl_load('simple_cell_center_fp')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
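The AttributeError in the second cell is pickle's usual name-resolution failure: pickle.load restores an object by looking up its class by module and attribute name, so an object whose class simple_cell_model was defined in __main__ at dump time cannot be rebuilt from a notebook where that name is not visible. A minimal sketch of one workaround, assuming the class can be imported from some module (the module name models used here is hypothetical, not confirmed by this commit):

# Sketch of a workaround for the AttributeError shown above.
# Assumption: simple_cell_model is importable from a module; 'models' is a
# hypothetical name, not taken from this commit.
import __main__
from models import simple_cell_model

# Re-expose the class under __main__, where the pickle expects to find it.
__main__.simple_cell_model = simple_cell_model

from load import pkl_load
model_center_fp = pkl_load('simple_cell_center_fp')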
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
"""
Use cohomology to decode datasets with circular parameters

Persistent homology from arxiv:1908.02518
Homological decoding from DOI:10.1007/s00454-011-9344-x and arxiv:1711.07205
"""
import math
import numpy as np
from scipy.optimize import least_squares
import pandas as pd

from tqdm import trange

import ripser

from persistence import persistence

EPSILON = 0.0000000000001


def shortest_cycle(graph, node2, node1):
    """
    Returns the shortest cycle going through an edge

    Used for computing weights in decode

    Parameters
    ----------
    graph: ndarray (n_nodes, n_nodes)
        A matrix containing the weights of the edges in the graph
    node1: int
        The index of the first node of the edge
    node2: int
        The index of the second node of the edge

    Returns
    -------
    cycle: list of ints
        A list of indices representing the nodes of the cycle in order
    """
    N = graph.shape[0]
    distances = np.inf * np.ones(N)
    distances[node2] = 0
    prev_nodes = np.zeros(N)
    prev_nodes[:] = np.nan
    prev_nodes[node2] = node1
    while math.isnan(prev_nodes[node1]):
        distances_buffer = distances
        for j in range(N):
            possible_path_lengths = distances_buffer + graph[:, j]
            if np.min(possible_path_lengths) < distances[j]:
                prev_nodes[j] = np.argmin(possible_path_lengths)
                distances[j] = np.min(possible_path_lengths)
    prev_nodes = prev_nodes.astype(int)
    cycle = [node1]
    while cycle[0] != node2:
        cycle.insert(0, prev_nodes[cycle[0]])
    cycle.insert(0, node1)
    return cycle


def cohomological_parameterization(X, cocycle_number=1, coeff=2, weighted=False):
    """
    Compute an angular parameterization on the data set corresponding to a
    given 1-cycle

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features)
        Array containing the data
    cocycle_number: int, optional, default 1
        An integer specifying the 1-cycle used
        The n-th most stable 1-cycle is used, where n = cocycle_number
    coeff: int prime, optional, default 2
        The coefficient basis in which we compute the cohomology
    weighted: bool, optional, default False
        When true use a weighted graph for smoother parameterization
        as proposed in arxiv:1711.07205

    Returns
    -------
    decoding: dataframe(n_datapoints)
        The parameterization of the dataset consisting of a number between
        0 and 1 for each datapoint, to be interpreted modulo 1
    """
    # Get the cocycle
    result = ripser.ripser(X, maxdim=1, coeff=coeff, do_cocycles=True)
    diagrams = result['dgms']
    cocycles = result['cocycles']
    D = result['dperm2all']
    dgm1 = diagrams[1]
    idx = np.argsort(dgm1[:, 1] - dgm1[:, 0])[-cocycle_number]
    cocycle = cocycles[1][idx]
    persistence(X, homdim=1, coeff=coeff, show_largest_homology=0,
                Nsubsamples=0, save_path=None, cycle=idx)
    thresh = dgm1[idx, 1] - EPSILON

    # Compute connectivity
    N = X.shape[0]
    connectivity = np.zeros([N, N])
    for i in range(N):
        for j in range(i):
            if D[i, j] <= thresh:
                connectivity[i, j] = 1
    cocycle_array = np.zeros([N, N])

    # Lift cocycle
    for i in range(cocycle.shape[0]):
        cocycle_array[cocycle[i, 0], cocycle[i, 1]] = (
            ((cocycle[i, 2] + coeff / 2) % coeff) - coeff / 2
        )

    # Weights
    if weighted:
        def real_cocycle(x):
            real_cocycle = (
                connectivity * (cocycle_array + np.subtract.outer(x, x))
            )
            return np.ravel(real_cocycle)

        # Compute graph
        x0 = np.zeros(N)
        res = least_squares(real_cocycle, x0)
        real_cocycle_array = res.fun
        real_cocycle_array = real_cocycle_array.reshape(N, N)
        real_cocycle_array = real_cocycle_array - np.transpose(real_cocycle_array)
        graph = np.array(real_cocycle_array > 0).astype(float)
        graph[graph == 0] = np.inf
        graph = (D + EPSILON) * graph  # Add epsilon to avoid NaNs

        # Compute weights
        cycle_counts = np.zeros([N, N])
        iterator = trange(0, N, position=0, leave=True)
        iterator.set_description("Computing weights for decoding")
        for i in iterator:
            for j in range(N):
                if graph[i, j] != np.inf:
                    cycle = shortest_cycle(graph, j, i)
                    for k in range(len(cycle) - 1):
                        cycle_counts[cycle[k], cycle[k + 1]] += 1

        weights = cycle_counts / (D + EPSILON) ** 2
        weights = np.sqrt(weights)
    else:
        weights = np.outer(np.ones(N), np.ones(N))

    def real_cocycle(x):
        real_cocycle = (
            weights * connectivity * (cocycle_array + np.subtract.outer(x, x))
        )
        return np.ravel(real_cocycle)

    # Smooth cocycle
    print("Decoding...", end=" ")
    x0 = np.zeros(N)
    res = least_squares(real_cocycle, x0)
    decoding = res.x
    decoding = np.mod(decoding, 1)
    print("done")

    decoding = pd.DataFrame(decoding, columns=["decoding"])
    decoding = decoding.set_index(X.index)
    return decoding


def remove_feature(X, decoding, shift=0, cut_amplitude=1.0):
    """
    Removes a decoded feature from a dataset by making a cut at a fixed value
    of the decoding

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features)
        Array containing the data
    decoding : dataframe(n_datapoints)
        The decoded feature, assumed to be angular with periodicity 1
    shift : float between 0 and 1, optional, default 0
        The location of the cut
    cut_amplitude : float, optional, default 1
        Amplitude of the cut
    """
    cuts = np.zeros(X.shape)
    decoding = decoding.to_numpy()[:, 0]
    for i in range(X.shape[1]):
        effective_amplitude = cut_amplitude * (np.max(X[i]) - np.min(X[i]))
        cuts[:, i] = effective_amplitude * ((decoding - shift) % 1)
    reduced_data = X + cuts
    return reduced_data
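For reference, a minimal usage sketch of the two functions above on a synthetic noisy circle; the module name decoding, the DataFrame construction, and the noise level are illustrative assumptions, not part of this commit:

# Usage sketch only: 'decoding' as the module name and the synthetic data are
# assumptions for illustration.
import numpy as np
import pandas as pd
from decoding import cohomological_parameterization, remove_feature

rng = np.random.default_rng(0)
theta = rng.uniform(0, 2 * np.pi, 300)
X = pd.DataFrame({0: np.cos(theta) + 0.05 * rng.normal(size=300),
                  1: np.sin(theta) + 0.05 * rng.normal(size=300)})

# Decode the most persistent 1-cycle as an angle in [0, 1) ...
decoding = cohomological_parameterization(X, cocycle_number=1, coeff=2)

# ... then cut that circular feature out of the data
reduced = remove_feature(X, decoding, shift=0)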
@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-
"""
Tools to compute persistence diagrams

Persistent homology from ripser and gudhi library
Confidence sets from arxiv:1303.7117
"""
import numpy as np
from scipy.spatial.distance import directed_hausdorff

import matplotlib.pyplot as plt

from tqdm import trange

import ripser
from persim import plot_diagrams
import gudhi

from decorators import multi_input


def hausdorff(data1, data2, homdim, coeff):
    """Directed Hausdorff distance between the persistence diagrams of two datasets"""
    dgm1 = (ripser.ripser(data1, maxdim=homdim, coeff=coeff))['dgms']
    dgm2 = (ripser.ripser(data2, maxdim=homdim, coeff=coeff))['dgms']
    distance = directed_hausdorff(dgm1[homdim], dgm2[homdim])[0]
    return distance


@multi_input
def confidence(X, alpha=0.05, Nsubsamples=20, homdim=1, coeff=2):
    """
    Compute the confidence interval of the persistence diagram of a dataset

    Computation done by subsampling as in arxiv:1303.7117

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features)
        Dataframe containing the data
    alpha : float between 0 and 1, optional, default 0.05
        1-alpha is the confidence
    Nsubsamples : int, optional, default 20
        The number of subsamples
    homdim : int, optional, default 1
        The dimension of the homology
    coeff : int prime, optional, default 2
        The coefficient basis
    """
    N = X.shape[0]
    distances = np.zeros(Nsubsamples)
    iterator = trange(0, Nsubsamples, position=0, leave=True)
    iterator.set_description("Computing confidence interval")
    for i in iterator:
        subsample = X.iloc[np.random.choice(N, N, replace=True)]
        distances[i] = hausdorff(X, subsample, homdim, coeff)
    distances.sort()
    confidence = np.sqrt(2) * 2 * distances[int(alpha * Nsubsamples)]
    return confidence


@multi_input
def persistence(X, homdim=1, coeff=2, threshold=float('inf'),
                show_largest_homology=0, distance_matrix=False, Nsubsamples=0,
                alpha=0.05, cycle=None, save_path=None):
    """
    Plot the persistence diagram of a dataset using ripser

    Optionally prints the largest homology components
    (see show_largest_homology)

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features)
        Dataframe containing the data
    homdim : int, optional, default 1
        The dimension of the homology
    coeff : int prime, optional, default 2
        The coefficient basis
    threshold : float, optional, default infinity
        The maximum distance in the filtration
    show_largest_homology: int, optional, default 0
        Print this many of the largest homology components
    distance_matrix : bool, optional, default False
        When true X will be interpreted as a distance matrix
    Nsubsamples : int, optional, default 0
        The number of subsamples used in computing the confidence interval
        Does not compute the confidence interval when this is 0
    alpha : float between 0 and 1, optional, default 0.05
        1-alpha is the confidence
    cycle : int, optional, default None
        If given highlight the homology component in the plot corresponding to
        this cycle id
    save_path : str, optional, default None
        When given save the plot here
    """
    result = ripser.ripser(X, maxdim=homdim, coeff=coeff, do_cocycles=True,
                           distance_matrix=distance_matrix, thresh=threshold)
    diagrams = result['dgms']
    plot_diagrams(diagrams, show=False)
    if Nsubsamples > 0:
        conf = confidence(X, alpha, Nsubsamples, homdim, 2)
        line_length = 10000
        plt.plot([0, line_length], [conf, line_length + conf], color='green',
                 linestyle='dashed', linewidth=2)
    if cycle is not None:
        dgm1 = diagrams[1]
        plt.scatter(dgm1[cycle, 0], dgm1[cycle, 1], 20, 'k', 'x')
    if save_path is not None:
        path = save_path + 'Z' + str(coeff)
        if Nsubsamples > 0:
            path += '_confidence' + str(1 - alpha)
        path += '.png'
        plt.savefig(path)
    plt.show()

    if show_largest_homology != 0:
        dgm = diagrams[homdim]
        largest_indices = np.argsort(dgm[:, 0] - dgm[:, 1])
        largest_components = dgm[largest_indices[:show_largest_homology]]
        print(f"Largest {homdim}-homology components:")
        print(largest_components)
    return


@multi_input
def persistence_witness(X, number_of_landmarks=100, max_alpha_square=0.0,
                        homdim=1):
    """
    Plot the persistence diagram of a dataset using gudhi

    Uses a witness complex allowing it to be used on larger datasets

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features)
        Dataframe containing the data
    number_of_landmarks : int, optional, default 100
        The number of landmarks in the witness complex
    max_alpha_square : double, optional, default 0.0
        Maximal squared relaxation parameter
    homdim : int, optional, default 1
        The dimension of the homology
    """
    print("Sampling landmarks...", end=" ")

    witnesses = X.to_numpy()
    landmarks = gudhi.pick_n_random_points(
        points=witnesses, nb_points=number_of_landmarks
    )
    print("done")
    message = (
        "EuclideanStrongWitnessComplex with max_edge_length="
        + repr(max_alpha_square)
        + " - Number of landmarks="
        + repr(number_of_landmarks)
    )
    print(message)
    witness_complex = gudhi.EuclideanStrongWitnessComplex(
        witnesses=witnesses, landmarks=landmarks
    )
    simplex_tree = witness_complex.create_simplex_tree(
        max_alpha_square=max_alpha_square,
        limit_dimension=homdim
    )
    message = "Number of simplices=" + repr(simplex_tree.num_simplices())
    print(message)
    diag = simplex_tree.persistence()
    print("betti_numbers()=")
    print(simplex_tree.betti_numbers())
    gudhi.plot_persistence_diagram(diag, band=0.0)
    plt.show()
    return
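A short usage sketch for the plotting helpers above; the module name persistence and the synthetic input are assumptions for illustration, not part of this commit:

# Usage sketch only: module name and data are assumed, not from this commit.
import numpy as np
import pandas as pd
from persistence import persistence, persistence_witness

rng = np.random.default_rng(0)
theta = rng.uniform(0, 2 * np.pi, 200)
X = pd.DataFrame({0: np.cos(theta), 1: np.sin(theta)})

# Rips persistence diagram with a subsampled confidence band and the five
# largest 1-homology components printed
persistence(X, homdim=1, coeff=2, Nsubsamples=20, show_largest_homology=5)

# Witness-complex diagram (gudhi backend) for larger point clouds
persistence_witness(X, number_of_landmarks=50, max_alpha_square=2.0, homdim=1)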