Lev
2 years ago
18 changed files with 436 additions and 1229 deletions
@@ -0,0 +1,78 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "from load import pkl_load"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "ename": "AttributeError",
     "evalue": "Can't get attribute 'simple_cell_model' on <module '__main__'>",
     "output_type": "error",
     "traceback": [
      "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
      "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
      "Input \u001B[0;32mIn [2]\u001B[0m, in \u001B[0;36m<cell line: 1>\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0m model_center_fp \u001B[38;5;241m=\u001B[39m \u001B[43mpkl_load\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43msimple_cell_center_fp\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m)\u001B[49m\n",
      "File \u001B[0;32m~/dev/amgen/load.py:6\u001B[0m, in \u001B[0;36mpkl_load\u001B[0;34m(filename)\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mpkl_load\u001B[39m(filename):\n\u001B[1;32m 5\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m \u001B[38;5;28mopen\u001B[39m(filename, \u001B[38;5;124m'\u001B[39m\u001B[38;5;124mrb\u001B[39m\u001B[38;5;124m'\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m f:\n\u001B[0;32m----> 6\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mpickle\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mload\u001B[49m\u001B[43m(\u001B[49m\u001B[43mf\u001B[49m\u001B[43m)\u001B[49m\n",
      "\u001B[0;31mAttributeError\u001B[0m: Can't get attribute 'simple_cell_model' on <module '__main__'>"
     ]
    }
   ],
   "source": [
    "model_center_fp = pkl_load('simple_cell_center_fp')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
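The AttributeError in the notebook above is standard pickle behavior: the pickle file only stores a reference such as __main__.simple_cell_model, so loading fails unless that attribute is visible on __main__ again. A minimal sketch of the usual workaround, assuming simple_cell_model is importable from some module (the module name model below is hypothetical):

    # Hypothetical fix sketch: restore the attribute pickle expects on __main__.
    import __main__
    from model import simple_cell_model  # wherever the class is actually defined
    __main__.simple_cell_model = simple_cell_model
    from load import pkl_load
    model_center_fp = pkl_load('simple_cell_center_fp')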
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,189 @@
# -*- coding: utf-8 -*-
"""
Use cohomology to decode datasets with circular parameters

Persistent homology from arxiv:1908.02518
Homological decoding from DOI:10.1007/s00454-011-9344-x and arxiv:1711.07205
"""
import math
import numpy as np
from scipy.optimize import least_squares
import pandas as pd

from tqdm import trange

import ripser

from persistence import persistence

EPSILON = 0.0000000000001


def shortest_cycle(graph, node2, node1):
    """
    Returns the shortest cycle going through an edge

    Used for computing weights in cohomological_parameterization

    Parameters
    ----------
    graph: ndarray (n_nodes, n_nodes)
        A matrix containing the weights of the edges in the graph
    node1: int
        The index of the first node of the edge
    node2: int
        The index of the second node of the edge

    Returns
    -------
    cycle: list of ints
        A list of indices representing the nodes of the cycle in order
    """
    N = graph.shape[0]
    distances = np.inf * np.ones(N)
    distances[node2] = 0
    prev_nodes = np.zeros(N)
    prev_nodes[:] = np.nan
    prev_nodes[node2] = node1
    # Bellman-Ford relaxation from node2, stopping once node1 is reached
    while math.isnan(prev_nodes[node1]):
        distances_buffer = distances.copy()  # snapshot of the previous sweep
        for j in range(N):
            possible_path_lengths = distances_buffer + graph[:, j]
            if np.min(possible_path_lengths) < distances[j]:
                prev_nodes[j] = np.argmin(possible_path_lengths)
                distances[j] = np.min(possible_path_lengths)
    prev_nodes = prev_nodes.astype(int)
    # Backtrack from node1 to node2, then close the cycle with node1
    cycle = [node1]
    while cycle[0] != node2:
        cycle.insert(0, prev_nodes[cycle[0]])
    cycle.insert(0, node1)
    return cycle


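# Hypothetical usage sketch (not part of the original file): with edge weights
# graph[i, j] for the edge i -> j and np.inf marking absent edges, the call
# below walks 0 -> 1 -> 2 -> 0 and returns [0, 1, 2, 0].
#
#     demo_graph = np.array([[np.inf, 1.0, np.inf],
#                            [np.inf, np.inf, 1.0],
#                            [1.0, np.inf, np.inf]])
#     shortest_cycle(demo_graph, node2=1, node1=0)

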
def cohomological_parameterization(X, cocycle_number=1, coeff=2, weighted=False):
    """
    Compute an angular parameterization on the data set corresponding to a
    given 1-cycle

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    cocycle_number: int, optional, default 1
        An integer specifying the 1-cycle used
        The n-th most stable 1-cycle is used, where n = cocycle_number
    coeff: int prime, optional, default 2
        The coefficient basis in which we compute the cohomology
    weighted: bool, optional, default False
        When true use a weighted graph for smoother parameterization
        as proposed in arxiv:1711.07205

    Returns
    -------
    decoding: dataframe(n_datapoints)
        The parameterization of the dataset consisting of a number between
        0 and 1 for each datapoint, to be interpreted modulo 1
    """
    # Get the cocycle
    result = ripser.ripser(X, maxdim=1, coeff=coeff, do_cocycles=True)
    diagrams = result['dgms']
    cocycles = result['cocycles']
    D = result['dperm2all']
    dgm1 = diagrams[1]
    idx = np.argsort(dgm1[:, 1] - dgm1[:, 0])[-cocycle_number]
    cocycle = cocycles[1][idx]
    persistence(X, homdim=1, coeff=coeff, show_largest_homology=0,
                Nsubsamples=0, save_path=None, cycle=idx)
    thresh = dgm1[idx, 1] - EPSILON

    # Compute connectivity
    N = X.shape[0]
    connectivity = np.zeros([N, N])
    for i in range(N):
        for j in range(i):
            if D[i, j] <= thresh:
                connectivity[i, j] = 1
    cocycle_array = np.zeros([N, N])

    # Lift cocycle
    for i in range(cocycle.shape[0]):
        cocycle_array[cocycle[i, 0], cocycle[i, 1]] = (
            ((cocycle[i, 2] + coeff / 2) % coeff) - coeff / 2
        )

    # Weights
    if weighted:
        def real_cocycle(x):
            real_cocycle = (
                connectivity * (cocycle_array + np.subtract.outer(x, x))
            )
            return np.ravel(real_cocycle)

        # Compute graph
        x0 = np.zeros(N)
        res = least_squares(real_cocycle, x0)
        real_cocycle_array = res.fun
        real_cocycle_array = real_cocycle_array.reshape(N, N)
        real_cocycle_array = real_cocycle_array - np.transpose(real_cocycle_array)
        graph = np.array(real_cocycle_array > 0).astype(float)
        graph[graph == 0] = np.inf
        graph = (D + EPSILON) * graph  # Add epsilon to avoid NaNs

        # Compute weights
        cycle_counts = np.zeros([N, N])
        iterator = trange(0, N, position=0, leave=True)
        iterator.set_description("Computing weights for decoding")
        for i in iterator:
            for j in range(N):
                if graph[i, j] != np.inf:
                    cycle = shortest_cycle(graph, j, i)
                    for k in range(len(cycle) - 1):
                        cycle_counts[cycle[k], cycle[k + 1]] += 1

        weights = cycle_counts / (D + EPSILON) ** 2
        weights = np.sqrt(weights)
    else:
        weights = np.outer(np.ones(N), np.ones(N))

    def real_cocycle(x):
        real_cocycle = (
            weights * connectivity * (cocycle_array + np.subtract.outer(x, x))
        )
        return np.ravel(real_cocycle)

    # Smooth cocycle
    print("Decoding...", end=" ")
    x0 = np.zeros(N)
    res = least_squares(real_cocycle, x0)
    decoding = res.x
    decoding = np.mod(decoding, 1)
    print("done")

    decoding = pd.DataFrame(decoding, columns=["decoding"])
    decoding = decoding.set_index(X.index)
    return decoding


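# Hypothetical illustration (not part of the original file): for points on a
# noisy circle, cohomological_parameterization should recover the angle as a
# value in [0, 1), up to orientation and a global shift.
#
#     theta = np.random.uniform(0, 2 * np.pi, 200)
#     circle = pd.DataFrame({0: np.cos(theta), 1: np.sin(theta)})
#     decoding = cohomological_parameterization(circle)  # ~ theta/(2*pi) mod 1

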
def remove_feature(X, decoding, shift=0, cut_amplitude=1.0):
    """
    Removes a decoded feature from a dataset by making a cut at a fixed value
    of the decoding

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    decoding : dataframe(n_datapoints)
        The decoded feature, assumed to be angular with periodicity 1
    shift : float between 0 and 1, optional, default 0
        The location of the cut
    cut_amplitude : float, optional, default 1
        Amplitude of the cut
    """
    cuts = np.zeros(X.shape)
    decoding = decoding.to_numpy()[:, 0]
    for i in range(X.shape[1]):
        effective_amplitude = cut_amplitude * (np.max(X[i]) - np.min(X[i]))
        cuts[:, i] = effective_amplitude * ((decoding - shift) % 1)
    reduced_data = X + cuts
    return reduced_data
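

# Minimal end-to-end sketch, not part of the original commit; it assumes ripser
# and persim are installed and only runs when this module is executed directly.
if __name__ == "__main__":
    rng = np.random.default_rng(0)
    angles = rng.uniform(0, 2 * np.pi, 150)
    # Noisy circle with integer column labels, since remove_feature indexes X[i]
    data = pd.DataFrame({
        0: np.cos(angles) + 0.05 * rng.standard_normal(150),
        1: np.sin(angles) + 0.05 * rng.standard_normal(150),
    })
    circular_coord = cohomological_parameterization(data, weighted=False)
    unrolled = remove_feature(data, circular_coord)
    print(unrolled.head())
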
@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-
"""
Tools to compute persistence diagrams

Persistent homology from the ripser and gudhi libraries
Confidence sets from arxiv:1303.7117
"""
import numpy as np
from scipy.spatial.distance import directed_hausdorff

import matplotlib.pyplot as plt

from tqdm import trange

import ripser
from persim import plot_diagrams
import gudhi

from decorators import multi_input


def hausdorff(data1, data2, homdim, coeff):
    """Directed Hausdorff distance between two persistence diagrams"""
    dgm1 = (ripser.ripser(data1, maxdim=homdim, coeff=coeff))['dgms']
    dgm2 = (ripser.ripser(data2, maxdim=homdim, coeff=coeff))['dgms']
    distance = directed_hausdorff(dgm1[homdim], dgm2[homdim])[0]
    return distance


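# Hypothetical usage sketch (not part of the original file): compare the
# 1-dimensional diagrams of two random point clouds.
#
#     cloud1 = np.random.rand(100, 2)
#     cloud2 = np.random.rand(100, 2)
#     hausdorff(cloud1, cloud2, homdim=1, coeff=2)

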
@multi_input
def confidence(X, alpha=0.05, Nsubsamples=20, homdim=1, coeff=2):
    """
    Compute the confidence interval of the persistence diagram of a dataset

    Computation done by subsampling as in arxiv:1303.7117

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    alpha : float between 0 and 1, optional, default 0.05
        1-alpha is the confidence
    Nsubsamples : int, optional, default 20
        The number of subsamples
    homdim : int, optional, default 1
        The dimension of the homology
    coeff : int prime, optional, default 2
        The coefficient basis
    """
    N = X.shape[0]
    distances = np.zeros(Nsubsamples)
    iterator = trange(0, Nsubsamples, position=0, leave=True)
    iterator.set_description("Computing confidence interval")
    for i in iterator:
        subsample = X.iloc[np.random.choice(N, N, replace=True)]
        distances[i] = hausdorff(X, subsample, homdim, coeff)
    distances.sort()
    confidence = np.sqrt(2) * 2 * distances[int(alpha * Nsubsamples)]
    return confidence


@multi_input
def persistence(X, homdim=1, coeff=2, threshold=float('inf'),
                show_largest_homology=0, distance_matrix=False, Nsubsamples=0,
                alpha=0.05, cycle=None, save_path=None):
    """
    Plot the persistence diagram of a dataset using ripser

    Optionally prints the largest homology components

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    homdim : int, optional, default 1
        The dimension of the homology
    coeff : int prime, optional, default 2
        The coefficient basis
    threshold : float, optional, default infinity
        The maximum distance in the filtration
    show_largest_homology: int, optional, default 0
        Print this many of the largest homology components
    distance_matrix : bool, optional, default False
        When true X will be interpreted as a distance matrix
    Nsubsamples : int, optional, default 0
        The number of subsamples used in computing the confidence interval
        Does not compute the confidence interval when this is 0
    alpha : float between 0 and 1, optional, default 0.05
        1-alpha is the confidence
    cycle : int, optional, default None
        If given highlight the homology component in the plot corresponding to
        this cycle id
    save_path : str, optional, default None
        When given save the plot here
    """
    result = ripser.ripser(X, maxdim=homdim, coeff=coeff, do_cocycles=True,
                           distance_matrix=distance_matrix, thresh=threshold)
    diagrams = result['dgms']
    plot_diagrams(diagrams, show=False)
    if Nsubsamples > 0:
        conf = confidence(X, alpha, Nsubsamples, homdim, 2)
        line_length = 10000
        plt.plot([0, line_length], [conf, line_length + conf], color='green',
                 linestyle='dashed', linewidth=2)
    if cycle is not None:
        dgm1 = diagrams[1]
        plt.scatter(dgm1[cycle, 0], dgm1[cycle, 1], 20, 'k', 'x')
    if save_path is not None:
        path = save_path + 'Z' + str(coeff)
        if Nsubsamples > 0:
            path += '_confidence' + str(1 - alpha)
        path += '.png'
        plt.savefig(path)
    plt.show()

    if show_largest_homology != 0:
        dgm = diagrams[homdim]
        largest_indices = np.argsort(dgm[:, 0] - dgm[:, 1])
        largest_components = dgm[largest_indices[:show_largest_homology]]
        print(f"Largest {homdim}-homology components:")
        print(largest_components)
    return


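# Hypothetical usage sketch (not part of the original file): diagram of a
# noisy circle with a bootstrap confidence band.
#
#     import pandas as pd
#     theta = np.random.uniform(0, 2 * np.pi, 300)
#     circle = pd.DataFrame({0: np.cos(theta), 1: np.sin(theta)})
#     persistence(circle, homdim=1, coeff=2, Nsubsamples=20)

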
@multi_input
def persistence_witness(X, number_of_landmarks=100, max_alpha_square=0.0,
                        homdim=1):
    """
    Plot the persistence diagram of a dataset using gudhi

    Uses a witness complex, allowing it to scale to larger datasets

    Parameters
    ----------
    X: dataframe(n_datapoints, n_features):
        Dataframe containing the data
    number_of_landmarks : int, optional, default 100
        The number of landmarks in the witness complex
    max_alpha_square : double, optional, default 0.0
        Maximal squared relaxation parameter
    homdim : int, optional, default 1
        The dimension of the homology
    """
    print("Sampling landmarks...", end=" ")

    witnesses = X.to_numpy()
    landmarks = gudhi.pick_n_random_points(
        points=witnesses, nb_points=number_of_landmarks
    )
    print("done")
    message = (
        "EuclideanStrongWitnessComplex with max_edge_length="
        + repr(max_alpha_square)
        + " - Number of landmarks="
        + repr(number_of_landmarks)
    )
    print(message)
    witness_complex = gudhi.EuclideanStrongWitnessComplex(
        witnesses=witnesses, landmarks=landmarks
    )
    simplex_tree = witness_complex.create_simplex_tree(
        max_alpha_square=max_alpha_square,
        limit_dimension=homdim
    )
    message = "Number of simplices=" + repr(simplex_tree.num_simplices())
    print(message)
    diag = simplex_tree.persistence()
    print("betti_numbers()=")
    print(simplex_tree.betti_numbers())
    gudhi.plot_persistence_diagram(diag, band=0.0)
    plt.show()
    return
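

# Minimal sketch, not part of the original commit; it assumes ripser, persim,
# gudhi, and pandas are installed and only runs when executed directly.
if __name__ == "__main__":
    import pandas as pd

    rng = np.random.default_rng(0)
    theta = rng.uniform(0, 2 * np.pi, 300)
    circle = pd.DataFrame({0: np.cos(theta), 1: np.sin(theta)})
    # Rips-based diagram with a bootstrap confidence band
    persistence(circle, homdim=1, coeff=2, Nsubsamples=20)
    # Witness-complex diagram on the same data
    persistence_witness(circle, number_of_landmarks=50, max_alpha_square=2.0,
                        homdim=1)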