"""
From the paper:
Evolutionary Clustering and
Community Detection Algorithms for Social Media Health Surveillance
Kyle Spurlock, Tanner Bogart, Heba Elgazzar
2020
Version 1.2 of an Evolutionary adaptation of the Louvain Method
Example
-------
df = pd.read_csv(r"encoded_twitter_dataset.csv")
X = df.iloc[:200,[3,4,5]]
t_labels = np.unique(X['Time'])
evo8 = EvoLouvain()
evo8.callLouvain(X, t_labels, alpha = .8, save_plot = "path/")
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import pairwise_distances
import community as community_louvain
import networkx as nx
[docs]class EvoLouvain():
"""Implementation of Evolutionary Louvain Method
Notes
-----
Wraps dynamic smoothing around community_louvain by Thomas Aynaud
Attributes
----------
modularities_ : list
stores modularities per generation
"""
def __init__(self):
self.modularities_ = [] # Modularities per timesteps
[docs] def showPlot(self, current_gen, time, partition, modularity, alpha,
save_plot = None):
"""Performs plotting of clusters at generation"""
# Plot partitions
plt.scatter(current_gen[:,0], current_gen[:,1],
c = list(partition.values()), cmap = 'tab20')
# Create graph title with params
title = 'Louvain Generation-{g} Alpha-{a} Modularity-{m}'.format(g = time,
a = alpha,
m = "%.5f" % modularity)
plt.title(title)
if not save_plot == None:
# Save fig as PNG
plt.savefig('{p}{t}.png'.format(p=save_plot, t = title))
plt.show() # Show fig
[docs] def applySmoothing(self, mat1, mat2, alpha):
""""Adds and applies smoothing effect to dist matrices"""
mat1, mat2 = (1-alpha)*mat1, alpha*mat2
ysize, xsize = mat2.shape # Get shape of second matrix-1
mat1[0:ysize, 0:xsize] += mat2 # Add elements of matrices on index (0,0)
return mat1
[docs] def sparsify(self, dist):
"""Introducing sparsity into distance matrix, loose similarity"""
dist[(dist < 10) & (dist != 0)] = 3
dist[(dist > 10) & (dist < 20)] = 2
dist[(dist > 20) & (dist < 30)] = 1
dist[dist > 30] = 0
return dist
[docs] def callLouvain(self, X, times, alpha, show_mod = False, plot_gens = None,
save_plot = None):
"""Perform Evolutionary Community Detection through the Louvain
Method
Parameters
----------
X : pd.DataFrame
Dataframe of tabular data samples
times : list
List containing times for X samples
alpha : float
Parameter used to modulate epsilon by snapshot vs. history
show_mod : bool, optional
Verbose for comptued modularity values
plot_gens : list, optional
Generations to show as plots
save_plot : str, optional
Directory path to save plot as image
Returns
-------
None
"""
previous_gen = None
seen_times = []
t_intervals = np.unique(times)
if plot_gens == None:
# Default generations to plot
plot_gens = [int(np.median(t_intervals)/2), # Quarter 1
int(np.median(t_intervals)), # Middle
t_intervals[-1]] # End
for time in t_intervals:
seen_times.append(time)
current_gen = X.loc[X['Time'].isin(seen_times)]
current_gen = current_gen.iloc[:,[0,1]].values
# Calculate the distance matrix of points
dist_cs = pairwise_distances(current_gen)
# Remove distant links and scale
dist_cs = self.sparsify(dist_cs)
if not time == 0:
# Time must be > 1 to consider history
dist_cs = self.applySmoothing(dist_cs, previous_gen, alpha)
# Get NetworkX graph
G = nx.from_numpy_matrix(dist_cs)
# Find the best partition at time step
partition = community_louvain.best_partition(G)
# Find the modularity
modularity = 0
try:
modularity = community_louvain.modularity(partition, G)
except(ValueError) as e:
# If G has incomplete linkage (due to sparsification)
# it will throw a ValueError
print(e)
print("Can't compute modularity for this partition.")
if (show_mod):
print('Generation {g} modularity: {m}'
.format(g = self.current_time, m = modularity))
# Save modularities for visualization
self.modularities_.append(modularity)
seen_times.append(time)
# Save previous generation as histroy
previous_gen = dist_cs
# Plotting
if time in plot_gens:
self.showPlot(current_gen, time, partition, modularity, alpha,
save_plot=save_plot)