from collections import deque, defaultdict import logging from cfg import config logger = logging.getLogger(__name__) class DijkstraHelper(object): def __init__(self, nodes, connections): self._nodes = {node: float("inf") for node in nodes} self._connections = connections self._todo_in_next_step = set() @classmethod def from_db(cls, connection): cursor = connection.cursor() cursor.execute("SELECT page_id FROM pages") nodes = [n[0] for n in cursor.fetchall()] connections = defaultdict(list) cursor.execute("SELECT source, destination FROM links") for source, destination in cursor: connections[source].append(destination) return cls(nodes, connections) def dijkstra(self, root): self.recursive_dijkstra([root], 0) def recursive_dijkstra(self, todos, depth): if(not todos): return logger.info("recursive_dijkstra(<{} nodes>, {})".format(len(todos), depth)) next_todos = deque() for todo in todos: next_todos.extend(self.dijkstra_one(todo, depth)) self.recursive_dijkstra(next_todos, depth + 1) def dijkstra_one(self, node, depth): for neighbor in self._connections[node]: if(self._nodes[neighbor] <= depth): continue self._nodes[neighbor] = depth yield neighbor def write_back(self, connection): cursor = connection.cursor() cursor.execute("DELETE FROM dijkstra_helper") cursor.executemany("INSERT INTO dijkstra_helper(page, value) VALUES(%s, %s)", list(self._nodes.items()))