"""Shortest-distance computation over a page-link graph stored in a database."""

from collections import deque, defaultdict
import logging

from cfg import config
import sql

logger = logging.getLogger(__name__)


class DijkstraHelper(object):
    """Compute link distances from one root node to every other node.

    Every link counts as one step, so the search proceeds level by level
    (all nodes at distance 1, then distance 2, ...).  Nodes that are never
    reached keep the distance ``float("inf")``.
    """

    # Largest signed 32-bit integer; written in place of infinity when the
    # backend is not SQLite.
    # NOTE(review): presumably the non-SQLite column is a 32-bit INT that
    # cannot store infinity -- confirm against the schema.
    _SQL_INFINITY = 2147483647

    def __init__(self, nodes, connections):
        """Create a helper for the given graph.

        :param nodes: iterable of hashable node ids.
        :param connections: mapping ``node id -> iterable of neighbor ids``
            (outgoing links).
        """
        self._nodes = {node: float("inf") for node in nodes}
        self._connections = connections
        # Not used by any method in this class; kept for compatibility.
        self._todo_in_next_step = set()

    @classmethod
    def from_db(cls, connection):
        """Build a helper from the database behind *connection*.

        Runs ``sql.statements["get_all_page_ids"]`` (first column of each row
        is taken as a node id) and ``sql.statements["get_links"]`` (each row
        is unpacked as ``(source, destination)``).

        :param connection: DB-API connection object.
        :return: a new instance of *cls*.
        """
        cursor = connection.cursor()

        cursor.execute(sql.statements["get_all_page_ids"])
        nodes = [row[0] for row in cursor.fetchall()]

        connections = defaultdict(list)
        cursor.execute(sql.statements["get_links"])
        for source, destination in cursor:
            connections[source].append(destination)

        return cls(nodes, connections)

    def dijkstra(self, root):
        """Compute the distance of every node reachable from *root*.

        *root* itself gets distance 0.  Results are stored on the instance
        and can be persisted with :meth:`write_back`.
        """
        self._nodes[root] = 0
        self.recursive_dijkstra([root], 1)

    def recursive_dijkstra(self, todos, depth):
        """Expand the search frontier level by level until it is empty.

        The name is historical: the expansion is done with a loop rather
        than recursion, so graphs with more levels than the interpreter's
        recursion limit do not raise ``RecursionError``.

        :param todos: nodes whose neighbors should receive distance *depth*.
        :param depth: distance to assign to the neighbors of *todos*.
        """
        while todos:
            logger.info("recursive_dijkstra(<%s nodes>, %s)", len(todos), depth)
            next_todos = deque()
            for todo in todos:
                next_todos.extend(self.dijkstra_one(todo, depth))
            todos = next_todos
            depth += 1

    def dijkstra_one(self, node, depth):
        """Relax all outgoing links of *node* and yield the improved neighbors.

        A neighbor is updated (and yielded) only if its currently stored
        distance is strictly greater than *depth*.

        :param node: node whose outgoing links are followed.
        :param depth: candidate distance for each neighbor.
        :raises KeyError: if a neighbor id is not one of the known nodes.
        """
        # .get() avoids inserting empty entries into a defaultdict for nodes
        # without outgoing links and also works with a plain dict.
        for neighbor in self._connections.get(node, ()):
            if self._nodes[neighbor] <= depth:
                continue
            self._nodes[neighbor] = depth
            yield neighbor

    def write_back(self, connection):
        """Replace the stored distances in the database with the current ones.

        Executes ``sql.statements["delete_dijkstra"]``, then inserts one
        ``(node id, distance)`` row per node via
        ``sql.statements["insert_dijkstra_values"]`` and commits.

        :param connection: DB-API connection object.
        """
        cursor = connection.cursor()
        cursor.execute(sql.statements["delete_dijkstra"])

        infinity = float("inf")
        use_sqlite = config["use_sqlite"]

        def sqlize(value):
            """Convert a distance to a value the configured backend accepts."""
            if use_sqlite:
                return value
            if value == infinity:
                return self._SQL_INFINITY
            # Finite distances are stored unchanged (previously this path
            # fell through and returned None).
            return value

        cursor.executemany(
            sql.statements["insert_dijkstra_values"],
            [(node, sqlize(distance)) for node, distance in self._nodes.items()],
        )
        connection.commit()