everything is working nicely

This commit is contained in:
Daniel Knüttel 2019-02-21 17:14:17 +01:00
parent 8598a9a64a
commit d2ad5bb1d2
4 changed files with 34 additions and 20 deletions

View File

@ -19,13 +19,13 @@ def shortest_path(center, title, connection):
center_page = get_page_id(center, connection) center_page = get_page_id(center, connection)
path = deque() path = deque()
while(current_page != center_page): while(current_page != center_page):
path.append(current_title) path.append(current_page)
cursor.execute('''SELECT links.source cursor.execute('''SELECT links.source
FROM links FROM links
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.page LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.page
WHERE links.destination=:page WHERE links.destination=%s
ORDER BY dijkstra_helper.value ASC ORDER BY dijkstra_helper.value ASC
LIMIT 1''', {"page": current_page}) LIMIT 1''', (current_page,))
current_title = cursor.fetchone()[0] current_page = cursor.fetchone()[0]
return list(reversed(path)) return list(reversed(path))

View File

@ -5,12 +5,15 @@ from db_util import get_page_id
def prepare_dijkstra(connection): def prepare_dijkstra(connection):
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page) if(config["use_sqlite"]):
SELECT destination FROM links cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
''') SELECT rowid FROM pages
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page) ''')
SELECT source FROM links else:
''') cursor.execute('''INSERT IGNORE INTO dijkstra_helper(page)
SELECT page_id FROM pages
''')
if(config["use_sqlite"]): if(config["use_sqlite"]):
cursor.execute("UPDATE dijkstra_helper SET value=1e1000") cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
@ -20,24 +23,24 @@ def prepare_dijkstra(connection):
def dijkstra_one(page, value, connection): def dijkstra_one(page, value, connection):
cursor = connection.cursor() cursor = connection.cursor()
if(isinstance(title, tuple)): if(isinstance(page, tuple)):
# Idk why this happens. # Idk why this happens.
title = title[0] title = title[0]
cursor.execute('''SELECT page cursor.execute('''SELECT page
FROM dijkstra_helper FROM dijkstra_helper
LEFT JOIN links ON links.destination=dijkstra_helper.page LEFT JOIN links ON links.destination=dijkstra_helper.page
WHERE links.source=:page WHERE links.source=%s
AND dijkstra_helper.value>:value''', {"pate": page, "value": value + 1}) AND dijkstra_helper.value>%s''', (page, value + 1))
# This is the list of nodes that have to be updated # This is the list of nodes that have to be updated
result = cursor.fetchall() result = cursor.fetchall()
cursor.execute('''UPDATE dijkstra_helper cursor.execute('''UPDATE dijkstra_helper
SET value=:value SET value=%s
WHERE name IN ( WHERE page IN (
SELECT destination SELECT destination
FROM links FROM links
WHERE source=:page) WHERE source=%s)
AND dijkstra_helper.value>:value''', {"value": value + 1, "page": page}) AND dijkstra_helper.value>%s''', (value + 1, page, value + 1))
connection.commit() connection.commit()
return result return result
@ -55,7 +58,7 @@ def recursive_dijkstra(titles, value, connection):
def dijkstra(title, connection): def dijkstra(title, connection):
page = get_page_id(title, connection) page = get_page_id(title, connection)
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute("UPDATE dijkstra_helper SET value=0 WHERE name=%s", (page,)) cursor.execute("UPDATE dijkstra_helper SET value=0 WHERE page=%s", (page,))
todos = dijkstra_one(page, 1, connection) todos = dijkstra_one(page, 1, connection)
recursive_dijkstra(todos, 2, connection) recursive_dijkstra(todos, 2, connection)

View File

@ -8,6 +8,9 @@ from receive import receive_links, receive_link_graph
from dijkstra import prepare_dijkstra, dijkstra from dijkstra import prepare_dijkstra, dijkstra
from connectivity import shortest_path from connectivity import shortest_path
from graph import DijkstraHelper
from db_util import get_page_id
cache = get_cache("./cache/", "Angela_Merkel") cache = get_cache("./cache/", "Angela_Merkel")
receive_link_graph("Angela_Merkel", cache, 2) receive_link_graph("Angela_Merkel", cache, 2)
@ -17,5 +20,13 @@ print(cursor.fetchall())
#prepare_dijkstra(cache) #prepare_dijkstra(cache)
#dijkstra("Angela_Merkel", cache) #dijkstra("Angela_Merkel", cache)
#
#print(shortest_path("Angela_Merkel", "Germany", cache))
print(shortest_path("Angela_Merkel", "Germany", cache))
angela = get_page_id("Angela_Merkel", cache)
dijkstra = DijkstraHelper.from_db(cache)
dijkstra.dijkstra(angela)
dijkstra.write_back(cache)
print(dijkstra._nodes)

View File

@ -38,7 +38,7 @@ def get_data_with_proxy(url, conn_object, visit_first=None):
(time.time(), i)) (time.time(), i))
continue continue
# Be nice to Wikipedia. # Be nice to Wikipedia.
time.sleep(0.3) time.sleep(0.1)
return response.json() return response.json()
raise NoMoreProxiesException("No more proxies left") raise NoMoreProxiesException("No more proxies left")