everything is working nicely

gol
Daniel Knüttel 2019-02-21 17:14:17 +01:00
parent 8598a9a64a
commit d2ad5bb1d2
4 changed files with 34 additions and 20 deletions

View File

@ -19,13 +19,13 @@ def shortest_path(center, title, connection):
center_page = get_page_id(center, connection)
path = deque()
while(current_page != center_page):
path.append(current_title)
path.append(current_page)
cursor.execute('''SELECT links.source
FROM links
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.page
WHERE links.destination=:page
WHERE links.destination=%s
ORDER BY dijkstra_helper.value ASC
LIMIT 1''', {"page": current_page})
current_title = cursor.fetchone()[0]
LIMIT 1''', (current_page,))
current_page = cursor.fetchone()[0]
return list(reversed(path))

View File

@ -5,12 +5,15 @@ from db_util import get_page_id
def prepare_dijkstra(connection):
cursor = connection.cursor()
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
SELECT destination FROM links
''')
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
SELECT source FROM links
''')
if(config["use_sqlite"]):
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
SELECT rowid FROM pages
''')
else:
cursor.execute('''INSERT IGNORE INTO dijkstra_helper(page)
SELECT page_id FROM pages
''')
if(config["use_sqlite"]):
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
@ -20,24 +23,24 @@ def prepare_dijkstra(connection):
def dijkstra_one(page, value, connection):
    """Lower the stored value of every page linked from *page* to ``value + 1``.

    Args:
        page: Id of the source page. A tuple is accepted and its first
            element is used as the id.
        value: Base value; link destinations whose stored value is greater
            than ``value + 1`` are set to ``value + 1``.
        connection: DB-API connection exposing the ``links`` and
            ``dijkstra_helper`` tables.

    Returns:
        The ``fetchall()`` rows (one ``page`` column each) of the
        destinations that were selected for update, read before the UPDATE
        is applied.
    """
    cursor = connection.cursor()
    if isinstance(page, tuple):
        # Presumably callers hand back raw rows from an earlier fetchall();
        # unwrap the id. (Previously this assigned to an undefined `title`.)
        page = page[0]

    new_value = value + 1

    # NOTE(review): `%s` is the "format" paramstyle; the stdlib sqlite3
    # driver expects `?` or `:name` -- confirm which driver backs
    # `connection` when the sqlite configuration is in use.
    cursor.execute(
        '''SELECT page
            FROM dijkstra_helper
            LEFT JOIN links ON links.destination=dijkstra_helper.page
            WHERE links.source=%s
            AND dijkstra_helper.value>%s''',
        (page, new_value),
    )
    # This is the list of nodes that have to be updated.
    result = cursor.fetchall()

    cursor.execute(
        '''UPDATE dijkstra_helper
            SET value=%s
            WHERE page IN (
                SELECT destination
                FROM links
                WHERE source=%s)
            AND dijkstra_helper.value>%s''',
        (new_value, page, new_value),
    )
    connection.commit()
    return result
@ -55,7 +58,7 @@ def recursive_dijkstra(titles, value, connection):
def dijkstra(title, connection):
    """Seed the ``dijkstra_helper`` table from the page called *title*.

    Looks up the page id for *title*, sets that page's value to 0, then
    hands off to ``dijkstra_one`` and ``recursive_dijkstra`` to process the
    pages reachable from it.

    Args:
        title: Title of the start page, resolved through ``get_page_id``.
        connection: DB-API connection exposing ``dijkstra_helper``.
    """
    page = get_page_id(title, connection)
    cursor = connection.cursor()
    # The helper table is keyed by `page`; the earlier statement filtering on
    # `name` targeted a column the other queries here do not use.
    cursor.execute("UPDATE dijkstra_helper SET value=0 WHERE page=%s", (page,))
    todos = dijkstra_one(page, 1, connection)
    recursive_dijkstra(todos, 2, connection)

View File

@ -8,6 +8,9 @@ from receive import receive_links, receive_link_graph
from dijkstra import prepare_dijkstra, dijkstra
from connectivity import shortest_path
from graph import DijkstraHelper
from db_util import get_page_id
cache = get_cache("./cache/", "Angela_Merkel")
receive_link_graph("Angela_Merkel", cache, 2)
@ -17,5 +20,13 @@ print(cursor.fetchall())
#prepare_dijkstra(cache)
#dijkstra("Angela_Merkel", cache)
#
#print(shortest_path("Angela_Merkel", "Germany", cache))
print(shortest_path("Angela_Merkel", "Germany", cache))
angela = get_page_id("Angela_Merkel", cache)
dijkstra = DijkstraHelper.from_db(cache)
dijkstra.dijkstra(angela)
dijkstra.write_back(cache)
print(dijkstra._nodes)

View File

@ -38,7 +38,7 @@ def get_data_with_proxy(url, conn_object, visit_first=None):
(time.time(), i))
continue
# Be nice to Wikipedia.
time.sleep(0.3)
time.sleep(0.1)
return response.json()
raise NoMoreProxiesException("No more proxies left")