everything is working nicely
This commit is contained in:
parent
8598a9a64a
commit
d2ad5bb1d2
|
@ -19,13 +19,13 @@ def shortest_path(center, title, connection):
|
||||||
center_page = get_page_id(center, connection)
|
center_page = get_page_id(center, connection)
|
||||||
path = deque()
|
path = deque()
|
||||||
while(current_page != center_page):
|
while(current_page != center_page):
|
||||||
path.append(current_title)
|
path.append(current_page)
|
||||||
cursor.execute('''SELECT links.source
|
cursor.execute('''SELECT links.source
|
||||||
FROM links
|
FROM links
|
||||||
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.page
|
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.page
|
||||||
WHERE links.destination=:page
|
WHERE links.destination=%s
|
||||||
ORDER BY dijkstra_helper.value ASC
|
ORDER BY dijkstra_helper.value ASC
|
||||||
LIMIT 1''', {"page": current_page})
|
LIMIT 1''', (current_page,))
|
||||||
current_title = cursor.fetchone()[0]
|
current_page = cursor.fetchone()[0]
|
||||||
return list(reversed(path))
|
return list(reversed(path))
|
||||||
|
|
||||||
|
|
|
@ -5,13 +5,16 @@ from db_util import get_page_id
|
||||||
|
|
||||||
def prepare_dijkstra(connection):
|
def prepare_dijkstra(connection):
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
|
if(config["use_sqlite"]):
|
||||||
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
|
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
|
||||||
SELECT destination FROM links
|
SELECT rowid FROM pages
|
||||||
''')
|
''')
|
||||||
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
|
else:
|
||||||
SELECT source FROM links
|
cursor.execute('''INSERT IGNORE INTO dijkstra_helper(page)
|
||||||
|
SELECT page_id FROM pages
|
||||||
''')
|
''')
|
||||||
|
|
||||||
|
|
||||||
if(config["use_sqlite"]):
|
if(config["use_sqlite"]):
|
||||||
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
|
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
|
||||||
else:
|
else:
|
||||||
|
@ -20,24 +23,24 @@ def prepare_dijkstra(connection):
|
||||||
|
|
||||||
def dijkstra_one(page, value, connection):
|
def dijkstra_one(page, value, connection):
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
if(isinstance(title, tuple)):
|
if(isinstance(page, tuple)):
|
||||||
# Idk why this happens.
|
# Idk why this happens.
|
||||||
title = title[0]
|
title = title[0]
|
||||||
cursor.execute('''SELECT page
|
cursor.execute('''SELECT page
|
||||||
FROM dijkstra_helper
|
FROM dijkstra_helper
|
||||||
LEFT JOIN links ON links.destination=dijkstra_helper.page
|
LEFT JOIN links ON links.destination=dijkstra_helper.page
|
||||||
WHERE links.source=:page
|
WHERE links.source=%s
|
||||||
AND dijkstra_helper.value>:value''', {"pate": page, "value": value + 1})
|
AND dijkstra_helper.value>%s''', (page, value + 1))
|
||||||
# This is the list of nodes that have to be updated
|
# This is the list of nodes that have to be updated
|
||||||
result = cursor.fetchall()
|
result = cursor.fetchall()
|
||||||
|
|
||||||
cursor.execute('''UPDATE dijkstra_helper
|
cursor.execute('''UPDATE dijkstra_helper
|
||||||
SET value=:value
|
SET value=%s
|
||||||
WHERE name IN (
|
WHERE page IN (
|
||||||
SELECT destination
|
SELECT destination
|
||||||
FROM links
|
FROM links
|
||||||
WHERE source=:page)
|
WHERE source=%s)
|
||||||
AND dijkstra_helper.value>:value''', {"value": value + 1, "page": page})
|
AND dijkstra_helper.value>%s''', (value + 1, page, value + 1))
|
||||||
connection.commit()
|
connection.commit()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@ -55,7 +58,7 @@ def recursive_dijkstra(titles, value, connection):
|
||||||
def dijkstra(title, connection):
|
def dijkstra(title, connection):
|
||||||
page = get_page_id(title, connection)
|
page = get_page_id(title, connection)
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute("UPDATE dijkstra_helper SET value=0 WHERE name=%s", (page,))
|
cursor.execute("UPDATE dijkstra_helper SET value=0 WHERE page=%s", (page,))
|
||||||
|
|
||||||
todos = dijkstra_one(page, 1, connection)
|
todos = dijkstra_one(page, 1, connection)
|
||||||
recursive_dijkstra(todos, 2, connection)
|
recursive_dijkstra(todos, 2, connection)
|
||||||
|
|
|
@ -8,6 +8,9 @@ from receive import receive_links, receive_link_graph
|
||||||
from dijkstra import prepare_dijkstra, dijkstra
|
from dijkstra import prepare_dijkstra, dijkstra
|
||||||
from connectivity import shortest_path
|
from connectivity import shortest_path
|
||||||
|
|
||||||
|
from graph import DijkstraHelper
|
||||||
|
from db_util import get_page_id
|
||||||
|
|
||||||
cache = get_cache("./cache/", "Angela_Merkel")
|
cache = get_cache("./cache/", "Angela_Merkel")
|
||||||
receive_link_graph("Angela_Merkel", cache, 2)
|
receive_link_graph("Angela_Merkel", cache, 2)
|
||||||
|
|
||||||
|
@ -17,5 +20,13 @@ print(cursor.fetchall())
|
||||||
|
|
||||||
#prepare_dijkstra(cache)
|
#prepare_dijkstra(cache)
|
||||||
#dijkstra("Angela_Merkel", cache)
|
#dijkstra("Angela_Merkel", cache)
|
||||||
|
#
|
||||||
|
#print(shortest_path("Angela_Merkel", "Germany", cache))
|
||||||
|
|
||||||
print(shortest_path("Angela_Merkel", "Germany", cache))
|
|
||||||
|
angela = get_page_id("Angela_Merkel", cache)
|
||||||
|
|
||||||
|
dijkstra = DijkstraHelper.from_db(cache)
|
||||||
|
dijkstra.dijkstra(angela)
|
||||||
|
dijkstra.write_back(cache)
|
||||||
|
print(dijkstra._nodes)
|
||||||
|
|
|
@ -38,7 +38,7 @@ def get_data_with_proxy(url, conn_object, visit_first=None):
|
||||||
(time.time(), i))
|
(time.time(), i))
|
||||||
continue
|
continue
|
||||||
# Be nice to Wikipedia.
|
# Be nice to Wikipedia.
|
||||||
time.sleep(0.3)
|
time.sleep(0.1)
|
||||||
return response.json()
|
return response.json()
|
||||||
raise NoMoreProxiesException("No more proxies left")
|
raise NoMoreProxiesException("No more proxies left")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user