Add an optional MySQL (pymysql) backend, selected by config["use_sqlite"].

Review notes for this revision of the patch:
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy. Every hunk's line counts match its header, but the position of blank
  context lines and the indentation of context lines are a best-effort
  reading; apply with whitespace tolerance if a hunk is rejected.
* The non-SQLite queries in connectivity.py and proxy.py used "SORT BY",
  which MySQL does not accept; they now use ORDER BY.
* The non-SQLite query in shortest_path binds its parameter as %(title)s
  (the pyformat style used by PyMySQL) rather than SQLite's :title.
* can_reach in connectivity.py still uses a "?" placeholder. It is unchanged
  context in this patch and presumably needs the same treatment before it
  can run against MySQL.
* The "index" lines of connectivity.py and proxy.py still carry the blob id
  of the uncorrected post-image.

diff --git a/exam/ex01/cache.py b/exam/ex01/cache.py
index e691b83..fe08665 100644
--- a/exam/ex01/cache.py
+++ b/exam/ex01/cache.py
@@ -1,26 +1,48 @@
 import os
 import sqlite3
 
+from cfg import config
+if(not config["use_sqlite"]):
+    import pymysql
+
 from proxy import fetch_proxies
 
 def get_cache(directory, name):
-    cache_file = os.path.join(directory, "{}.sqlite".format(name))
-    if(not os.path.exists(cache_file)):
-        with open(cache_file, "w") as fin:
-            pass
+    if(config["use_sqlite"]):
+        cache_file = os.path.join(directory, "{}.sqlite".format(name))
+        if(not os.path.exists(cache_file)):
+            with open(cache_file, "w") as fin:
+                pass
+            db = sqlite3.connect(cache_file)
+
+            cursor = db.cursor()
+
+            cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)")
+            cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)")
+            cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)")
+            cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)")
+
+            db.commit()
         db = sqlite3.connect(cache_file)
+        fetch_proxies(db)
+        return db
+    db = pymysql.connect(
+            host=config["mysql_server"]
+            , user=config["mysql_user"]
+            , password=config["mysql_password"]
+            , db=config["mysql_database"]
+            , charset="utf8")
 
-        cursor = db.cursor()
+    cursor = db.cursor()
 
-        cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)")
-        cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)")
-        cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)")
-        cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)")
+    cursor.execute("CREATE TABLE IF NOT EXISTS proxies(proxy varchar(100), lasttime_could_not_be_used DECIMAL)")
+    cursor.execute("CREATE TABLE IF NOT EXISTS links(source varchar(50), destination varchar(50))")
+    cursor.execute("CREATE TABLE IF NOT EXISTS dijkstra_helper(name varchar(50) UNIQUE, value INT)")
+    cursor.execute("CREATE TABLE IF NOT EXISTS failed_to_fetch(title varchar(50), depth INT)")
 
-        db.commit()
-    db = sqlite3.connect(cache_file)
-    fetch_proxies(db)
+    db.commit()
     return db
 
+
 def clear_cache_data(connection):
     cursor = connection.cursor()
diff --git a/exam/ex01/connectivity.py b/exam/ex01/connectivity.py
index c7af3fe..b80cb06 100644
--- a/exam/ex01/connectivity.py
+++ b/exam/ex01/connectivity.py
@@ -1,5 +1,7 @@
 from collections import deque
 
+from cfg import config
+
 def can_reach(title, connection):
     cursor = connection.cursor()
     cursor.execute("SELECT COUNT(destination) FROM links WHERE destination=?", (title, ))
@@ -15,12 +17,20 @@ def shortest_path(center, title, connection):
     path = deque()
     while(current_title != center):
         path.append(current_title)
-        cursor.execute('''SELECT links.source
-                          FROM links
-                          LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
-                          WHERE links.destination=:title
-                          ORDER BY dijkstra_helper.value ASC
-                          LIMIT 1''', {"title": current_title})
+        if(config["use_sqlite"]):
+            cursor.execute('''SELECT links.source
+                              FROM links
+                              LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
+                              WHERE links.destination=:title
+                              ORDER BY dijkstra_helper.value ASC
+                              LIMIT 1''', {"title": current_title})
+        else:
+            cursor.execute('''SELECT links.source
+                              FROM links
+                              LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
+                              WHERE links.destination=%(title)s
+                              ORDER BY dijkstra_helper.value ASC
+                              LIMIT 1''', {"title": current_title})
         current_title = cursor.fetchone()[0]
     return list(reversed(path))
 
diff --git a/exam/ex01/dijkstra.py b/exam/ex01/dijkstra.py
index 65e1e82..3fe2b2c 100644
--- a/exam/ex01/dijkstra.py
+++ b/exam/ex01/dijkstra.py
@@ -1,3 +1,6 @@
+from collections import deque
+
+from cfg import config
 
 def prepare_dijkstra(connection):
     cursor = connection.cursor()
@@ -5,7 +8,10 @@ def prepare_dijkstra(connection):
                       SELECT destination
                       FROM links
                       ''')
-    cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
+    if(config["use_sqlite"]):
+        cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
+    else:
+        cursor.execute("UPDATE dijkstra_helper SET value=2147483647")
     connection.commit()
 
 def dijkstra_one(title, value, connection):
@@ -35,9 +41,11 @@ def recursive_dijkstra(titles, value, connection):
     if(not titles):
         return
 
+    todos = deque()
     for title in titles:
-        todos = dijkstra_one(title, value, connection)
-        recursive_dijkstra(todos, value + 1, connection)
+        todos.extend(dijkstra_one(title, value, connection))
+
+    recursive_dijkstra(todos, value + 1, connection)
 
 
 def dijkstra(title, connection):
diff --git a/exam/ex01/main.py b/exam/ex01/main.py
index e49f408..46695f2 100644
--- a/exam/ex01/main.py
+++ b/exam/ex01/main.py
@@ -6,6 +6,7 @@
 from cache import get_cache
 from receive import receive_links, receive_link_graph
 from dijkstra import prepare_dijkstra, dijkstra
+from connectivity import shortest_path
 
 cache = get_cache("./cache/", "Angela_Merkel")
 receive_link_graph("Angela_Merkel", cache, 2)
@@ -14,6 +15,7 @@ cursor = cache.cursor()
 cursor.execute("SELECT COUNT(source) FROM links")
 print(cursor.fetchall())
 
-prepare_dijkstra(cache)
-dijkstra("Angela_Merkel", cache)
+#prepare_dijkstra(cache)
+#dijkstra("Angela_Merkel", cache)
+print(shortest_path("Angela_Merkel", "Germany", cache))
 
diff --git a/exam/ex01/proxy.py b/exam/ex01/proxy.py
index c4c1a51..e35b8f1 100644
--- a/exam/ex01/proxy.py
+++ b/exam/ex01/proxy.py
@@ -8,6 +8,8 @@ import logging
 import time
 from bs4 import BeautifulSoup
 
+from cfg import config
+
 logger = logging.getLogger(__name__)
 
 class NoMoreProxiesException(Exception):
@@ -16,7 +18,10 @@ class NoMoreProxiesException(Exception):
 def get_data_with_proxy(url, conn_object, visit_first=None):
     cursor = conn_object.cursor()
     # Assume that table name is proxies
-    cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
+    if(config["use_sqlite"]):
+        cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
+    else:
+        cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
     headers = {}
     for i, lasttime_could_not_be_used in cursor:
         session = requests.Session()