Added MySQL support (in theory)

gol
Daniel Knüttel 2019-02-15 11:47:50 +01:00
parent 635a9f7739
commit c958e44632
5 changed files with 71 additions and 24 deletions

View File

@ -1,26 +1,48 @@
import os import os
import sqlite3 import sqlite3
from cfg import config
if(not config["use_sqlite"]):
import pymysql
from proxy import fetch_proxies from proxy import fetch_proxies
def get_cache(directory, name): def get_cache(directory, name):
cache_file = os.path.join(directory, "{}.sqlite".format(name)) if(config["use_sqlite"]):
if(not os.path.exists(cache_file)): cache_file = os.path.join(directory, "{}.sqlite".format(name))
with open(cache_file, "w") as fin: if(not os.path.exists(cache_file)):
pass with open(cache_file, "w") as fin:
pass
db = sqlite3.connect(cache_file)
cursor = db.cursor()
cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)")
cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)")
cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)")
cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)")
db.commit()
db = sqlite3.connect(cache_file) db = sqlite3.connect(cache_file)
fetch_proxies(db)
return db
db = pymysql.connect(
host=config["mysql_server"]
, user=config["mysql_user"]
, password=config["mysql_password"]
, db=config["mysql_database"]
, charset="utf8")
cursor = db.cursor() cursor = db.cursor()
cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)") cursor.execute("CREATE TABLE IF NOT EXISTS proxies(proxy varchar(100), lasttime_could_not_be_used DECIMAL)")
cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)") cursor.execute("CREATE TABLE IF NOT EXISTS links(source varchar(50), destination varchar(50))")
cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)") cursor.execute("CREATE TABLE IF NOT EXISTS dijkstra_helper(name varchar(50) UNIQUE, value INT)")
cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)") cursor.execute("CREATE TABLE IF NOT EXISTS failed_to_fetch(title varchar(50), depth INT)")
db.commit() db.commit()
db = sqlite3.connect(cache_file)
fetch_proxies(db)
return db return db
def clear_cache_data(connection): def clear_cache_data(connection):
cursor = connection.cursor() cursor = connection.cursor()

View File

@ -1,5 +1,7 @@
from collections import deque from collections import deque
from cfg import config
def can_reach(title, connection): def can_reach(title, connection):
cursor = connection.cursor() cursor = connection.cursor()
cursor.execute("SELECT COUNT(destination) FROM links WHERE destination=?", (title, )) cursor.execute("SELECT COUNT(destination) FROM links WHERE destination=?", (title, ))
@ -15,12 +17,20 @@ def shortest_path(center, title, connection):
path = deque() path = deque()
while(current_title != center): while(current_title != center):
path.append(current_title) path.append(current_title)
cursor.execute('''SELECT links.source if(config["use_sqlite"]):
FROM links cursor.execute('''SELECT links.source
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name FROM links
WHERE links.destination=:title LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
ORDER BY dijkstra_helper.value ASC WHERE links.destination=:title
LIMIT 1''', {"title": current_title}) ORDER BY dijkstra_helper.value ASC
LIMIT 1''', {"title": current_title})
else:
cursor.execute('''SELECT links.source
FROM links
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
WHERE links.destination=:title
SORT BY dijkstra_helper.value ASC
LIMIT 1''', {"title": current_title})
current_title = cursor.fetchone()[0] current_title = cursor.fetchone()[0]
return list(reversed(path)) return list(reversed(path))

View File

@ -1,3 +1,6 @@
from collections import deque
from cfg import config
def prepare_dijkstra(connection): def prepare_dijkstra(connection):
cursor = connection.cursor() cursor = connection.cursor()
@ -5,7 +8,10 @@ def prepare_dijkstra(connection):
SELECT destination FROM links SELECT destination FROM links
''') ''')
cursor.execute("UPDATE dijkstra_helper SET value=1e1000") if(config["use_sqlite"]):
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
else:
cursor.execute("UPDATE dijkstra_helper SET value=2147483647")
connection.commit() connection.commit()
def dijkstra_one(title, value, connection): def dijkstra_one(title, value, connection):
@ -35,9 +41,11 @@ def recursive_dijkstra(titles, value, connection):
if(not titles): if(not titles):
return return
todos = deque()
for title in titles: for title in titles:
todos = dijkstra_one(title, value, connection) todos.extend(dijkstra_one(title, value, connection))
recursive_dijkstra(todos, value + 1, connection)
recursive_dijkstra(todos, value + 1, connection)
def dijkstra(title, connection): def dijkstra(title, connection):

View File

@ -6,6 +6,7 @@ from cache import get_cache
from receive import receive_links, receive_link_graph from receive import receive_links, receive_link_graph
from dijkstra import prepare_dijkstra, dijkstra from dijkstra import prepare_dijkstra, dijkstra
from connectivity import shortest_path
cache = get_cache("./cache/", "Angela_Merkel") cache = get_cache("./cache/", "Angela_Merkel")
receive_link_graph("Angela_Merkel", cache, 2) receive_link_graph("Angela_Merkel", cache, 2)
@ -14,6 +15,7 @@ cursor = cache.cursor()
cursor.execute("SELECT COUNT(source) FROM links") cursor.execute("SELECT COUNT(source) FROM links")
print(cursor.fetchall()) print(cursor.fetchall())
prepare_dijkstra(cache) #prepare_dijkstra(cache)
dijkstra("Angela_Merkel", cache) #dijkstra("Angela_Merkel", cache)
print(shortest_path("Angela_Merkel", "Germany", cache))

View File

@ -8,6 +8,8 @@ import logging
import time import time
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from cfg import config
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class NoMoreProxiesException(Exception): class NoMoreProxiesException(Exception):
@ -16,7 +18,10 @@ class NoMoreProxiesException(Exception):
def get_data_with_proxy(url, conn_object, visit_first=None): def get_data_with_proxy(url, conn_object, visit_first=None):
cursor = conn_object.cursor() cursor = conn_object.cursor()
# Assume that table name is proxies # Assume that table name is proxies
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''') if(config["use_sqlite"]):
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
else:
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies SORT BY lasttime_could_not_be_used ASC''')
headers = {} headers = {}
for i, lasttime_could_not_be_used in cursor: for i, lasttime_could_not_be_used in cursor:
session = requests.Session() session = requests.Session()