Added MySQL support (in theory)
This commit is contained in:
parent
635a9f7739
commit
c958e44632
|
@ -1,27 +1,49 @@
|
||||||
import os
|
import os
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
|
from cfg import config
|
||||||
|
if(not config["use_sqlite"]):
|
||||||
|
import pymysql
|
||||||
|
|
||||||
from proxy import fetch_proxies
|
from proxy import fetch_proxies
|
||||||
|
|
||||||
def get_cache(directory, name):
|
def get_cache(directory, name):
|
||||||
cache_file = os.path.join(directory, "{}.sqlite".format(name))
|
if(config["use_sqlite"]):
|
||||||
if(not os.path.exists(cache_file)):
|
cache_file = os.path.join(directory, "{}.sqlite".format(name))
|
||||||
with open(cache_file, "w") as fin:
|
if(not os.path.exists(cache_file)):
|
||||||
pass
|
with open(cache_file, "w") as fin:
|
||||||
|
pass
|
||||||
|
db = sqlite3.connect(cache_file)
|
||||||
|
|
||||||
|
cursor = db.cursor()
|
||||||
|
|
||||||
|
cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)")
|
||||||
|
cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)")
|
||||||
|
cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)")
|
||||||
|
cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)")
|
||||||
|
|
||||||
|
db.commit()
|
||||||
db = sqlite3.connect(cache_file)
|
db = sqlite3.connect(cache_file)
|
||||||
|
fetch_proxies(db)
|
||||||
|
return db
|
||||||
|
db = pymysql.connect(
|
||||||
|
host=config["mysql_server"]
|
||||||
|
, user=config["mysql_user"]
|
||||||
|
, password=config["mysql_password"]
|
||||||
|
, db=config["mysql_database"]
|
||||||
|
, charset="utf8")
|
||||||
|
|
||||||
cursor = db.cursor()
|
cursor = db.cursor()
|
||||||
|
|
||||||
cursor.execute("CREATE TABLE proxies(proxy TEXT, lasttime_could_not_be_used DECIMAL)")
|
cursor.execute("CREATE TABLE IF NOT EXISTS proxies(proxy varchar(100), lasttime_could_not_be_used DECIMAL)")
|
||||||
cursor.execute("CREATE TABLE links(source TEXT, destination TEXT)")
|
cursor.execute("CREATE TABLE IF NOT EXISTS links(source varchar(50), destination varchar(50))")
|
||||||
cursor.execute("CREATE TABLE dijkstra_helper(name TEXT UNIQUE, value INT)")
|
cursor.execute("CREATE TABLE IF NOT EXISTS dijkstra_helper(name varchar(50) UNIQUE, value INT)")
|
||||||
cursor.execute("CREATE TABLE failed_to_fetch(title TEXT, depth INT)")
|
cursor.execute("CREATE TABLE IF NOT EXISTS failed_to_fetch(title varchar(50), depth INT)")
|
||||||
|
|
||||||
db.commit()
|
db.commit()
|
||||||
db = sqlite3.connect(cache_file)
|
|
||||||
fetch_proxies(db)
|
|
||||||
return db
|
return db
|
||||||
|
|
||||||
|
|
||||||
def clear_cache_data(connection):
|
def clear_cache_data(connection):
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute("DELETE FROM links")
|
cursor.execute("DELETE FROM links")
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
from collections import deque
|
from collections import deque
|
||||||
|
|
||||||
|
from cfg import config
|
||||||
|
|
||||||
def can_reach(title, connection):
|
def can_reach(title, connection):
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute("SELECT COUNT(destination) FROM links WHERE destination=?", (title, ))
|
cursor.execute("SELECT COUNT(destination) FROM links WHERE destination=?", (title, ))
|
||||||
|
@ -15,12 +17,20 @@ def shortest_path(center, title, connection):
|
||||||
path = deque()
|
path = deque()
|
||||||
while(current_title != center):
|
while(current_title != center):
|
||||||
path.append(current_title)
|
path.append(current_title)
|
||||||
cursor.execute('''SELECT links.source
|
if(config["use_sqlite"]):
|
||||||
FROM links
|
cursor.execute('''SELECT links.source
|
||||||
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
|
FROM links
|
||||||
WHERE links.destination=:title
|
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
|
||||||
ORDER BY dijkstra_helper.value ASC
|
WHERE links.destination=:title
|
||||||
LIMIT 1''', {"title": current_title})
|
ORDER BY dijkstra_helper.value ASC
|
||||||
|
LIMIT 1''', {"title": current_title})
|
||||||
|
else:
|
||||||
|
cursor.execute('''SELECT links.source
|
||||||
|
FROM links
|
||||||
|
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.name
|
||||||
|
WHERE links.destination=:title
|
||||||
|
SORT BY dijkstra_helper.value ASC
|
||||||
|
LIMIT 1''', {"title": current_title})
|
||||||
current_title = cursor.fetchone()[0]
|
current_title = cursor.fetchone()[0]
|
||||||
return list(reversed(path))
|
return list(reversed(path))
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
from collections import deque
|
||||||
|
|
||||||
|
from cfg import config
|
||||||
|
|
||||||
def prepare_dijkstra(connection):
|
def prepare_dijkstra(connection):
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
|
@ -5,7 +8,10 @@ def prepare_dijkstra(connection):
|
||||||
SELECT destination FROM links
|
SELECT destination FROM links
|
||||||
''')
|
''')
|
||||||
|
|
||||||
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
|
if(config["use_sqlite"]):
|
||||||
|
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
|
||||||
|
else:
|
||||||
|
cursor.execute("UPDATE dijkstra_helper SET value=2147483647")
|
||||||
connection.commit()
|
connection.commit()
|
||||||
|
|
||||||
def dijkstra_one(title, value, connection):
|
def dijkstra_one(title, value, connection):
|
||||||
|
@ -35,9 +41,11 @@ def recursive_dijkstra(titles, value, connection):
|
||||||
if(not titles):
|
if(not titles):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
todos = deque()
|
||||||
for title in titles:
|
for title in titles:
|
||||||
todos = dijkstra_one(title, value, connection)
|
todos.extend(dijkstra_one(title, value, connection))
|
||||||
recursive_dijkstra(todos, value + 1, connection)
|
|
||||||
|
recursive_dijkstra(todos, value + 1, connection)
|
||||||
|
|
||||||
|
|
||||||
def dijkstra(title, connection):
|
def dijkstra(title, connection):
|
||||||
|
|
|
@ -6,6 +6,7 @@ from cache import get_cache
|
||||||
from receive import receive_links, receive_link_graph
|
from receive import receive_links, receive_link_graph
|
||||||
|
|
||||||
from dijkstra import prepare_dijkstra, dijkstra
|
from dijkstra import prepare_dijkstra, dijkstra
|
||||||
|
from connectivity import shortest_path
|
||||||
|
|
||||||
cache = get_cache("./cache/", "Angela_Merkel")
|
cache = get_cache("./cache/", "Angela_Merkel")
|
||||||
receive_link_graph("Angela_Merkel", cache, 2)
|
receive_link_graph("Angela_Merkel", cache, 2)
|
||||||
|
@ -14,6 +15,7 @@ cursor = cache.cursor()
|
||||||
cursor.execute("SELECT COUNT(source) FROM links")
|
cursor.execute("SELECT COUNT(source) FROM links")
|
||||||
print(cursor.fetchall())
|
print(cursor.fetchall())
|
||||||
|
|
||||||
prepare_dijkstra(cache)
|
#prepare_dijkstra(cache)
|
||||||
dijkstra("Angela_Merkel", cache)
|
#dijkstra("Angela_Merkel", cache)
|
||||||
|
|
||||||
|
print(shortest_path("Angela_Merkel", "Germany", cache))
|
||||||
|
|
|
@ -8,6 +8,8 @@ import logging
|
||||||
import time
|
import time
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from cfg import config
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class NoMoreProxiesException(Exception):
|
class NoMoreProxiesException(Exception):
|
||||||
|
@ -16,7 +18,10 @@ class NoMoreProxiesException(Exception):
|
||||||
def get_data_with_proxy(url, conn_object, visit_first=None):
|
def get_data_with_proxy(url, conn_object, visit_first=None):
|
||||||
cursor = conn_object.cursor()
|
cursor = conn_object.cursor()
|
||||||
# Assume that table name is proxies
|
# Assume that table name is proxies
|
||||||
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
|
if(config["use_sqlite"]):
|
||||||
|
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
|
||||||
|
else:
|
||||||
|
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies SORT BY lasttime_could_not_be_used ASC''')
|
||||||
headers = {}
|
headers = {}
|
||||||
for i, lasttime_could_not_be_used in cursor:
|
for i, lasttime_could_not_be_used in cursor:
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user