basically a final touch
commit fdd5fbd888 (parent e7aeaf94fa)
ex_35.py (7 changed lines)

@@ -16,6 +16,13 @@ class counter_of_calls(object):
 def f(x):
     return x + 6
 
+@counter_of_calls
+def g(x):
+    return x + 7
+
 if( __name__ == "__main__"):
     print(f(3))
     print(f(5))
+
+    print(g(3))
+    print(g(5))
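The hunk header points at the counter_of_calls class defined earlier in ex_35.py; its body is not part of this commit. For orientation only, a call-counting decorator of that shape could look like the sketch below (the actual class in ex_35.py may differ):

import functools

class counter_of_calls(object):
    # Hypothetical sketch: wrap a function and count how often it is called.
    def __init__(self, f):
        functools.update_wrapper(self, f)
        self.f = f
        self.calls = 0

    def __call__(self, x):
        self.calls += 1
        print("{} called {} times".format(self.f.__name__, self.calls))
        return self.f(x)

With a decorator of that shape, the new print(g(3)) and print(g(5)) would report the running call count of g before printing 10 and 12.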
ex_46.py (6 changed lines)

@@ -1,3 +1,4 @@
+import numpy as np
 
 def bisec(f, a, b, eps, nmax):
     """
@@ -52,7 +53,10 @@ if( __name__ == "__main__"):
     f2 = lambda x: x**3
     f3 = lambda x: -x + 1
     f4 = lambda x: -x**3 + 1
+    f4 = lambda x: (x - 2)*np.exp(-x**2)
 
     fs = [f1, f2, f3, f4]
     for f in fs:
-        print(bisec(f, -12, 10, 0.001, 100))
+        print(bisec(f, -12, 10, 0.0000001, 100))
+
+    print(bisec(f4, 1.2, 2.4, 0.001, 100))
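The replacement f4 = (x - 2)*np.exp(-x**2) has its only root at x = 2, which sits inside the extra bracket [1.2, 2.4] added at the end; on the wide bracket [-12, 10] the Gaussian factor makes |f4| vanishingly small at both ends, which is presumably why a dedicated call was added. A minimal bisection routine matching the bisec(f, a, b, eps, nmax) signature used here might look as follows (sketch only; the real implementation in ex_46.py is not shown in this diff):

import numpy as np

def bisec(f, a, b, eps, nmax):
    # Halve the bracket [a, b] until it is narrower than eps or nmax
    # iterations are spent; assumes f(a) and f(b) differ in sign.
    for _ in range(nmax):
        m = 0.5*(a + b)
        if b - a < eps:
            return m
        if f(a)*f(m) <= 0:
            b = m
        else:
            a = m
    return 0.5*(a + b)

f4 = lambda x: (x - 2)*np.exp(-x**2)
print(bisec(f4, 1.2, 2.4, 0.001, 100))   # converges to roughly 2.0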
ex_47.py (4 changed lines)

@@ -4,10 +4,6 @@ import matplotlib.pyplot as plt
 
 f = lambda x: x*np.sin(7*x)*np.exp(-(x - 2)**2)
 
-x0 = f(np.arange(-20, 20, 0.1)).min()
-
-m = minimize(f, x0, method="CG")
-print(m)
 
 g = lambda x0: minimize(f, x0, method="CG").x
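The removed lines passed minimize a function value rather than a position: f(np.arange(-20, 20, 0.1)).min() is the smallest sampled value of f, not the x at which it occurs. If a data-driven starting point is still wanted for the remaining g, the sampled argmin provides one (a sketch, assuming the same imports that ex_47.py already uses):

import numpy as np
from scipy.optimize import minimize

f = lambda x: x*np.sin(7*x)*np.exp(-(x - 2)**2)
g = lambda x0: minimize(f, x0, method="CG").x

xs = np.arange(-20, 20, 0.1)
x0 = xs[f(xs).argmin()]   # the x at which the sampled values are smallest
print(g(x0))              # local minimum reached by CG from that start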
@@ -24,7 +24,7 @@ def get_data_with_proxy(url, conn_object, visit_first=None):
     session = requests.Session()
     session.proxies = { 'http': i}
     try:
-        response = session.get(url, headers=headers, timeout=3)
+        response = session.get(url, headers=headers, timeout=1)
     except Exception as e:
         if(isinstance(e, KeyboardInterrupt)):
             raise e
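The only change in this hunk is the tighter per-request timeout. As a usage sketch (assuming, as the context suggests, that i is the current proxy URL from a rotation and headers is prepared elsewhere; the helper name fetch is illustrative only):

import requests

def fetch(url, proxy_url, headers):
    # Route the request through one proxy and give up after 1 s instead of 3 s,
    # so a dead proxy costs little time before the caller rotates to the next.
    session = requests.Session()
    session.proxies = {"http": proxy_url}
    try:
        return session.get(url, headers=headers, timeout=1)
    except KeyboardInterrupt:
        raise
    except requests.RequestException:
        return None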
@@ -1,7 +1,10 @@
 import logging
+from collections import deque
 
+from cfg import config
 from url import construct_url
 from proxy import get_data_with_proxy, NoMoreProxiesException
-from db_util import get_page_id
+from db_util import get_page_id, get_page_title
 
 logger = logging.getLogger(__name__)
@@ -17,12 +20,14 @@ def ignore_title(title):
         return True
     return False
 
-def _receive_links(title, connection):
+def _receive_links(page, connection):
+    title = get_page_title(page, connection)
     url = construct_url(title)
 
-    source = get_page_id(title, connection)
-
     result = get_data_with_proxy(url, connection)
 
     # This is basically because we don't know the page ID.
     for k, page_data in result["query"]["pages"].items():
         cursor = connection.cursor()
@@ -31,45 +36,53 @@ def _receive_links(title, connection):
             # avoid 1-loops
             if(destination_title == title):
                 continue
-            if(ignore_title(title)):
+            if(ignore_title(destination_title)):
                 continue
             destination = get_page_id(destination_title, connection)
-            cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination))
-            yield destination_title
+            cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (page, destination))
+            yield destination
 
         else:
             for destination in page_data["links"]:
-                if(ignore_title(title)):
-                    continue
                 destination_title = destination["title"].replace(" ", "_")
+                if(ignore_title(destination_title)):
+                    continue
                 destination = get_page_id(destination_title, connection)
-                cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination))
-                yield destination_title
+                cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (page, destination))
+                yield destination
     connection.commit()
 
 def receive_links(title, connection):
     return list(_receive_links(title, connection))
 
 
-def receive_link_graph(title, connection, depth, fetch_missing=True):
+def receive_link_graph(title, connection, depth):
+    page = get_page_id(title, connection)
+    do_receive_link_graph(page, connection, depth, fetch_missing=True)
+
+    cursor = connection.cursor()
+    cursor.execute("SELECT COUNT(page) FROM failed_to_fetch")
+    if(cursor.fetchone()[0]):
+        do_receive_link_graph(page, connection, depth, fetch_missing=True)
+
+def do_receive_link_graph(page, connection, depth, fetch_missing=False):
+    if(depth < 0):
+        # end of recursion
+        return
+    logger.info("do_receive_link_graph(%d, <connection>, %d)" % (page, depth))
     cursor = connection.cursor()
 
     # Fetch the missing links.
     if(fetch_missing):
         delete_cursor = connection.cursor()
-        cursor.execute('''SELECT pages.title, failed_to_fetch.depth, failed_to_fetch.page
+        cursor.execute('''SELECT failed_to_fetch.depth, failed_to_fetch.page
                 FROM failed_to_fetch
-                LEFT JOIN pages ON pages.page_id=failed_to_fetch.page''')
-        for t, d, p in cursor:
-            receive_link_graph(t, connection, d, fetch_missing=False)
+                ''')
+        for d, p in cursor:
+            do_receive_link_graph(p, connection, d, fetch_missing=False)
             delete_cursor.execute("DELETE FROM failed_to_fetch WHERE page=%s", (p,))
 
 
-    if(depth < 0):
-        # end of recursion
-        return
-
-    page = get_page_id(title, connection)
-
     cursor = connection.cursor()
     cursor.execute("SELECT COUNT(source) FROM links WHERE source=%s", (page,))
@@ -77,19 +90,18 @@ def receive_link_graph(title, connection, depth, fetch_missing=True):
         # we fetched that title already
         return
 
-    logger.info("fetching links for {}".format(title))
+    logger.info("fetching links for {}".format(page))
 
-    for link in _receive_links(title, connection):
+    for link in _receive_links(page, connection):
         try:
-            receive_link_graph(link, connection, depth - 1)
+            do_receive_link_graph(link, connection, depth - 1)
         except NoMoreProxiesException as e:
             logger.exception("All proxies are blocked")
             # Wikipedia blocked all our proxies.
             # Retry later, so we have to store our list that is still to fetch.
 
             cursor = connection.cursor()
-            failed_page = get_page_id(link, connection)
-            cursor.execute("INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)", (failed_page, depth - 1))
+            cursor.execute("INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)", (link, depth - 1))
             connection.commit()
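For orientation, the INSERT and SELECT statements above imply roughly the following two tables; this is a hypothetical reading, the real table definitions are not part of this commit:

# Hypothetical schema implied by the queries in this diff; the actual DDL
# lives elsewhere in the project and may differ.
SCHEMA = """
CREATE TABLE links(
    source      INTEGER,  -- page_id of the page containing the link
    destination INTEGER   -- page_id of the page being linked to
);
CREATE TABLE failed_to_fetch(
    page  INTEGER,        -- page_id whose outgoing links still need fetching
    depth INTEGER         -- remaining recursion depth to resume with
);
"""

With that reading, the reworked receive_link_graph resolves the title to a page ID once, crawls with do_receive_link_graph, and then replays the stored (page, depth) pairs a second time if any rows are left in failed_to_fetch.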