Compare commits

16 Commits
gol ... master

31 changed files with 13364 additions and 102 deletions

View File

@@ -16,6 +16,13 @@ class counter_of_calls(object):
def f(x):
return x + 6
@counter_of_calls
def g(x):
return x + 7
if( __name__ == "__main__"):
print(f(3))
print(f(5))
print(g(3))
print(g(5))

View File

@@ -1,3 +1,4 @@
import numpy as np
def bisec(f, a, b, eps, nmax):
"""
@@ -52,7 +53,10 @@ if( __name__ == "__main__"):
f2 = lambda x: x**3
f3 = lambda x: -x + 1
f4 = lambda x: -x**3 + 1
f4 = lambda x: (x - 2)*np.exp(-x**2)
fs = [f1, f2, f3, f4]
for f in fs:
print(bisec(f, -12, 10, 0.001, 100))
print(bisec(f, -12, 10, 0.0000001, 100))
print(bisec(f4, 1.2, 2.4, 0.001, 100))

View File

@@ -4,10 +4,6 @@ import matplotlib.pyplot as plt
f = lambda x: x*np.sin(7*x)*np.exp(-(x - 2)**2)
x0 = f(np.arange(-20, 20, 0.1)).min()
m = minimize(f, x0, method="CG")
print(m)
g = lambda x0: minimize(f, x0, method="CG").x

47
exam/ex01/README.rst Normal file
View File

@@ -0,0 +1,47 @@
Wikipedia Link Graph Analyzer
*****************************
.. contents::
Configuration
=============
Configuration is done in the file ``cfg.py``. There one can
specify whether the system should use a sqlite or a mysql
backend. Using the sqlite backend is faster for fetching the
data because sqlite omits implicit keys. However when one
wants to analyze the data using SQL instead of the pure
python implementation mysql is faster.
It is recommended to use sqlite for fetching the data, then
transferring it to a mysql database and use this database
for analyzing.
The main options in ``cfg.py`` are whether to use mysql or
sqlite and options for those systems.
Invocation
==========
Before invoking the program one should make sure that the
`configuration`_ is correct, in particular that the cache
directory and cache name are set correctly for sqlite and
that the mysql connection information is correct.
Then one must edit the name of the article to analyze and
the depth up to which links are fetched. After this is done
the link graph can be fetched (using ``python3 main.py``).
One can specify the language to use using a language
abbreviation in ``receive_link_graph``.
It might be necessary to run this part several times if the
program was unable to fetch all links. One can check for
unreceived data by executing ``SELECT COUNT(*) FROM
failed_to_fetch``. The result should be 0.
Then the script uses Dijkstra's algorithm in breadth-first
mode to analyze the graph. By default this is done
in-memory; it is however possible to do it with SQL. Using
SQL is recommended only if the data exceeds the RAM, as it
is way slower.

View File

@@ -7,8 +7,10 @@ if(not config["use_sqlite"]):
from proxy import fetch_proxies
def get_cache(directory, name):
def get_cache():
if(config["use_sqlite"]):
directory = config["sqlite_cache_directory"]
name = config["sqlite_cache_name"]
cache_file = os.path.join(directory, "{}.sqlite".format(name))
if(not os.path.exists(cache_file)):
with open(cache_file, "w") as fin:

BIN
exam/ex01/cache/Angela_Merkel.sqlite vendored Normal file

Binary file not shown.

View File

@@ -1,8 +1,15 @@
config = {
"use_sqlite": False
"use_sqlite": True
, "mysql_server": "172.17.0.2"
, "mysql_user": "wikipedia"
, "mysql_password": "wikipediastuff"
, "mysql_database": "wikipedia_link_db"
, "sqlite_cache_directory": "./cache/"
, "sqlite_cache_name": "Angela_Merkel"
}
if(config["use_sqlite"]):
config["sql_method"] = "sqlite"
else:
config["sql_method"] = "mysql"

View File

@@ -2,11 +2,12 @@ from collections import deque
from cfg import config
from db_util import get_page_id
import sql
def can_reach(title, connection):
page = get_page_id(title, connection)
cursor = connection.cursor()
cursor.execute("SELECT COUNT(destination) FROM links WHERE destination=%s", (page, ))
cursor.execute(sql.statements["count_links_to"], (page, ))
count = cursor.fetchone()[0]
return count > 0
@@ -20,12 +21,7 @@ def shortest_path(center, title, connection):
path = deque()
while(current_page != center_page):
path.append(current_page)
cursor.execute('''SELECT links.source
FROM links
LEFT JOIN dijkstra_helper ON links.destination=dijkstra_helper.page
WHERE links.destination=%s
ORDER BY dijkstra_helper.value ASC
LIMIT 1''', (current_page,))
cursor.execute(sql.statements["dijkstra_backtrack_one"], (current_page,))
current_page = cursor.fetchone()[0]
return list(reversed(path))

View File

@@ -1,11 +1,9 @@
from cfg import config
import sql
def _get_page_id(title, connection):
cursor = connection.cursor()
if(config["use_sqlite"]):
cursor.execute("SELECT rowid FROM pages WHERE title=%s", (title,))
else:
cursor.execute("SELECT page_id FROM pages WHERE title=%s", (title,))
cursor.execute(sql.statements["get_page_id"], (title,))
return cursor.fetchone()
def get_page_id(title, connection):
@@ -15,14 +13,11 @@ def get_page_id(title, connection):
return result[0]
cursor = connection.cursor()
cursor.execute("INSERT INTO pages(title) VALUES(%s)", (title,))
cursor.execute(sql.statements["insert_page"], (title,))
return _get_page_id(title, connection)[0]
def get_page_title(page_id, connection):
cursor = connection.cursor()
if(config["use_sqlite"]):
cursor.execute("SELECT title FROM pages WHERE rowid=%s", (page_id,))
else:
cursor.execute("SELECT title FROM pages WHERE page_id=%s", (page_id,))
cursor.execute(sql.statements["get_page_title"], (page_id,))
return cursor.fetchone()[0]

View File

@@ -2,23 +2,13 @@ from collections import deque
from cfg import config
from db_util import get_page_id
import sql
def prepare_dijkstra(connection):
cursor = connection.cursor()
if(config["use_sqlite"]):
cursor.execute('''INSERT OR IGNORE INTO dijkstra_helper(page)
SELECT rowid FROM pages
''')
else:
cursor.execute('''INSERT IGNORE INTO dijkstra_helper(page)
SELECT page_id FROM pages
''')
cursor.execute(sql.statements["dijkstra_insert_pages"])
if(config["use_sqlite"]):
cursor.execute("UPDATE dijkstra_helper SET value=1e1000")
else:
cursor.execute("UPDATE dijkstra_helper SET value=2147483647")
cursor.execute(sql.statements["dijkstra_set_infinity"])
connection.commit()
def dijkstra_one(page, value, connection):
@@ -26,21 +16,11 @@ def dijkstra_one(page, value, connection):
if(isinstance(page, tuple)):
# Idk why this happens.
title = title[0]
cursor.execute('''SELECT page
FROM dijkstra_helper
LEFT JOIN links ON links.destination=dijkstra_helper.page
WHERE links.source=%s
AND dijkstra_helper.value>%s''', (page, value + 1))
cursor.execute(sql.statements["dijkstra_get_to_update"], (page, value + 1))
# This is the list of nodes that have to be updated
result = cursor.fetchall()
cursor.execute('''UPDATE dijkstra_helper
SET value=%s
WHERE page IN (
SELECT destination
FROM links
WHERE source=%s)
AND dijkstra_helper.value>%s''', (value + 1, page, value + 1))
cursor.execute(sql.statements["dijkstra_update"], (value + 1, page, value + 1))
connection.commit()
return result
@@ -58,7 +38,7 @@ def recursive_dijkstra(titles, value, connection):
def dijkstra(title, connection):
page = get_page_id(title, connection)
cursor = connection.cursor()
cursor.execute("UPDATE dijkstra_helper SET value=0 WHERE page=%s", (page,))
cursor.execute(sql.statements["dijkstra_set_root"], (page,))
todos = dijkstra_one(page, 1, connection)
recursive_dijkstra(todos, 2, connection)

View File

@@ -2,6 +2,7 @@ from collections import deque, defaultdict
import logging
from cfg import config
import sql
logger = logging.getLogger(__name__)
@@ -15,18 +16,19 @@ class DijkstraHelper(object):
@classmethod
def from_db(cls, connection):
cursor = connection.cursor()
cursor.execute("SELECT page_id FROM pages")
cursor.execute(sql.statements["get_all_page_ids"])
nodes = [n[0] for n in cursor.fetchall()]
connections = defaultdict(list)
cursor.execute("SELECT source, destination FROM links")
cursor.execute(sql.statements["get_links"])
for source, destination in cursor:
connections[source].append(destination)
return cls(nodes, connections)
def dijkstra(self, root):
self.recursive_dijkstra([root], 0)
self._nodes[root] = 0
self.recursive_dijkstra([root], 1)
def recursive_dijkstra(self, todos, depth):
if(not todos):
@@ -47,8 +49,14 @@ class DijkstraHelper(object):
def write_back(self, connection):
cursor = connection.cursor()
cursor.execute("DELETE FROM dijkstra_helper")
cursor.executemany("INSERT INTO dijkstra_helper(page, value) VALUES(%s, %s)", list(self._nodes.items()))
cursor.execute(sql.statements["delete_dijkstra"])
def sqlize(v):
if(config["use_sqlite"]):
return v
if(v == float("inf")):
return 2147483647
cursor.executemany(sql.statements["insert_dijkstra_values"], [(k, sqlize(v)) for k,v in self._nodes.items()])
connection.commit()

View File

@@ -9,10 +9,10 @@ from dijkstra import prepare_dijkstra, dijkstra
from connectivity import shortest_path
from graph import DijkstraHelper
from db_util import get_page_id
from db_util import get_page_id, get_page_title
cache = get_cache("./cache/", "Angela_Merkel")
receive_link_graph("Angela_Merkel", cache, 2)
cache = get_cache()
receive_link_graph("Angela_Merkel", cache, 2, lang="en")
cursor = cache.cursor()
cursor.execute("SELECT COUNT(source) FROM links")
@@ -21,7 +21,6 @@ print(cursor.fetchall())
#prepare_dijkstra(cache)
#dijkstra("Angela_Merkel", cache)
#
#print(shortest_path("Angela_Merkel", "Germany", cache))
angela = get_page_id("Angela_Merkel", cache)
@@ -29,4 +28,7 @@ angela = get_page_id("Angela_Merkel", cache)
dijkstra = DijkstraHelper.from_db(cache)
dijkstra.dijkstra(angela)
dijkstra.write_back(cache)
print(dijkstra._nodes)
#print({k:v for k,v in dijkstra._nodes.items() if v != float("inf")})
print([get_page_title(id_, cache) for id_ in shortest_path("Angela_Merkel", "Germany", cache)])
print([get_page_title(id_, cache) for id_ in shortest_path("Angela_Merkel", "2012_Nobel_Peace_Prize", cache)])

View File

@@ -9,6 +9,7 @@ import time
from bs4 import BeautifulSoup
from cfg import config
import sql
logger = logging.getLogger(__name__)
@@ -17,25 +18,24 @@ class NoMoreProxiesException(Exception):
def get_data_with_proxy(url, conn_object, visit_first=None):
cursor = conn_object.cursor()
update_cursor = conn_object.cursor()
# Assume that table name is proxies
cursor.execute('''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC''')
cursor.execute(sql.statements["get_proxies"])
headers = {}
for i, lasttime_could_not_be_used in cursor:
session = requests.Session()
session.proxies = { 'http': i}
try:
response = session.get(url, headers=headers, timeout=3)
response = session.get(url, headers=headers, timeout=1)
except Exception as e:
if(isinstance(e, KeyboardInterrupt)):
raise e
# If proxy is invalid/inactive, update lasttime could not be used and go next proxy
cursor.execute('''UPDATE proxies SET lasttime_could_not_be_used = %s WHERE proxy = %s ''',
(time.time(), i))
update_cursor.execute(sql.statements["update_proxies"], (time.time(), i))
continue
# If text is empty, update lasttime could not be used and go next proxy
if not response.text or 399 < response.status_code < 600:
cursor.execute('''UPDATE proxies SET lasttime_could_not_be_used = %s WHERE proxy = %s ''',
(time.time(), i))
update_cursor.execute(sql.statements["update_proxies"], (time.time(), i))
continue
# Be nice to Wikipedia.
time.sleep(0.1)
@@ -63,7 +63,7 @@ def fetch_proxies(connection):
url = "http://{}:{}".format(ip_addr, port)
if(not proxy_is_in_db(url, connection)):
cursor.execute("INSERT INTO proxies VALUES(%s, 0)", (url,))
cursor.execute(sql.statements["insert_proxy"], (url,))
cnt += 1
logging.info("added {} new proxies".format(cnt))
connection.commit()
@@ -92,7 +92,7 @@ def _get_rows(soup):
def proxy_is_in_db(url, connection):
cursor = connection.cursor()
cursor.execute("SELECT proxy FROM proxies WHERE proxy = %s", (url,))
cursor.execute(sql.statements["proxy_in_db"], (url,))
return bool(cursor.fetchall())

View File

@@ -1,7 +1,11 @@
import logging
from collections import deque
from cfg import config
from url import construct_url
from proxy import get_data_with_proxy, NoMoreProxiesException
from db_util import get_page_id
from db_util import get_page_id, get_page_title
import sql
logger = logging.getLogger(__name__)
@@ -17,12 +21,14 @@ def ignore_title(title):
return True
return False
def _receive_links(title, connection):
url = construct_url(title)
source = get_page_id(title, connection)
def _receive_links(page, connection, lang="en"):
title = get_page_title(page, connection)
url = construct_url(title, lang=lang)
result = get_data_with_proxy(url, connection)
# This is basically because we don't know the page ID.
for k, page_data in result["query"]["pages"].items():
cursor = connection.cursor()
@@ -31,65 +37,70 @@ def _receive_links(title, connection):
# avoid 1-loops
if(destination_title == title):
continue
if(ignore_title(title)):
if(ignore_title(destination_title)):
continue
destination = get_page_id(destination_title, connection)
cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination))
yield destination_title
cursor.execute(sql.statements["insert_link"], (page, destination))
yield destination
else:
for destination in page_data["links"]:
if(ignore_title(title)):
continue
destination_title = destination["title"].replace(" ", "_")
if(ignore_title(destination_title)):
continue
destination = get_page_id(destination_title, connection)
cursor.execute("INSERT INTO links(source, destination) VALUES(%s, %s)", (source, destination))
yield destination_title
cursor.execute(sql.statements["insert_link"], (page, destination))
yield destination
connection.commit()
def receive_links(title, connection):
return list(_receive_links(title, connection))
def receive_links(title, connection, lang="en"):
return list(_receive_links(title, connection, lang=lang))
def receive_link_graph(title, connection, depth, fetch_missing=True):
def receive_link_graph(title, connection, depth, lang="en"):
page = get_page_id(title, connection)
do_receive_link_graph(page, connection, depth, fetch_missing=True, lang=lang)
cursor = connection.cursor()
cursor.execute(sql.statements["count_failed_to_fetch"])
if(cursor.fetchone()[0]):
do_receive_link_graph(page, connection, depth, fetch_missing=True, lang=lang)
def do_receive_link_graph(page, connection, depth, fetch_missing=False, lang="en"):
if(depth < 0):
# end of recursion
return
logger.info("do_receive_link_graph(%d, <connection>, %d)" % (page, depth))
cursor = connection.cursor()
# Fetch the missing links.
if(fetch_missing):
delete_cursor = connection.cursor()
cursor.execute('''SELECT pages.title, failed_to_fetch.depth, failed_to_fetch.page
FROM failed_to_fetch
LEFT JOIN pages ON pages.page_id=failed_to_fetch.page''')
for t, d, p in cursor:
receive_link_graph(t, connection, d, fetch_missing=False)
delete_cursor.execute("DELETE FROM failed_to_fetch WHERE page=%s", (p,))
cursor.execute(sql.statements["get_failed_to_fetch"])
for d, p in cursor:
do_receive_link_graph(p, connection, d, fetch_missing=False, lang=lang)
delete_cursor.execute(sql.statements["delete_failed_to_fetch"], (p,))
if(depth < 0):
# end of recursion
return
page = get_page_id(title, connection)
cursor = connection.cursor()
cursor.execute("SELECT COUNT(source) FROM links WHERE source=%s", (page,))
cursor.execute(sql.statements["count_links_from"], (page,))
if(cursor.fetchone()[0] != 0):
# we fetched that title already
return
logger.info("fetching links for {}".format(title))
logger.info("fetching links for {}".format(page))
for link in _receive_links(title, connection):
for link in _receive_links(page, connection):
try:
receive_link_graph(link, connection, depth - 1)
do_receive_link_graph(link, connection, depth - 1, lang=lang)
except NoMoreProxiesException as e:
logger.exception("All proxies are blocked")
# Wikipedia blocked all our proxies.
# Retry later, so we have to store our list that is still to fetch.
cursor = connection.cursor()
failed_page = get_page_id(link, connection)
cursor.execute("INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)", (failed_page, depth - 1))
cursor.execute(sql.statements["insert_failed_to_fetch"], (link, depth - 1))
connection.commit()

99
exam/ex01/sql.py Normal file
View File

@@ -0,0 +1,99 @@
from cfg import config

# Every statement exists in two flavours: sqlite uses "?" placeholders,
# mysql uses "%s".  Apart from placeholders the flavours differ only where
# the engines differ (e.g. how "infinity" is represented).  ``statements``
# at the bottom exposes only the flavour selected by cfg.config["sql_method"].
sql_statements = {
    "update_proxies": {"sqlite": '''UPDATE proxies SET lasttime_could_not_be_used = ? WHERE proxy = ? '''
            , "mysql": '''UPDATE proxies SET lasttime_could_not_be_used = %s WHERE proxy = %s '''}
    , "get_proxies": {"sqlite": '''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC'''
            , "mysql": '''SELECT proxy, lasttime_could_not_be_used FROM proxies ORDER BY lasttime_could_not_be_used ASC'''}
    , "insert_proxy": {"sqlite": "INSERT INTO proxies VALUES(?, 0)"
            , "mysql": "INSERT INTO proxies VALUES(%s, 0)"}
    , "proxy_in_db": {"sqlite": "SELECT proxy FROM proxies WHERE proxy = ?"
            , "mysql": "SELECT proxy FROM proxies WHERE proxy = %s"}
    , "insert_link": {"sqlite": "INSERT INTO links(source, destination) VALUES(?, ?)"
            , "mysql": "INSERT INTO links(source, destination) VALUES(%s, %s)"}
    , "count_failed_to_fetch": {"sqlite": "SELECT COUNT(page) FROM failed_to_fetch"
            , "mysql": "SELECT COUNT(page) FROM failed_to_fetch"}
    , "get_failed_to_fetch": {"sqlite": '''SELECT failed_to_fetch.depth, failed_to_fetch.page
                FROM failed_to_fetch
                '''
            , "mysql": '''SELECT failed_to_fetch.depth, failed_to_fetch.page
                FROM failed_to_fetch
                '''}
    , "delete_failed_to_fetch": {"sqlite": "DELETE FROM failed_to_fetch WHERE page=?"
            , "mysql": "DELETE FROM failed_to_fetch WHERE page=%s"}
    , "count_links_from": {"sqlite": "SELECT COUNT(source) FROM links WHERE source=?"
            , "mysql": "SELECT COUNT(source) FROM links WHERE source=%s"}
    , "insert_failed_to_fetch": {"sqlite": "INSERT INTO failed_to_fetch(page, depth) VALUES(?, ?)"
            , "mysql": "INSERT INTO failed_to_fetch(page, depth) VALUES(%s, %s)"}
    , "count_links_to": {"sqlite": "SELECT COUNT(destination) FROM links WHERE destination=?"
            , "mysql": "SELECT COUNT(destination) FROM links WHERE destination=%s"}
    # Backtracking one step in the shortest path: among all pages linking to
    # the current one, pick the predecessor with the smallest Dijkstra value.
    # BUG FIX: the mysql flavour joined dijkstra_helper ON links.destination,
    # so every candidate row carried the value of the *current* page and the
    # ORDER BY picked an arbitrary predecessor.  Join on links.source, as the
    # sqlite flavour already does.
    , "dijkstra_backtrack_one": {"sqlite": '''SELECT links.source
                FROM links
                LEFT JOIN dijkstra_helper ON links.source=dijkstra_helper.page
                WHERE links.destination=?
                ORDER BY dijkstra_helper.value ASC
                LIMIT 1'''
            , "mysql": '''SELECT links.source
                FROM links
                LEFT JOIN dijkstra_helper ON links.source=dijkstra_helper.page
                WHERE links.destination=%s
                ORDER BY dijkstra_helper.value ASC
                LIMIT 1'''}
    , "get_page_id": {"sqlite": "SELECT rowid FROM pages WHERE title=?"
            , "mysql": "SELECT page_id FROM pages WHERE title=%s"}
    , "insert_page": {"sqlite": "INSERT INTO pages(title) VALUES(?)"
            , "mysql": "INSERT INTO pages(title) VALUES(%s)"}
    , "get_page_title": {"sqlite": "SELECT title FROM pages WHERE rowid=?"
            , "mysql": "SELECT title FROM pages WHERE page_id=%s"}
    , "dijkstra_insert_pages": {"sqlite": '''INSERT OR IGNORE INTO dijkstra_helper(page)
                SELECT rowid FROM pages
                '''
            , "mysql": '''INSERT IGNORE INTO dijkstra_helper(page)
                SELECT page_id FROM pages
                '''}
    # sqlite stores a real +inf (1e1000 overflows to inf); mysql uses the
    # maximum value of a signed 32 bit INT column instead.
    , "dijkstra_set_infinity": {"sqlite": "UPDATE dijkstra_helper SET value=1e1000"
            , "mysql": "UPDATE dijkstra_helper SET value=2147483647"}
    , "dijkstra_get_to_update": {"sqlite": '''SELECT page
                FROM dijkstra_helper
                LEFT JOIN links ON links.destination=dijkstra_helper.page
                WHERE links.source=?
                AND dijkstra_helper.value>?'''
            , "mysql": '''SELECT page
                FROM dijkstra_helper
                LEFT JOIN links ON links.destination=dijkstra_helper.page
                WHERE links.source=%s
                AND dijkstra_helper.value>%s'''}
    , "dijkstra_update": {"sqlite": '''UPDATE dijkstra_helper
                SET value=?
                WHERE page IN (
                    SELECT destination
                    FROM links
                    WHERE source=?)
                AND dijkstra_helper.value>?'''
            , "mysql": '''UPDATE dijkstra_helper
                SET value=%s
                WHERE page IN (
                    SELECT destination
                    FROM links
                    WHERE source=%s)
                AND dijkstra_helper.value>%s'''}
    , "dijkstra_set_root": {"sqlite": "UPDATE dijkstra_helper SET value=0 WHERE page=?"
            , "mysql": "UPDATE dijkstra_helper SET value=0 WHERE page=%s"}
    , "get_all_page_ids": {"sqlite": "SELECT rowid FROM pages"
            , "mysql": "SELECT page_id FROM pages"}
    , "get_links": {"sqlite": "SELECT source, destination FROM links"
            , "mysql": "SELECT source, destination FROM links"}
    , "delete_dijkstra": {"sqlite": "DELETE FROM dijkstra_helper"
            , "mysql": "DELETE FROM dijkstra_helper"}
    , "insert_dijkstra_values": {"sqlite": "INSERT INTO dijkstra_helper(page, value) VALUES(?, ?)"
            , "mysql": "INSERT INTO dijkstra_helper(page, value) VALUES(%s, %s)"}
}

# The flavour actually used by the rest of the program.
statements = {name: statement[config["sql_method"]] for name, statement in sql_statements.items()}

View File

@@ -0,0 +1,36 @@
class FiniteStateMachine(object):
    """A deterministic finite state machine.

    ``transitions`` maps a state to ``{words: target_state}`` where
    ``words`` is an iterable of words; the groups are unpacked so that
    every single word maps to its target state.  Any word without a
    transition from the current state sends the machine to the
    ``default`` (reject) state.
    """

    def __init__(self, start: int, valid: list, default: int, transitions: dict):
        self._start = start
        self._valid = valid
        self._transitions = dict()
        for state, trans in transitions.items():
            self._transitions[state] = dict()
            for words, target in trans.items():
                for word in words:
                    self._transitions[state][word] = target
        self._default = default
        self._state = start

    def reset(self):
        """Return the machine to its start state."""
        self._state = self._start

    def make_transition(self, word):
        """Consume one word and move to the resulting state.

        BUG FIX: the original replaced an unknown current state with the
        default state and then *continued*, indexing
        ``self._transitions[self._default]`` — a KeyError whenever the
        default state has no outgoing transitions (it usually has none).
        Now any missing state or missing word simply lands in the
        default state.
        """
        state_transitions = self._transitions.get(self._state)
        if state_transitions is None or word not in state_transitions:
            self._state = self._default
            return
        self._state = state_transitions[word]

    def check(self, sequence):
        """Run the machine over *sequence*; return whether it ends in a valid state.

        The machine is reset afterwards, so ``check`` can be called repeatedly.
        """
        for word in sequence:
            self.make_transition(word)
        is_valid = self._state in self._valid
        self.reset()
        return is_valid

16
exam/ex06/main.py Normal file
View File

@@ -0,0 +1,16 @@
from io import StringIO
from parser import Parser
from tokenio import TokenStream
from tokens import NumberTokenParser

# Demo: evaluate a few arithmetic expressions written out as number words.
EXPRESSIONS = (
    "one plus one",
    "one plus two",
    "thirtytwo plus eleven",
    "four times four",
    "(eight plus eleven) times two",
    "twohundred through eleven",
)

for expression in EXPRESSIONS:
    stream = TokenStream(StringIO(expression))
    print(expression, "=", Parser(stream).parse())

91
exam/ex06/parser.py Normal file
View File

@@ -0,0 +1,91 @@
from collections import deque
from tokens import NumberTokenParser, OperatorTokenParser
class ParsingException(Exception):
    """Raised when the token stream cannot be parsed as an expression."""
class Parser(object):
    """Evaluates a tokenized arithmetic expression strictly left to right.

    There is no operator precedence: "one plus two times three" is
    ((1 + 2) * 3).  Parenthesized sub-expressions are evaluated first and
    their value is treated like a single number in the enclosing
    expression.
    """

    def __init__(self, token_stream):
        self._token_stream = token_stream
        # One stack entry per currently open parenthesis: the partially
        # read outer expression.
        self._stack = deque()
        # Numbers and operators of the expression currently being read.
        self._current_list = deque()

    def parse(self):
        """Read all tokens from the stream and return the computed value.

        ``state`` is 0 when a number or "(" is expected and 1 when an
        operator or ")" is expected.

        Raises ParsingException on malformed input.
        """
        state = 0
        while True:
            token = self._token_stream.get_token()
            if(token == "("):
                if(state == 1):
                    raise ParsingException(
                            "expected operator, not parenthesis: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))
                # BUG FIX: the original pushed the current list but kept
                # appending to the *same* object, and later discarded the
                # value of the parenthesized sub-expression entirely.
                self._stack.append(self._current_list)
                self._current_list = deque()
                continue
            if(NumberTokenParser.can_parse(token)):
                if(state == 1):
                    raise ParsingException(
                            "expected operator, not number: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))
                self._current_list.append(NumberTokenParser(token).parse())
                state = 1
                continue
            if(OperatorTokenParser.can_parse(token)):
                if(state != 1):
                    raise ParsingException(
                            "expected number or parenthesis, not operator: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))
                self._current_list.append(OperatorTokenParser(token).parse())
                state = 0
                continue
            if(token == ")"):
                # BUG FIX: an unbalanced ")" used to raise a bare
                # IndexError from deque.pop(); report it as a parse error.
                if(not self._stack):
                    raise ParsingException(
                            "unbalanced closing parenthesis: {} (near '{}')".format(
                                self._token_stream._offset
                                , token))
                state = 1
                result = self.execute_branch(self._current_list)
                self._current_list = self._stack.pop()
                # The finished sub-expression behaves like a single number.
                self._current_list.append(result)
                continue
            if(not token):
                if(self._stack):
                    raise ParsingException("unexpected EOF while parsing")
                return self.execute_branch(self._current_list)
            raise ParsingException("unknown token: {} (near '{}')".format(self._token_stream._offset, token))

    def execute_branch(self, branch):
        """Evaluate a flat sequence of numbers and operators left to right."""
        result = None
        current_operator = None
        for element in branch:
            if(result is None):
                result = element
                continue
            if(not isinstance(element, (float, int, complex))):
                # Operator
                current_operator = element
                continue
            if(current_operator):
                result = current_operator(result, element)
                current_operator = None
        return result

56
exam/ex06/tokenio.py Normal file
View File

@@ -0,0 +1,56 @@
from collections import deque
# Characters that may occur inside a word token (ASCII letters only).
base_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
# Characters that terminate the token currently being read.
end_of_token_chars = "() \t"
# Subset of end_of_token_chars that is silently skipped between tokens.
whitespace_chars = " \t"
class UnexpectedCharacterException(Exception):
    """Raised when the stream contains a character outside the token alphabet."""

    def __init__(self, msg, offset, char, *args):
        Exception.__init__(self, *args)
        # Keep the diagnostic details for the caller to inspect.
        self._msg = msg
        self._offset = offset
        self._char = char
class TokenStream(object):
    """Splits a seekable character stream into word and parenthesis tokens."""

    def __init__(self, file_):
        self._file = file_
        self._file.seek(0, 0)
        self._offset = 0

    def _getc(self):
        # Read a single character, tracking the absolute offset for
        # error reporting; returns "" at EOF.
        char = self._file.read(1)
        if(char):
            self._offset += 1
        return char

    def _ungetc(self):
        # Push the most recent character back by seeking one position
        # backwards.
        self._offset -= 1
        self._file.seek(self._offset, 0)

    def get_token(self):
        """Return the next token: a word, a single parenthesis, or "" at EOF."""
        letters = deque()
        while True:
            char = self._getc()
            if(not char):
                # EOF ends the current (possibly empty) token.
                break
            if(char in base_chars):
                letters.append(char)
                continue
            if(char in end_of_token_chars):
                if(letters):
                    # The separator ends the current word; it will be
                    # re-read on the next call.
                    self._ungetc()
                    break
                if(char in whitespace_chars):
                    # Whitespace between tokens is skipped.
                    continue
                # A parenthesis is a token of its own.
                return char
            raise UnexpectedCharacterException("Unexpected character while tokenizing", self._offset, char)
        return "".join(letters)

155
exam/ex06/tokens.py Normal file
View File

@@ -0,0 +1,155 @@
from collections import deque
from abc import ABCMeta, abstractmethod
from finite_state_machine import FiniteStateMachine
# Value tables for number words.  BUG FIX: the tens table only contained the
# misspellings "fourty", "secenty" and "eigthy", so the correctly spelled
# words "forty", "seventy" and "eighty" were rejected.  The correct
# spellings are added; the old keys are kept for backward compatibility.
BASE_NUMBER_TOKENS = {
    "one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
    "six": 6, "seven": 7, "eight": 8, "nine": 9,
}

DECI_NUMBER_TOKENS = {
    "twenty": 20,
    "thirty": 30,
    "forty": 40, "fourty": 40,      # "fourty": historical misspelling, kept
    "fifty": 50,
    "sixty": 60,
    "seventy": 70, "secenty": 70,   # "secenty": historical misspelling, kept
    "eighty": 80, "eigthy": 80,     # "eigthy": historical misspelling, kept
    "ninety": 90,
}

TEEN_NUMBER_TOKENS = {
    "ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13,
    "fourteen": 14, "fifteen": 15, "sixteen": 16,
    "seventeen": 17, "eighteen": 18, "nineteen": 19,
}

HUNDRED_NUMBER_TOKENS = {"hundred": 100}

ZERO_NUMBER_TOKENS = {"zero": 0, "null": 0}

# Binary operators accepted by the expression parser.
OPERATOR_TOKENS = {
    "plus": lambda x, y: x + y,
    "minus": lambda x, y: x - y,
    "times": lambda x, y: x * y,
    "through": lambda x, y: x / y,
}

# Finite state machine accepting well-formed number words, e.g. "nine",
# "twentytwo", "twohundredthirtyfour".  States: 0 start, 1/2/3/4 accepting.
transitions = {
    0: {tuple(ZERO_NUMBER_TOKENS) + tuple(TEEN_NUMBER_TOKENS): 1
        , tuple(DECI_NUMBER_TOKENS): 2
        , tuple(BASE_NUMBER_TOKENS): 3}
    , 2: {tuple(BASE_NUMBER_TOKENS): 1}
    , 3: {tuple(HUNDRED_NUMBER_TOKENS): 4}
    , 4: {tuple(DECI_NUMBER_TOKENS): 2}
}

valid_states = [1, 2, 3, 4]
# Any unknown word/state lands here (reject state).
default_transition = -1
class TokenParsingException(Exception):
    """Raised when a token cannot be parsed at all."""


class SubtokenizingException(TokenParsingException):
    """Raised when part of a number token is not a known number word."""
class TokenParser(metaclass=ABCMeta):
    """Abstract base class for parsers of a single token."""

    @classmethod
    def can_parse(cls, token):
        """Return whether *token* can be parsed by this parser class."""
        try:
            cls(token).parse()
        except TokenParsingException:
            return False
        return True

    @abstractmethod
    def parse(self):
        """Return the parsed value of the token."""
class NumberTokenParser(TokenParser):
    """Parses a number word such as "thirtytwo" or "twohundred" into an int."""

    def __init__(self, token):
        self._token = token.lower()
        self._token_length = len(token)
        self._finite_state_machine = FiniteStateMachine(
                0, valid_states, default_transition, transitions)

    def get_token_of_class_or_none(self, offset, token_class):
        """Return the word from *token_class* found at *offset*, else None."""
        for candidate in token_class:
            end = offset + len(candidate)
            if(end <= self._token_length and self._token[offset:end] == candidate):
                return candidate
        return None

    def get_next_token_part(self, offset):
        # The class order matters: teens must be tried before base digits
        # so that e.g. "seventeen" is not consumed as "seven" + "teen".
        token_classes = (ZERO_NUMBER_TOKENS
                , HUNDRED_NUMBER_TOKENS
                , TEEN_NUMBER_TOKENS
                , DECI_NUMBER_TOKENS
                , BASE_NUMBER_TOKENS)
        for token_class in token_classes:
            part = self.get_token_of_class_or_none(offset, token_class)
            if(part):
                return part
        return None

    def subtokenize(self):
        """Split the token into its number-word parts, left to right.

        Raises SubtokenizingException if a part of the token is left over.
        """
        parts = deque()
        offset = 0
        while(True):
            part = self.get_next_token_part(offset)
            if(part is None):
                if(offset != self._token_length):
                    raise SubtokenizingException("part of the token is dangling: '{}'".format(self._token[offset:]))
                return list(parts)
            offset += len(part)
            parts.append(part)

    def parse(self):
        """Return the integer value of the token.

        Raises TokenParsingException if the word sequence is not a
        well-formed number.
        """
        parts = self.subtokenize()
        if(not self._finite_state_machine.check(parts)):
            raise TokenParsingException("token '{}' is invalid".format(self._token))
        # Accumulate the value: add units/teens/tens, multiply on "hundred".
        result = 0
        for part in parts:
            for table in (BASE_NUMBER_TOKENS, TEEN_NUMBER_TOKENS, DECI_NUMBER_TOKENS):
                if(part in table):
                    result += table[part]
            if(part in HUNDRED_NUMBER_TOKENS):
                result *= HUNDRED_NUMBER_TOKENS[part]
        return result
class OperatorTokenParser(TokenParser):
    """Parses an operator word ("plus", "minus", ...) into a binary callable."""

    def __init__(self, token):
        self._token = token.lower()

    def parse(self):
        """Return the operator function for this token.

        Raises TokenParsingException for unknown operator words.
        """
        if(self._token not in OPERATOR_TOKENS):
            raise TokenParsingException("token '{}' is not an operator".format(self._token))
        return OPERATOR_TOKENS[self._token]

View File

@@ -26,9 +26,9 @@ class BrownIterator(object):
self._ys = self._ys + np.sin(theta)
# Reflect the particles
self._xs[self._xs > 3] += 1.5 * (3 - self._xs[self._xs > 3])
#self._xs[self._xs > 3] += 1.5 * (3 - self._xs[self._xs > 3])
self._ys[self._ys > 3] += 1.5 * (3 - self._ys[self._ys > 3])
self._xs[self._xs < -3] += 1.5 * (-3 - self._xs[self._xs < -3])
#self._xs[self._xs < -3] += 1.5 * (-3 - self._xs[self._xs < -3])
self._ys[self._ys < -3] += 1.5 * (-3 - self._ys[self._ys < -3])
return self._xs, self._ys

34
exam/ex14/main.py Normal file
View File

@@ -0,0 +1,34 @@
import matplotlib.pyplot as plt
import matplotlib.animation as ani
import numpy as np
import sys
# Reuse the cellular-automaton implementation from exercise 13.
sys.path.append("../ex13/")
from model import prepare_model
from backend import CELL_IS_ALIVE
from executor import execute_tick
# Square, border-less figure: the axes fill the whole canvas.
fig = plt.figure(figsize=(7, 7))
ax = fig.add_axes([0, 0, 1, 1], frameon=False)
ax.set_xlim(0, 100)
ax.set_xticks([])
ax.set_ylim(0, 100)
ax.set_yticks([])
# 100x100 world; presumably 0.2 is the initial live-cell density —
# TODO confirm against ex13/model.py.
model = prepare_model(100, 0.2)
# Plot every cell whose CELL_IS_ALIVE bit is set as a black square marker.
scat, = ax.plot(*np.where((model & CELL_IS_ALIVE) == 1), "s", color="black")
frames = 100
def update(i):
    # Advance the simulation by one tick (mutates `model` in place)
    # and redraw the live cells.
    execute_tick(model)
    scat.set_data(*np.where((model & CELL_IS_ALIVE) == 1))
    # Progress indicator on a single, repeatedly overwritten line.
    print("%.2f" % ((i / frames) * 100), "%", end="\r")
animation = ani.FuncAnimation(fig, update, range(frames), interval=1)
animation.save("output/animation.gif", dpi=80, writer='imagemagick')
print("\noutput/animation.gif")

122
exam/ex16/geometry.py Normal file
View File

@@ -0,0 +1,122 @@
from abc import abstractmethod, ABCMeta
from itertools import permutations
import numpy as np
class Point(object):
    """A 2D point / vector with basic vector arithmetic."""

    __slots__ = ["x", "y"]

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __abs__(self):
        # Euclidean norm.
        return (self.x**2 + self.y**2)**0.5

    def __add__(self, other):
        # BUG FIX: the original tested isinstance(self, Point), which is
        # always true, so "Point + non-Point" crashed with AttributeError
        # instead of raising the intended TypeError.
        if(isinstance(other, Point)):
            return Point(self.x + other.x, self.y + other.y)
        raise TypeError("cannot add {} and {}".format(type(other), type(self)))

    def __sub__(self, other):
        # BUG FIX: same isinstance(self, ...) defect as in __add__.
        if(isinstance(other, Point)):
            return Point(self.x - other.x, self.y - other.y)
        raise TypeError("cannot subtract {} and {}".format(type(other), type(self)))

    def rotate(self, angle):
        """Return this point rotated by *angle* radians around the origin."""
        return Point(self.x * np.cos(angle) - self.y * np.sin(angle)
                , self.x * np.sin(angle) + self.y * np.cos(angle))

    def __repr__(self):
        return "{}({}, {})".format(type(self).__name__, self.x, self.y)
class TwoDimensionalObject(metaclass=ABCMeta):
    """Abstract base class for 2D figures supporting ``point in figure``."""

    @abstractmethod
    def contains_point(self, point: Point):
        """Return whether *point* lies inside this figure."""

    def __contains__(self, other):
        # Only Points can be tested for membership.
        if(not isinstance(other, Point)):
            raise TypeError("unable to check if {} is in {}".format(type(other), type(self)))
        return self.contains_point(other)
class Circle(TwoDimensionalObject):
    """A circle given by its center (``origin``) and ``radius``."""

    def __init__(self, origin: Point, radius):
        self.origin = origin
        self.radius = radius

    def contains_point(self, point: Point):
        # Strictly inside: points on the boundary are not contained.
        distance = abs(self.origin - point)
        return distance < self.radius

    def __repr__(self):
        return "{}({}, {})".format(type(self).__name__, self.origin, self.radius)
class Rectangle(TwoDimensionalObject):
    """
    A Rectangle is constructed as follows:

    The Points p1, p2 are connected using orthogonal lines::

        p1 +-------+
           |       |
           |       |
           |       |
           +-------+ p2

    and then the Rectangle is rotated ``angle`` radians around ``p1``
    (note: ``Point.rotate`` uses radians, not degrees).
    """
    def __init__(self, p1: Point, p2: Point, angle=0):
        self.p1 = p1
        self.p2 = p2
        # Local ("eigen") frame: p1 is the origin and the sides are axis
        # aligned.  BUG FIX: the local corner must be the *unrotated*
        # diagonal p2 - p1; the rotation is undone in contains_point.
        self.local_p1 = Point(0, 0)
        self.local_p2 = p2 - p1
        self.angle = angle

    def contains_point(self, point: Point):
        """Return whether *point* (world coordinates) lies in the rectangle.

        BUG FIX: the original compared both bounds of each axis against
        ``local_p1``, so every point not exactly on p1's coordinates was
        rejected; it also rotated by +angle instead of undoing the
        rotation.
        """
        local = (point - self.p1).rotate(-self.angle)
        low_x, high_x = sorted((self.local_p1.x, self.local_p2.x))
        low_y, high_y = sorted((self.local_p1.y, self.local_p2.y))
        return (low_x <= local.x <= high_x) and (low_y <= local.y <= high_y)

    def __repr__(self):
        return "{}({}, {}, angle={})".format(type(self).__name__, self.p1, self.p2, self.angle)
class Triangle(TwoDimensionalObject):
    """A triangle spanned by the three corner points p1, p2, p3."""

    def __init__(self, p1: Point, p2: Point, p3: Point):
        self.p1 = p1
        self.p2 = p2
        self.p3 = p3

    @staticmethod
    def _edge_side(origin, towards, query):
        # z-component of (towards - origin) x (query - origin); its sign
        # tells on which side of the directed line origin->towards the
        # query point lies.
        return ((towards.x - origin.x) * (query.y - origin.y)
                - (towards.y - origin.y) * (query.x - origin.x))

    def contains_point(self, point: Point):
        """Return True if ``point`` lies strictly inside the triangle.

        BUG FIX: the original compared shoelace-like sums over
        permutations of four points with an incorrect index scheme and
        rejected points that are clearly inside (e.g. the centroid of the
        triangle (0,0), (6,0), (0,6)).  A point is inside iff it lies on
        the same side of all three consistently oriented edges.
        """
        d1 = self._edge_side(self.p1, self.p2, point)
        d2 = self._edge_side(self.p2, self.p3, point)
        d3 = self._edge_side(self.p3, self.p1, point)
        all_positive = d1 > 0 and d2 > 0 and d3 > 0
        all_negative = d1 < 0 and d2 < 0 and d3 < 0
        return all_positive or all_negative

    def __repr__(self):
        return "{}({}, {}, {})".format(type(self).__name__, self.p1, self.p2, self.p3)
class CollectionOfFigures(object):
    """A plain container bundling several figures for joint queries."""

    def __init__(self, figures):
        self.figures = figures

    def containing(self, point: Point):
        """Return the list of contained figures that include ``point``."""
        hits = []
        for figure in self.figures:
            if point in figure:
                hits.append(figure)
        return hits

    def __repr__(self):
        return "{}({})".format(type(self).__name__, repr(self.figures))

21
exam/ex16/main.py Normal file
View File

@@ -0,0 +1,21 @@
from geometry import Point, Rectangle, Triangle, Circle, CollectionOfFigures

# A small zoo of overlapping figures to query against.
collection = CollectionOfFigures([
    Circle(Point(0, 0), 1),
    Circle(Point(4, 4), 2),
    Rectangle(Point(0, 0), Point(4, 4)),
    Rectangle(Point(-3, -3), Point(-1, -1)),
    Triangle(Point(-1, -1), Point(-1, 1), Point(0, 1)),
    Triangle(Point(0, 0), Point(6, 0), Point(0, 6)),
    Triangle(Point(-5, -5), Point(0, 6), Point(5, -5)),
])

# Report, for a few probe points, which figures contain them.
for probe in (Point(4, 4), Point(-1, 0), Point(0, 0)):
    print(probe, "is in", collection.containing(probe))

32
exam/ex19/caesar.py Normal file
View File

@@ -0,0 +1,32 @@
def internal_ord(c):
    """Map a letter (case-insensitive) to its 0-based alphabet index.

    Raises ValueError for any character outside a-z / A-Z.
    """
    # IDIOM FIX: the original stored the result in a local variable named
    # `internal_ord`, shadowing the function itself.
    lowered = c.lower()
    index = ord(lowered) - ord("a")
    if index < 0 or index > 25:
        raise ValueError("'{}' is an unsupported character".format(lowered))
    return index
def internal_chr(i):
    """Inverse of internal_ord: map a 0-based alphabet index to a lowercase letter."""
    base = ord("a")
    return chr(base + i)
def encode_or_keeps_space(c):
    """Classify one character: (False, " ") for a space, else (True, index)."""
    if c != " ":
        return (True, internal_ord(c))
    return (False, c)
def prepare_string(s):
    """Lazily classify every character of ``s`` via encode_or_keeps_space."""
    return (encode_or_keeps_space(character) for character in s)
def _caesar(s, K):
    """Yield the Caesar-shifted characters of ``s``; spaces pass through unchanged."""
    for is_letter, value in prepare_string(s):
        if not is_letter:
            yield value
            continue
        yield internal_chr((value + K) % 26)
def caesar(s, K):
    """Encrypt ``s`` with a Caesar shift of ``K`` (decrypt by shifting 26 - K)."""
    shifted = _caesar(s, K)
    return "".join(shifted)

21
exam/ex19/main.py Normal file
View File

@@ -0,0 +1,21 @@
from caesar import caesar
from statistical_attack import get_statistical_key

text1 = "hello world this is a test"
text2 = "this is a message that is obfuscated"
K2 = 4
K1 = 13

# K1 = 13 is ROT13: applying it twice restores the plaintext.
print(caesar(text1, K1))
print(caesar(caesar(text1, K1), K1))
print(caesar(text2, K2))
# Decrypt by shifting the remaining 26 - K2 positions.
print(caesar(caesar(text2, K2), abs(K2 - 26)))

# A longer sample gives the statistical attack enough data; strip every
# character the cipher does not support before encrypting.
text4 = "In cryptography, a Caesar cipher, also known as Caesar's cipher, the shift cipher, Caesar's code or Caesar shift, is one of the simplest and most widely known encryption techniques. It is a type of substitution cipher in which each letter in the plaintext is replaced by a letter some fixed number of positions down the alphabet. For example, with a left shift of 3, D would be replaced by A, E would become B, and so on. The method is named after Julius Caesar, who used it in his private correspondence"
text4 = "".join(s for s in text4 if s in " abcdefghijklmnopqrstuvwxyz")
print(get_statistical_key(caesar(text4, K2)))
print(get_statistical_key(caesar(text2, K2)))

12326
exam/ex19/sample.tx Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,29 @@
from collections import Counter

# The 26 lowercase letters; all statistics below count only these.
alphabet = "abcdefghijklmnopqrstuvwxyz"
# Letter frequencies of a reference corpus (sample.tx), used as the
# baseline distribution for the statistical attack.  NOTE: this reads
# the file at import time.
reference = Counter((c for c in open("sample.tx").read().lower() if c in alphabet))
def get_statistics(text):
    """Count how often each alphabet letter occurs in ``text`` (case-folded)."""
    lowered = text.lower()
    return Counter(character for character in lowered if character in alphabet)
def relative_most_common(statistics):
    """Return the relative frequency of the most common letter in ``statistics``."""
    _, top_count = statistics.most_common(1)[0]
    total = sum(statistics.values())
    return top_count / total
def get_statistical_key(text):
    """Guess the Caesar key of ``text`` by single-letter frequency analysis.

    Assumes 'e' is the most common plaintext letter and returns a tuple
    (K, quality), where quality in (0, 1] compares how peaked the text's
    letter distribution is relative to the reference corpus.
    """
    statistics = get_statistics(text)
    most_common_char, _ = statistics.most_common(1)[0]
    # BUG FIX: the key is the forward shift from 'e' to the observed
    # letter, taken mod 26.  The original abs(ord("e") - ord(c)) returned
    # a wrong key whenever the ciphertext letter sorts before 'e'
    # (e.g. key 22 maps 'e' -> 'a', but abs gave 4).
    K = (ord(most_common_char) - ord("e")) % 26
    ratio = relative_most_common(statistics) / relative_most_common(reference)
    # Normalise so quality is always <= 1 regardless of which
    # distribution is more peaked.
    quality = ratio if ratio <= 1 else 1 / ratio
    return K, quality

13
exam/ex26/main.py Normal file
View File

@@ -0,0 +1,13 @@
from polynomial import Polynomial

p1 = Polynomial([0, 2, 3, 4, 0, 0, 9])
p2 = Polynomial([0, 0, 1])
p3 = Polynomial([1, 0, 0, 1])

# Show the operands first, then a few arithmetic combinations.
for label, value in (("p1", p1), ("p2", p2), ("p3", p3),
                     ("p1 + p3", p1 + p3), ("p2 * p3", p2 * p3), ("p1 * p3", p1 * p3)):
    print(label + " = ", value)

55
exam/ex26/polynomial.py Normal file
View File

@@ -0,0 +1,55 @@
from collections import defaultdict
class Polynomial(object):
    """A dense polynomial: ``koefficients[i]`` is the coefficient of x**i.

    The attribute name ``koefficients`` (sic) is kept for backward
    compatibility; trailing zero coefficients are stripped on creation.
    """

    __slots__ = ["koefficients"]

    def __init__(self, koefficients):
        """Store the coefficient sequence, normalising away trailing zeros."""
        self.koefficients = list(koefficients)
        # Strip trailing zeros so that degree == len(koefficients) - 1.
        while self.koefficients and self.koefficients[-1] == 0:
            self.koefficients.pop()

    def __call__(self, x):
        """Evaluate the polynomial at the scalar ``x``.

        Raises TypeError for non-numeric arguments.
        """
        if not isinstance(x, (float, int, complex)):
            raise TypeError("unsupported type for {}: {}".format(x, type(x)))
        result = 0
        for i, a in enumerate(self.koefficients):
            result += a * x**i
        return result

    def __add__(self, other):
        """Return the sum of two polynomials as a new Polynomial."""
        if not isinstance(other, Polynomial):
            raise TypeError("cannot add {} and {}".format(type(self), type(other)))
        longer = self.koefficients
        shorter = other.koefficients
        if len(shorter) > len(longer):
            longer, shorter = shorter, longer
        # BUG FIX: the original extended the shorter list in place
        # (``p2 += [0] * ...``), silently mutating one of the operands.
        padded = shorter + [0] * (len(longer) - len(shorter))
        return Polynomial([a + b for a, b in zip(longer, padded)])

    def __mul__(self, other):
        """Return the product with a scalar or another Polynomial."""
        if isinstance(other, (int, float, complex)):
            return Polynomial([a * other for a in self.koefficients])
        if not isinstance(other, Polynomial):
            # BUG FIX: the message said "add" (copy-paste error).
            raise TypeError("cannot multiply {} and {}".format(type(self), type(other)))
        result = defaultdict(int)
        for i, k1 in enumerate(self.koefficients):
            for j, k2 in enumerate(other.koefficients):
                result[i + j] += k1 * k2
        # BUG FIX: the original iterated range(max(result)), dropping the
        # highest-degree coefficient (x**2 * (1 + x**3) came out as x**2)
        # and crashing with ValueError when the product was empty.
        if not result:
            return Polynomial([])
        return Polynomial(result[i] for i in range(max(result) + 1))

    def __repr__(self):
        return "{}({})".format(type(self).__name__, self.koefficients)

    def __str__(self):
        return " + ".join("{}*x**{}".format(a, i) for i, a in enumerate(self.koefficients))

101
exam/ex27/main.py Normal file
View File

@@ -0,0 +1,101 @@
import sys
sys.path.append("../../")
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
from util.io import readvalue
def positive_int(s):
    """Parse ``s`` as a strictly positive integer; raise ValueError otherwise."""
    value = int(s)
    if not value > 0:
        raise ValueError("{} <= 0".format(value))
    return value
def float_0_1(s):
    """Parse ``s`` as a float in the half-open interval (0, 1]."""
    value = float(s)
    # `not value > 0` (rather than `value <= 0`) also rejects NaN.
    if not value > 0:
        raise ValueError("{} <= 0".format(value))
    if value > 1:
        raise ValueError("{} > 1".format(value))
    return value
# Bit flags describing one cell of the model grid (combined with |):
IS_TREE = 1
TREE_IS_ON_FIRE = 2
# Transient marker set during a tick so that freshly ignited trees do
# not already spread fire within the same tick (see do_tick).
TREE_WILL_BE_ON_FIRE = 4
def prepare_model(N, p):
    """Create an N x N int8 grid where each cell holds a tree with probability p."""
    grid = np.zeros((N, N), dtype=np.int8)
    tree_mask = np.random.uniform(size=(N, N)) < p
    grid[tree_mask] = IS_TREE
    return grid
def index_plus(i, j, add_i, add_j, model):
    """Return ``(i + add_i, j + add_j)`` wrapped around the model's edges.

    Modular arithmetic replaces the original chain of single-wrap
    if-corrections and additionally handles offsets whose magnitude
    exceeds one grid length.
    """
    return (i + add_i) % model.shape[0], (j + add_j) % model.shape[1]
def do_tick(model):
    # Advance the simulation by one step: a living tree catches fire when
    # any of its four direct neighbours (wrapping at the edges via
    # index_plus) is burning.
    for i in range(model.shape[0]):
        for j in range(model.shape[1]):
            # Skip empty cells and trees that are already burning.
            if((not model[i][j] & IS_TREE) or model[i][j] & TREE_IS_ON_FIRE):
                continue
            if(
                (model[index_plus(i, j, 1, 0, model)] & TREE_IS_ON_FIRE)
                or (model[index_plus(i, j, 0, 1, model)] & TREE_IS_ON_FIRE)
                or (model[index_plus(i, j, -1, 0, model)] & TREE_IS_ON_FIRE)
                or (model[index_plus(i, j, 0, -1, model)] & TREE_IS_ON_FIRE)
            ):
                # Only mark the tree here: setting TREE_IS_ON_FIRE
                # directly would let the fire spread further within this
                # same tick as the scan continues.
                model[i][j] |= TREE_WILL_BE_ON_FIRE
    # Second pass: promote all marked trees to burning, then clear the
    # transient marker bit.
    model[(model & TREE_WILL_BE_ON_FIRE) != 0] |= TREE_IS_ON_FIRE
    model[(model & TREE_WILL_BE_ON_FIRE) != 0] ^= TREE_WILL_BE_ON_FIRE
def trees_on_fire(model):
    """Return the number of cells whose TREE_IS_ON_FIRE flag is set.

    IDIOM FIX: counts the flagged cells directly with np.count_nonzero
    instead of the original np.unique/np.where detour; both variants
    return 0 when nothing burns.
    """
    return int(np.count_nonzero(model & TREE_IS_ON_FIRE))
def set_initial_on_fire(model):
    """Ignite one uniformly chosen tree in the model.

    Raises ValueError when the model contains no tree at all.

    BUG FIX: the original walked the grid by incrementing BOTH indices
    each step ((i+1) % rows, (j+1) % cols), so on a square grid it only
    ever inspected the N cells of one diagonal and looped forever when
    no tree happened to lie on it.
    """
    trees = np.argwhere(model & IS_TREE)
    if len(trees) == 0:
        raise ValueError("cannot ignite: the model contains no tree")
    i, j = trees[np.random.randint(len(trees))]
    model[i][j] |= TREE_IS_ON_FIRE
N = readvalue("N > ", positive_int)
p = readvalue("p > ", float_0_1)

model = prepare_model(N, p)
set_initial_on_fire(model)

# Tick until the burning-tree count stops changing, recording it per tick.
burn_history = deque()
previous_count = 1
while True:
    do_tick(model)
    current_count = trees_on_fire(model)
    burn_history.append(current_count)
    if current_count == previous_count:
        break
    previous_count = current_count

# Plot the number of burning trees over time.
plt.plot(range(len(burn_history)), list(burn_history))
plt.show()