diff --git a/.gitignore b/.gitignore index 4f6813f976c2..96422cb2ec94 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,4 @@ ENV/ # Rope project settings .ropeproject .idea +.DS_Store \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index 25abc0577505..5fba6987bb66 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,26 @@ language: python +cache: pip python: - - "3.2" - - "3.3" - - "3.4" - - "3.5" -install: - - if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi - - if [ "$TRAVIS_PYTHON_VERSION" != "3.2" ]; then travis_retry pip install coverage; fi - - "pip install pytest pytest-cov" -script: py.test --doctest-modules --cov ./ \ No newline at end of file + - 2.7 + - 3.6 + #- nightly + #- pypy + #- pypy3 +matrix: + allow_failures: + - python: nightly + - python: pypy + - python: pypy3 +install: + #- pip install -r requirements.txt + - pip install flake8 # pytest # add another testing frameworks later +before_script: + # stop the build if there are Python syntax errors or undefined names + - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + - flake8 . 
def bisection(function, a, b, tolerance=0.0000001):
    """Find a root of ``function`` in [a, b] by bisection (Bolzano's theorem).

    :param function: callable of one number returning a number
    :param a: left end of the search interval
    :param b: right end of the search interval
    :param tolerance: stop once the half-width of the bracket is at most this
        value (defaults to the original hard-coded 10^-7)
    :return: ``a`` or ``b`` if one of them is already a root, otherwise an
        approximation of a root; ``None`` (after printing a message) when
        ``function(a)`` and ``function(b)`` have the same sign, because the
        method cannot bracket a root in that case
    """
    start = a
    end = b
    # Evaluate each end point once and reuse the value below.
    f_start = function(start)
    if f_start == 0:  # one of the a or b is a root for the function
        return a
    f_end = function(end)
    if f_end == 0:
        return b
    if f_start * f_end > 0:
        # Both ends are strictly positive or strictly negative, so there is
        # no sign change to bisect on.
        print("couldn't find root in [a,b]")
        return

    # Divide by 2.0 so integer end points do not trigger floor division on
    # Python 2 (the file has no ``from __future__ import division``).
    mid = (start + end) / 2.0
    while abs(start - mid) > tolerance:
        f_mid = function(mid)
        if f_mid == 0:
            return mid
        if f_mid * f_start < 0:
            # Sign change lies in [start, mid].
            end = mid
        else:
            # Sign change lies in [mid, end]; mid becomes the new start, so
            # its function value becomes the cached start value.
            start = mid
            f_start = f_mid
        mid = (start + end) / 2.0
    return mid
+ print 'Got connection from', addr + data = conn.recv(1024) + print('Server received', repr(data)) + + filename='mytext.txt' + f = open(filename,'rb') + l = f.read(1024) + while (l): + conn.send(l) + print('Sent ',repr(l)) + l = f.read(1024) + f.close() + + print('Done sending') + conn.send('Thank you for connecting') + conn.close() + + +# client side server + +import socket # Import socket module + +s = socket.socket() # Create a socket object +host = socket.gethostname() # Get local machine name +port = 60000 # Reserve a port for your service. + +s.connect((host, port)) +s.send("Hello server!") + +with open('received_file', 'wb') as f: + print 'file opened' + while True: + print('receiving data...') + data = s.recv(1024) + print('data=%s', (data)) + if not data: + break + # write data to a file + f.write(data) + +f.close() +print('Successfully get the file') +s.close() +print('connection closed') \ No newline at end of file diff --git a/File_Transfer_Protocol/ftp_send_receive.py b/File_Transfer_Protocol/ftp_send_receive.py new file mode 100644 index 000000000000..d4919158a02e --- /dev/null +++ b/File_Transfer_Protocol/ftp_send_receive.py @@ -0,0 +1,36 @@ +""" + File transfer protocol used to send and receive files using FTP server. 
def ReceiveFile():
    """Download ``example.txt`` from the FTP working directory.

    Uses the module-level ``ftp`` connection, writes the remote file to a
    local file of the same name, then closes the FTP session.
    """
    # Enter the location of the file.
    # (This used to be a string literal placed right after the file name;
    # Python concatenates adjacent string literals, so the note text became
    # part of the file name.)
    FileName = 'example.txt'
    # ``with`` closes the local file even if the transfer raises.
    with open(FileName, 'wb') as LocalFile:
        ftp.retrbinary('RETR ' + FileName, LocalFile.write, 1024)
    ftp.quit()


def SendFile():
    """Upload ``example.txt`` to the FTP current working directory.

    Uses the module-level ``ftp`` connection and closes the FTP session
    when the upload is done.
    """
    # Enter the name of the file (a real comment now, see ReceiveFile).
    FileName = 'example.txt'
    # The original never closed the handle it passed to storbinary.
    with open(FileName, 'rb') as LocalFile:
        ftp.storbinary('STOR ' + FileName, LocalFile)
    ftp.quit()
# the actions we can take, as [row offset, column offset]
delta = [[-1, 0],  # go up
         [0, -1],  # go left
         [1, 0],   # go down
         [0, 1]]   # go right


# function to search the path
def search(grid, init, goal, cost, heuristic):
    """Best-first (A*) search on a 2-D grid.

    :param grid: list of rows; 0 marks a free cell, 1 an obstacle
    :param init: [row, col] of the start cell
    :param goal: [row, col] of the target cell
    :param cost: cost added for every step
    :param heuristic: table with the same shape as ``grid`` giving the
        estimated remaining cost from each cell
    :return: list of [row, col] cells from ``init`` to ``goal`` inclusive,
        or the string ``"FAIL"`` when the goal cannot be reached
    Side effect: prints the action map before returning a path.
    """
    import heapq  # local import: the file's import header is outside this block

    rows = len(grid)
    cols = len(grid[0])
    closed = [[0] * cols for _ in range(rows)]  # the reference grid
    closed[init[0]][init[1]] = 1
    action = [[0] * cols for _ in range(rows)]  # index into delta used to enter a cell

    # Fixed: the start cell's heuristic is at [row][col]; the original read
    # heuristic[init[0]][init[0]], which is the wrong cell and can raise
    # IndexError when the grid has fewer columns than init[0] + 1.
    start_f = 0 + heuristic[init[0]][init[1]]
    # Entries are [f, g, row, col]; a heap always pops the smallest one,
    # which is what the original sort/reverse/pop sequence did.
    frontier = [[start_f, 0, init[0], init[1]]]

    while True:
        if not frontier:
            return "FAIL"
        _, g, x, y = heapq.heappop(frontier)
        if x == goal[0] and y == goal[1]:
            break
        for i, (dx, dy) in enumerate(delta):  # try out the valid actions
            x2 = x + dx
            y2 = y + dy
            if 0 <= x2 < rows and 0 <= y2 < cols:
                if closed[x2][y2] == 0 and grid[x2][y2] == 0:
                    g2 = g + cost
                    f2 = g2 + heuristic[x2][y2]
                    heapq.heappush(frontier, [f2, g2, x2, y2])
                    closed[x2][y2] = 1
                    action[x2][y2] = i

    # Walk the recorded actions backwards from the goal to the start.
    x = goal[0]
    y = goal[1]
    invpath = [[x, y]]
    while x != init[0] or y != init[1]:
        step = delta[action[x][y]]
        x, y = x - step[0], y - step[1]
        invpath.append([x, y])
    path = invpath[::-1]

    print("ACTION MAP")
    for row in action:
        print(row)

    return path
def dfs(G, s):
    """Iterative depth-first traversal that prints nodes in visit order.

    Args : G - Dictionary of edges (node -> iterable of neighbour nodes)
           s - Starting Node
    Vars : vis - Set of visited nodes
           S - Traversal stack of (node, iterator over its neighbours)

    Each stack entry keeps its own neighbour iterator, so a node's adjacency
    list is walked once in total instead of being rescanned from the first
    neighbour every time the traversal returns to that node. Visit order is
    unchanged: neighbours skipped earlier were already visited.
    """
    vis = {s}
    print(s)
    S = [(s, iter(G[s]))]
    while S:
        _, pending = S[-1]
        for i in pending:
            if i not in vis:
                vis.add(i)
                print(i)
                S.append((i, iter(G[i])))
                break
        else:
            # No unvisited neighbour left for the node on top of the stack.
            S.pop()
def topo(G, ind=None, Q=None):
    """Print the nodes of a directed acyclic graph in topological order.

    Uses Kahn's algorithm: repeatedly print a node whose in-degree is zero
    and decrement the in-degree of its successors.

    :param G: dictionary node -> iterable of successor nodes; nodes are used
        as indices into ``ind``, so they are expected to be the integers
        1..len(G) (index 0 is ignored)
    :param ind: optional list of in-degrees indexed by node; computed from
        ``G`` when omitted
    :param Q: optional queue of nodes whose in-degree is already zero; only
        consulted when ``ind`` is supplied
    :return: None. Nodes that lie on a cycle never reach in-degree zero and
        are therefore not printed.

    The original recursed once per printed node (RecursionError on graphs
    with more than about a thousand nodes) and used the mutable default
    ``Q=[1]``; this version loops instead and defaults ``Q`` to None.
    """
    from collections import deque  # local import keeps the block self-contained

    if ind is None:
        ind = [0] * (len(G) + 1)  # Since 0th index is ignored
        for u in G:
            for v in G[u]:
                ind[v] += 1
        Q = deque(i for i in G if ind[i] == 0)
    elif Q is None:
        Q = deque()
    elif not hasattr(Q, 'popleft'):
        # Accept any iterable of ready nodes, not only a deque.
        Q = deque(Q)

    while Q:
        v = Q.popleft()
        print(v)
        for w in G[v]:
            ind[w] -= 1
            if ind[w] == 0:
                Q.append(w)
def floy(A_and_n):
    """Floyd-Warshall all-pairs shortest paths on an adjacency matrix.

    :param A_and_n: tuple ``(A, n)`` where ``A`` is an n x n matrix of edge
        weights (any iterable of row iterables) and ``n`` the number of nodes
    :return: ``(dist, path)``; ``dist[i][j]`` is the shortest distance from
        i to j, and ``path[i][j]`` is the last intermediate node k that
        improved that distance (0 when the entry was never improved)
    Side effect: prints ``dist``, as the original did.

    Changes from the original:
      * rows are copied, so the caller's matrix is no longer overwritten
        (``list(A)`` only copied the outer list);
      * the intermediate node is stored in ``path[i][j]``; the original
        wrote ``path[i][k]``, a different cell;
      * ``range`` replaces the ``xrange`` shim, and the results are
        returned instead of only being printed.
    """
    (A, n) = A_and_n
    dist = [list(row) for row in A]
    path = [[0] * n for _ in range(n)]
    for k in range(n):
        for i in range(n):
            for j in range(n):
                through_k = dist[i][k] + dist[k][j]
                if dist[i][j] > through_k:
                    dist[i][j] = through_k
                    path[i][j] = k
    print(dist)
    return dist, path
def krusk(E_and_n):
    """Kruskal's minimum spanning tree on an edge list.

    :param E_and_n: tuple ``(E, n)``; ``E`` is a list of edges
        ``[u, v, weight]`` and ``n`` the number of nodes, labelled 1..n
    :return: list of the edges accepted into the spanning forest, in the
        order they were taken (the original returned nothing)
    :raises ValueError: if an edge endpoint is not one of the nodes 1..n
    Side effect: prints the current list of disjoint node sets before each
    edge is examined, as the original did.

    Changes from the original:
      * works on a sorted copy, so the caller's edge list is neither
        reordered nor emptied;
      * stops when the edges run out, so a disconnected graph yields a
        spanning forest instead of an IndexError from ``pop``;
      * an unknown endpoint is reported instead of silently reusing the
        last loop index.
    """
    (E, n) = E_and_n
    # Descending order so that pop() hands out the cheapest edge first.
    edges = sorted(E, key=lambda x: x[2], reverse=True)
    s = [set([i]) for i in range(1, n + 1)]
    taken = []
    while len(s) > 1 and edges:
        print(s)
        x = edges.pop()
        i = j = None
        for idx, component in enumerate(s):
            if i is None and x[0] in component:
                i = idx
            if j is None and x[1] in component:
                j = idx
        if i is None or j is None:
            raise ValueError("edge endpoint outside 1..n: %r" % (x,))
        if i == j:
            # Both ends are already connected; taking the edge would close a cycle.
            continue
        s[j].update(s[i])
        s.pop(i)
        taken.append(x)
    return taken
def dfs(u):
    """First pass: depth-first walk of ``g`` that appends each node to the
    module-level ``stack`` after all of its successors have been walked."""
    if not visit[u]:
        visit[u] = True
        for successor in g[u]:
            dfs(successor)
        stack.append(u)


def dfs2(u):
    """Second pass: depth-first walk of the reversed graph ``r`` that
    collects every newly reached node into the module-level ``component``."""
    if not visit[u]:
        visit[u] = True
        component.append(u)
        for predecessor in r[u]:
            dfs2(predecessor)


def kosaraju():
    """Compute strongly connected components with Kosaraju's two passes.

    Reads the module-level graph ``g``, its reverse ``r`` and node count
    ``n``; appends one list of nodes per component to the module-level
    ``scc`` and returns that list.
    """
    # Only the names rebound below need a global declaration.
    global component, visit
    for node in range(n):
        dfs(node)
    # Reset the marks so the second pass can reuse them.
    visit = [False] * n
    # Process nodes in decreasing order of first-pass finishing time.
    for node in reversed(stack):
        if not visit[node]:
            component = []
            dfs2(node)
            scc.append(component)
    return scc
def intersection(function, x0, x1, tolerance=0.00001):
    """Approximate a root of ``function`` with the secant method.

    :param function: the f whose root is wanted (callable of one number)
    :param x0: first starting point
    :param x1: second starting point (must differ from ``x0``)
    :param tolerance: stop when two successive estimates differ by less
        than this (defaults to the original hard-coded 10^-5)
    :return: the last estimate of the root
    :raises ZeroDivisionError: when the secant through the two current
        points is horizontal (equal function values) or the two points
        coincide, so no next estimate exists

    The loop runs until the estimates converge; it does not terminate if
    they never do. Each iteration now evaluates ``function`` once instead
    of three times by carrying the previous value forward.
    """
    x_n = x0
    x_n1 = x1
    f_n = function(x_n)
    while True:
        f_n1 = function(x_n1)
        slope = (f_n1 - f_n) / (x_n1 - x_n)
        if slope == 0:
            raise ZeroDivisionError(
                "secant is horizontal between %r and %r" % (x_n, x_n1))
        x_n2 = x_n1 - (f_n1 / slope)
        if abs(x_n2 - x_n1) < tolerance:
            return x_n2
        # Slide the window: the newer point becomes the older one.
        x_n, f_n = x_n1, f_n1
        x_n1 = x_n2
def modularExponential(base, power, mod):
    """Compute ``(base ** power) % mod`` by binary (square-and-multiply) exponentiation.

    :param base: integer base
    :param power: integer exponent
    :param mod: non-zero integer modulus
    :return: the modular power, or -1 when ``power`` is negative (kept for
        callers that rely on that sentinel)
    :raises ZeroDivisionError: when ``mod`` is 0
    """
    if power < 0:
        return -1
    base %= mod
    # Start from 1 reduced modulo mod: with mod == 1 every result must be 0,
    # but a plain 1 was returned unchanged when power == 0.
    result = 1 % mod

    while power > 0:
        if power & 1:
            # Current lowest bit of the exponent is set: fold in this power of base.
            result = (result * base) % mod
        power = power >> 1
        base = (base * base) % mod
    return result
def hueristic_1(P, goal):
    """Manhattan distance between ``P`` and ``goal``.

    Both arguments are indexable pairs; only indices 0 and 1 are read.
    """
    gap_first = abs(P[0] - goal[0])
    gap_second = abs(P[1] - goal[1])
    return gap_first + gap_second
def multi_a_star(start, goal, n_hueristic):
    """Run Multi-Heuristic A* from ``start`` to ``goal``.

    Queue 0 is the anchor search and queues 1..n_hueristic-1 are the
    additional searches; they are visited round-robin.

    :param start: start state, a hashable (x, y) tuple
    :param goal: goal state, a hashable (x, y) tuple
    :param n_hueristic: number of heuristics / open lists to use
    :return: None. When a path is found ``do_something`` prints it; if the
        anchor queue runs dry the grid is printed with a failure message.

    Relies on the module-level ``PriorityQueue``, ``key``, ``expand_state``,
    ``do_something``, ``W2``, ``n``, ``blocks`` and the counter ``t``.
    """
    global t
    g_function = {start: 0, goal: float('inf')}
    back_pointer = {start: -1, goal: -1}
    open_list = []
    visited = set()

    for i in range(n_hueristic):
        open_list.append(PriorityQueue())
        open_list[i].put(start, key(start, i, goal, g_function))

    close_list_anchor = []
    close_list_inad = []
    while open_list[0].minkey() < float('inf'):
        for i in range(1, n_hueristic):
            if open_list[i].minkey() <= W2 * open_list[0].minkey():
                # Queue i is within the W2 bound of the anchor: expand from it.
                t += 1
                if g_function[goal] <= open_list[i].minkey():
                    if g_function[goal] < float('inf'):
                        do_something(back_pointer, goal, start)
                else:
                    # Fixed: top_show() returns the state itself, not a
                    # (priority, state) pair. The original unpacked it with
                    # "_, get_s = ...", which left get_s holding only the
                    # y coordinate and broke the expansion that follows.
                    get_s = open_list[i].top_show()
                    visited.add(get_s)
                    expand_state(get_s, i, visited, g_function, close_list_anchor, close_list_inad, open_list, back_pointer)
                    close_list_inad.append(get_s)
            else:
                # Queue i is outside the bound: fall back to the anchor queue.
                if g_function[goal] <= open_list[0].minkey():
                    if g_function[goal] < float('inf'):
                        do_something(back_pointer, goal, start)
                else:
                    get_s = open_list[0].top_show()
                    visited.add(get_s)
                    expand_state(get_s, 0, visited, g_function, close_list_anchor, close_list_inad, open_list, back_pointer)
                    close_list_anchor.append(get_s)

    print("No path found to goal")
    print()
    # Draw the grid top row first; every state that was reached is shown as '-'.
    for i in range(n-1, -1, -1):
        for j in range(n):
            if (j, i) in blocks:
                print('#', end=' ')
            elif (j, i) in back_pointer:
                if (j, i) == (n-1, n-1):
                    print('*', end=' ')
                else:
                    print('-', end=' ')
            else:
                print('*', end=' ')
            if (j, i) == (n-1, n-1):
                print('<-- End position', end=' ')
        print()
    print("^")
    print("Start position")
    print()
    print("# is an obstacle")
    print("- is the path taken by algorithm")
def sigmoid(x):
    '''
    Logistic activation 1 / (1 + e^-x), applied element-wise by numpy
    '''
    return 1 / (1 + np.exp(-1 * x))


class DenseLayer():
    '''
    One fully connected layer of the BP neural network
    '''
    def __init__(self, units, activation=None, learning_rate=None, is_input_layer=False):
        '''
        :param units: number of neural units in this layer
        :param activation: activation function (sigmoid is chosen by
                           initializer when left as None)
        :param learning_rate: step size for weight and bias updates, 0.3 when None
        :param is_input_layer: True when the layer only passes its input through
        '''
        self.units = units
        self.weight = None
        self.bias = None
        self.activation = activation
        self.learn_rate = 0.3 if learning_rate is None else learning_rate
        self.is_input_layer = is_input_layer

    def initializer(self, back_units):
        '''
        Draw random weights (units x back_units) and a bias column (units x 1)
        from N(0, 0.5), weights first, and default the activation to sigmoid
        :param back_units: number of units in the previous layer
        '''
        weight_shape = (self.units, back_units)
        self.weight = np.asmatrix(np.random.normal(0, 0.5, weight_shape))
        self.bias = np.asmatrix(np.random.normal(0, 0.5, self.units)).T
        if self.activation is None:
            self.activation = sigmoid

    def cal_gradient(self):
        '''
        Derivative of the activation at the last forward output: a diagonal
        matrix holding output * (1 - output) for sigmoid, otherwise the scalar 1
        '''
        if self.activation == sigmoid:
            outer = np.dot(self.output, (1 - self.output).T)
            # keep only the diagonal of the outer product
            return np.diag(np.diag(outer))
        return 1

    def forward_propagation(self, xdata):
        '''
        Store the input and return the layer output
        :param xdata: input of this layer
        :return: xdata itself for the input layer, else
                 activation(weight . xdata - bias)
        '''
        self.xdata = xdata
        if self.is_input_layer:
            # input layer: identity
            self.wx_plus_b = xdata
            self.output = xdata
            return xdata
        self.wx_plus_b = np.dot(self.weight, self.xdata) - self.bias
        self.output = self.activation(self.wx_plus_b)
        return self.output

    def back_propagation(self, gradient):
        '''
        Update weight and bias by one gradient-descent step and return the
        gradient with respect to this layer's input
        :param gradient: gradient of the loss with respect to this layer's output
        :return: gradient to hand to the previous layer
        '''
        activation_grad = self.cal_gradient()  # i * i dimensional
        local_grad = np.asmatrix(np.dot(gradient.T, activation_grad))

        self._gradient_weight = np.asmatrix(self.xdata)
        self._gradient_bias = -1
        # captured before the update below, so the returned gradient uses the old weights
        self._gradient_x = self.weight

        self.gradient_weight = np.dot(local_grad.T, self._gradient_weight.T)
        self.gradient_bias = local_grad * self._gradient_bias
        self.gradient = np.dot(local_grad, self._gradient_x).T
        # step in the negative gradient direction
        self.weight = self.weight - self.learn_rate * self.gradient_weight
        self.bias = self.bias - self.learn_rate * self.gradient_bias.T

        return self.gradient
= 2 * (ydata_ - ydata) + # vector (shape is the same as _ydata.shape) + return self.loss,self.loss_gradient + + def plot_loss(self): + if self.ax_loss.lines: + self.ax_loss.lines.remove(self.ax_loss.lines[0]) + self.ax_loss.plot(self.train_mse, 'r-') + plt.ion() + plt.show() + plt.pause(0.1) + + + + +def example(): + + x = np.random.randn(10,10) + y = np.asarray([[0.8,0.4],[0.4,0.3],[0.34,0.45],[0.67,0.32], + [0.88,0.67],[0.78,0.77],[0.55,0.66],[0.55,0.43],[0.54,0.1], + [0.1,0.5]]) + + model = BPNN() + model.add_layer(DenseLayer(10)) + model.add_layer(DenseLayer(20)) + model.add_layer(DenseLayer(30)) + model.add_layer(DenseLayer(2)) + + model.build() + + model.summary() + + model.train(xdata=x,ydata=y,train_round=100,accuracy=0.01) + +if __name__ == '__main__': + example() diff --git a/Neural_Network/convolution_neural_network.py b/Neural_Network/convolution_neural_network.py new file mode 100644 index 000000000000..0dca2bc485d1 --- /dev/null +++ b/Neural_Network/convolution_neural_network.py @@ -0,0 +1,306 @@ +#-*- coding: utf-8 -*- + +''' + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - + Name - - CNN - Convolution Neural Network For Photo Recognizing + Goal - - Recognize Handing Writting Word Photo + Detail:Total 5 layers neural network + * Convolution layer + * Pooling layer + * Input layer layer of BP + * Hiden layer of BP + * Output layer of BP + Author: Stephen Lee + Github: 245885195@qq.com + Date: 2017.9.20 + - - - - - -- - - - - - - - - - - - - - - - - - - - - - - + ''' +from __future__ import print_function + +import numpy as np +import matplotlib.pyplot as plt + +class CNN(): + + def __init__(self,conv1_get,size_p1,bp_num1,bp_num2,bp_num3,rate_w=0.2,rate_t=0.2): + ''' + :param conv1_get: [a,c,d],size, number, step of convolution kernel + :param size_p1: pooling size + :param bp_num1: units number of flatten layer + :param bp_num2: units number of hidden layer + :param bp_num3: units number of output layer + :param rate_w: rate of weight 
learning + :param rate_t: rate of threshold learning + ''' + self.num_bp1 = bp_num1 + self.num_bp2 = bp_num2 + self.num_bp3 = bp_num3 + self.conv1 = conv1_get[:2] + self.step_conv1 = conv1_get[2] + self.size_pooling1 = size_p1 + self.rate_weight = rate_w + self.rate_thre = rate_t + self.w_conv1 = [np.mat(-1*np.random.rand(self.conv1[0],self.conv1[0])+0.5) for i in range(self.conv1[1])] + self.wkj = np.mat(-1 * np.random.rand(self.num_bp3, self.num_bp2) + 0.5) + self.vji = np.mat(-1*np.random.rand(self.num_bp2, self.num_bp1)+0.5) + self.thre_conv1 = -2*np.random.rand(self.conv1[1])+1 + self.thre_bp2 = -2*np.random.rand(self.num_bp2)+1 + self.thre_bp3 = -2*np.random.rand(self.num_bp3)+1 + + + def save_model(self,save_path): + #save model dict with pickle + import pickle + model_dic = {'num_bp1':self.num_bp1, + 'num_bp2':self.num_bp2, + 'num_bp3':self.num_bp3, + 'conv1':self.conv1, + 'step_conv1':self.step_conv1, + 'size_pooling1':self.size_pooling1, + 'rate_weight':self.rate_weight, + 'rate_thre':self.rate_thre, + 'w_conv1':self.w_conv1, + 'wkj':self.wkj, + 'vji':self.vji, + 'thre_conv1':self.thre_conv1, + 'thre_bp2':self.thre_bp2, + 'thre_bp3':self.thre_bp3} + with open(save_path, 'wb') as f: + pickle.dump(model_dic, f) + + print('Model saved: %s'% save_path) + + @classmethod + def ReadModel(cls,model_path): + #read saved model + import pickle + with open(model_path, 'rb') as f: + model_dic = pickle.load(f) + + conv_get= model_dic.get('conv1') + conv_get.append(model_dic.get('step_conv1')) + size_p1 = model_dic.get('size_pooling1') + bp1 = model_dic.get('num_bp1') + bp2 = model_dic.get('num_bp2') + bp3 = model_dic.get('num_bp3') + r_w = model_dic.get('rate_weight') + r_t = model_dic.get('rate_thre') + #create model instance + conv_ins = CNN(conv_get,size_p1,bp1,bp2,bp3,r_w,r_t) + #modify model parameter + conv_ins.w_conv1 = model_dic.get('w_conv1') + conv_ins.wkj = model_dic.get('wkj') + conv_ins.vji = model_dic.get('vji') + conv_ins.thre_conv1 = 
model_dic.get('thre_conv1') + conv_ins.thre_bp2 = model_dic.get('thre_bp2') + conv_ins.thre_bp3 = model_dic.get('thre_bp3') + return conv_ins + + + def sig(self,x): + return 1 / (1 + np.exp(-1*x)) + + def do_round(self,x): + return round(x, 3) + + def convolute(self,data,convs,w_convs,thre_convs,conv_step): + #convolution process + size_conv = convs[0] + num_conv =convs[1] + size_data = np.shape(data)[0] + #get the data slice of original image data, data_focus + data_focus = [] + for i_focus in range(0, size_data - size_conv + 1, conv_step): + for j_focus in range(0, size_data - size_conv + 1, conv_step): + focus = data[i_focus:i_focus + size_conv, j_focus:j_focus + size_conv] + data_focus.append(focus) + #caculate the feature map of every single kernel, and saved as list of matrix + data_featuremap = [] + Size_FeatureMap = int((size_data - size_conv) / conv_step + 1) + for i_map in range(num_conv): + featuremap = [] + for i_focus in range(len(data_focus)): + net_focus = np.sum(np.multiply(data_focus[i_focus], w_convs[i_map])) - thre_convs[i_map] + featuremap.append(self.sig(net_focus)) + featuremap = np.asmatrix(featuremap).reshape(Size_FeatureMap, Size_FeatureMap) + data_featuremap.append(featuremap) + + #expanding the data slice to One dimenssion + focus1_list = [] + for each_focus in data_focus: + focus1_list.extend(self.Expand_Mat(each_focus)) + focus_list = np.asarray(focus1_list) + return focus_list,data_featuremap + + def pooling(self,featuremaps,size_pooling,type='average_pool'): + #pooling process + size_map = len(featuremaps[0]) + size_pooled = int(size_map/size_pooling) + featuremap_pooled = [] + for i_map in range(len(featuremaps)): + map = featuremaps[i_map] + map_pooled = [] + for i_focus in range(0,size_map,size_pooling): + for j_focus in range(0, size_map, size_pooling): + focus = map[i_focus:i_focus + size_pooling, j_focus:j_focus + size_pooling] + if type == 'average_pool': + #average pooling + map_pooled.append(np.average(focus)) + elif type == 
'max_pooling': + #max pooling + map_pooled.append(np.max(focus)) + map_pooled = np.asmatrix(map_pooled).reshape(size_pooled,size_pooled) + featuremap_pooled.append(map_pooled) + return featuremap_pooled + + def _expand(self,datas): + #expanding three dimension data to one dimension list + data_expanded = [] + for i in range(len(datas)): + shapes = np.shape(datas[i]) + data_listed = datas[i].reshape(1,shapes[0]*shapes[1]) + data_listed = data_listed.getA().tolist()[0] + data_expanded.extend(data_listed) + data_expanded = np.asarray(data_expanded) + return data_expanded + + def _expand_mat(self,data_mat): + #expanding matrix to one dimension list + data_mat = np.asarray(data_mat) + shapes = np.shape(data_mat) + data_expanded = data_mat.reshape(1,shapes[0]*shapes[1]) + return data_expanded + + def _calculate_gradient_from_pool(self,out_map,pd_pool,num_map,size_map,size_pooling): + ''' + calcluate the gradient from the data slice of pool layer + pd_pool: list of matrix + out_map: the shape of data slice(size_map*size_map) + return: pd_all: list of matrix, [num, size_map, size_map] + ''' + pd_all = [] + i_pool = 0 + for i_map in range(num_map): + pd_conv1 = np.ones((size_map, size_map)) + for i in range(0, size_map, size_pooling): + for j in range(0, size_map, size_pooling): + pd_conv1[i:i + size_pooling, j:j + size_pooling] = pd_pool[i_pool] + i_pool = i_pool + 1 + pd_conv2 = np.multiply(pd_conv1,np.multiply(out_map[i_map],(1-out_map[i_map]))) + pd_all.append(pd_conv2) + return pd_all + + def trian(self,patterns,datas_train, datas_teach, n_repeat, error_accuracy,draw_e = bool): + #model traning + print('----------------------Start Training-------------------------') + print((' - - Shape: Train_Data ',np.shape(datas_train))) + print((' - - Shape: Teach_Data ',np.shape(datas_teach))) + rp = 0 + all_mse = [] + mse = 10000 + while rp < n_repeat and mse >= error_accuracy: + alle = 0 + print('-------------Learning Time %d--------------'%rp) + for p in 
range(len(datas_train)): + #print('------------Learning Image: %d--------------'%p) + data_train = np.asmatrix(datas_train[p]) + data_teach = np.asarray(datas_teach[p]) + data_focus1,data_conved1 = self.convolute(data_train,self.conv1,self.w_conv1, + self.thre_conv1,conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1,self.size_pooling1) + shape_featuremap1 = np.shape(data_conved1) + ''' + print(' -----original shape ', np.shape(data_train)) + print(' ---- after convolution ',np.shape(data_conv1)) + print(' -----after pooling ',np.shape(data_pooled1)) + ''' + data_bp_input = self._expand(data_pooled1) + bp_out1 = data_bp_input + + bp_net_j = np.dot(bp_out1,self.vji.T) - self.thre_bp2 + bp_out2 = self.sig(bp_net_j) + bp_net_k = np.dot(bp_out2 ,self.wkj.T) - self.thre_bp3 + bp_out3 = self.sig(bp_net_k) + + #--------------Model Leaning ------------------------ + # calcluate error and gradient--------------- + pd_k_all = np.multiply((data_teach - bp_out3), np.multiply(bp_out3, (1 - bp_out3))) + pd_j_all = np.multiply(np.dot(pd_k_all,self.wkj), np.multiply(bp_out2, (1 - bp_out2))) + pd_i_all = np.dot(pd_j_all,self.vji) + + pd_conv1_pooled = pd_i_all / (self.size_pooling1*self.size_pooling1) + pd_conv1_pooled = pd_conv1_pooled.T.getA().tolist() + pd_conv1_all = self._calculate_gradient_from_pool(data_conved1,pd_conv1_pooled,shape_featuremap1[0], + shape_featuremap1[1],self.size_pooling1) + #weight and threshold learning process--------- + #convolution layer + for k_conv in range(self.conv1[1]): + pd_conv_list = self._expand_mat(pd_conv1_all[k_conv]) + delta_w = self.rate_weight * np.dot(pd_conv_list,data_focus1) + + self.w_conv1[k_conv] = self.w_conv1[k_conv] + delta_w.reshape((self.conv1[0],self.conv1[0])) + + self.thre_conv1[k_conv] = self.thre_conv1[k_conv] - np.sum(pd_conv1_all[k_conv]) * self.rate_thre + #all connected layer + self.wkj = self.wkj + pd_k_all.T * bp_out2 * self.rate_weight + self.vji = self.vji + pd_j_all.T * bp_out1 * 
self.rate_weight + self.thre_bp3 = self.thre_bp3 - pd_k_all * self.rate_thre + self.thre_bp2 = self.thre_bp2 - pd_j_all * self.rate_thre + # calculate the sum error of all single image + errors = np.sum(abs((data_teach - bp_out3))) + alle = alle + errors + #print(' ----Teach ',data_teach) + #print(' ----BP_output ',bp_out3) + rp = rp + 1 + mse = alle/patterns + all_mse.append(mse) + def draw_error(): + yplot = [error_accuracy for i in range(int(n_repeat * 1.2))] + plt.plot(all_mse, '+-') + plt.plot(yplot, 'r--') + plt.xlabel('Learning Times') + plt.ylabel('All_mse') + plt.grid(True, alpha=0.5) + plt.show() + print('------------------Training Complished---------------------') + print((' - - Training epoch: ', rp, ' - - Mse: %.6f' % mse)) + if draw_e: + draw_error() + return mse + + def predict(self,datas_test): + #model predict + produce_out = [] + print('-------------------Start Testing-------------------------') + print((' - - Shape: Test_Data ',np.shape(datas_test))) + for p in range(len(datas_test)): + data_test = np.asmatrix(datas_test[p]) + data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1, + self.thre_conv1, conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1, self.size_pooling1) + data_bp_input = self._expand(data_pooled1) + + bp_out1 = data_bp_input + bp_net_j = bp_out1 * self.vji.T - self.thre_bp2 + bp_out2 = self.sig(bp_net_j) + bp_net_k = bp_out2 * self.wkj.T - self.thre_bp3 + bp_out3 = self.sig(bp_net_k) + produce_out.extend(bp_out3.getA().tolist()) + res = [list(map(self.do_round,each)) for each in produce_out] + return np.asarray(res) + + def convolution(self,data): + #return the data of image after convoluting process so we can check it out + data_test = np.asmatrix(data) + data_focus1, data_conved1 = self.convolute(data_test, self.conv1, self.w_conv1, + self.thre_conv1, conv_step=self.step_conv1) + data_pooled1 = self.pooling(data_conved1, self.size_pooling1) + + return data_conved1,data_pooled1 + + +if 
__name__ == '__main__': + pass + ''' + I will put the example on other file + ''' \ No newline at end of file diff --git a/Neural_Network/perceptron.py b/Neural_Network/perceptron.py new file mode 100644 index 000000000000..8ac3e8fc69e9 --- /dev/null +++ b/Neural_Network/perceptron.py @@ -0,0 +1,124 @@ +''' + + Perceptron + w = w + N * (d(k) - y) * x(k) + + Using perceptron network for oil analysis, + with Measuring of 3 parameters that represent chemical characteristics we can classify the oil, in p1 or p2 + p1 = -1 + p2 = 1 + +''' +from __future__ import print_function + +import random + + +class Perceptron: + def __init__(self, sample, exit, learn_rate=0.01, epoch_number=1000, bias=-1): + self.sample = sample + self.exit = exit + self.learn_rate = learn_rate + self.epoch_number = epoch_number + self.bias = bias + self.number_sample = len(sample) + self.col_sample = len(sample[0]) + self.weight = [] + + def trannig(self): + for sample in self.sample: + sample.insert(0, self.bias) + + for i in range(self.col_sample): + self.weight.append(random.random()) + + self.weight.insert(0, self.bias) + + epoch_count = 0 + + while True: + erro = False + for i in range(self.number_sample): + u = 0 + for j in range(self.col_sample + 1): + u = u + self.weight[j] * self.sample[i][j] + y = self.sign(u) + if y != self.exit[i]: + + for j in range(self.col_sample + 1): + + self.weight[j] = self.weight[j] + self.learn_rate * (self.exit[i] - y) * self.sample[i][j] + erro = True + #print('Epoch: \n',epoch_count) + epoch_count = epoch_count + 1 + # if you want controle the epoch or just by erro + if erro == False: + print(('\nEpoch:\n',epoch_count)) + print('------------------------\n') + #if epoch_count > self.epoch_number or not erro: + break + + def sort(self, sample): + sample.insert(0, self.bias) + u = 0 + for i in range(self.col_sample + 1): + u = u + self.weight[i] * sample[i] + + y = self.sign(u) + + if y == -1: + print(('Sample: ', sample)) + print('classification: P1') + else: 
+ print(('Sample: ', sample)) + print('classification: P2') + + def sign(self, u): + return 1 if u >= 0 else -1 + + +samples = [ + [-0.6508, 0.1097, 4.0009], + [-1.4492, 0.8896, 4.4005], + [2.0850, 0.6876, 12.0710], + [0.2626, 1.1476, 7.7985], + [0.6418, 1.0234, 7.0427], + [0.2569, 0.6730, 8.3265], + [1.1155, 0.6043, 7.4446], + [0.0914, 0.3399, 7.0677], + [0.0121, 0.5256, 4.6316], + [-0.0429, 0.4660, 5.4323], + [0.4340, 0.6870, 8.2287], + [0.2735, 1.0287, 7.1934], + [0.4839, 0.4851, 7.4850], + [0.4089, -0.1267, 5.5019], + [1.4391, 0.1614, 8.5843], + [-0.9115, -0.1973, 2.1962], + [0.3654, 1.0475, 7.4858], + [0.2144, 0.7515, 7.1699], + [0.2013, 1.0014, 6.5489], + [0.6483, 0.2183, 5.8991], + [-0.1147, 0.2242, 7.2435], + [-0.7970, 0.8795, 3.8762], + [-1.0625, 0.6366, 2.4707], + [0.5307, 0.1285, 5.6883], + [-1.2200, 0.7777, 1.7252], + [0.3957, 0.1076, 5.6623], + [-0.1013, 0.5989, 7.1812], + [2.4482, 0.9455, 11.2095], + [2.0149, 0.6192, 10.9263], + [0.2012, 0.2611, 5.4631] + +] + +exit = [-1, -1, -1, 1, 1, -1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 1] + +network = Perceptron(sample=samples, exit = exit, learn_rate=0.01, epoch_number=1000, bias=-1) + +network.trannig() + +while True: + sample = [] + for i in range(3): + sample.insert(i, float(input('value: '))) + network.sort(sample) \ No newline at end of file diff --git a/NeutonMethod.py b/NeutonMethod.py new file mode 100644 index 000000000000..c3d5efb47d01 --- /dev/null +++ b/NeutonMethod.py @@ -0,0 +1,15 @@ +def newton(function,function1,startingInt): #function is the f(x) and function1 is the f'(x) + x_n=startingInt + while True: + x_n1=x_n-function(x_n)/function1(x_n) + if abs(x_n-x_n1)<0.00001: + return x_n1 + x_n=x_n1 + +def f(x): + return (x**3)-2*x-5 + +def f1(x): + return 3*(x**2)-2 + +print(newton(f,f1,3)) diff --git a/Project Euler/Problem 01/sol1.py b/Project Euler/Problem 01/sol1.py new file mode 100644 index 000000000000..27031c3cfa9a --- /dev/null +++ b/Project 
Euler/Problem 01/sol1.py @@ -0,0 +1,17 @@ +''' +Problem Statement: +If we list all the natural numbers below 10 that are multiples of 3 or 5, +we get 3,5,6 and 9. The sum of these multiples is 23. +Find the sum of all the multiples of 3 or 5 below N. +''' +from __future__ import print_function +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 +n = int(raw_input().strip()) +sum=0 +for a in range(3,n): + if(a%3==0 or a%5==0): + sum+=a +print(sum) diff --git a/Project Euler/Problem 01/sol2.py b/Project Euler/Problem 01/sol2.py new file mode 100644 index 000000000000..d330387e98ab --- /dev/null +++ b/Project Euler/Problem 01/sol2.py @@ -0,0 +1,20 @@ +''' +Problem Statement: +If we list all the natural numbers below 10 that are multiples of 3 or 5, +we get 3,5,6 and 9. The sum of these multiples is 23. +Find the sum of all the multiples of 3 or 5 below N. +''' +from __future__ import print_function +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 +n = int(raw_input().strip()) +sum = 0 +terms = (n-1)/3 +sum+= ((terms)*(6+(terms-1)*3))/2 #sum of an A.P. +terms = (n-1)/5 +sum+= ((terms)*(10+(terms-1)*5))/2 +terms = (n-1)/15 +sum-= ((terms)*(30+(terms-1)*15))/2 +print(sum) diff --git a/Project Euler/Problem 01/sol3.py b/Project Euler/Problem 01/sol3.py new file mode 100644 index 000000000000..78b4d0e93519 --- /dev/null +++ b/Project Euler/Problem 01/sol3.py @@ -0,0 +1,48 @@ +''' +Problem Statement: +If we list all the natural numbers below 10 that are multiples of 3 or 5, +we get 3,5,6 and 9. The sum of these multiples is 23. +Find the sum of all the multiples of 3 or 5 below N. +''' +''' +This solution is based on the pattern that the successive numbers in the series follow: 0+3,+2,+1,+3,+1,+2,+3. 
+''' +from __future__ import print_function +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 +n = int(raw_input().strip()) +sum=0 +num=0 +while(1): + num+=3 + if(num>=n): + break + sum+=num + num+=2 + if(num>=n): + break + sum+=num + num+=1 + if(num>=n): + break + sum+=num + num+=3 + if(num>=n): + break + sum+=num + num+=1 + if(num>=n): + break + sum+=num + num+=2 + if(num>=n): + break + sum+=num + num+=3 + if(num>=n): + break + sum+=num + +print(sum); diff --git a/Project Euler/Problem 02/sol1.py b/Project Euler/Problem 02/sol1.py new file mode 100644 index 000000000000..f8257fb615fb --- /dev/null +++ b/Project Euler/Problem 02/sol1.py @@ -0,0 +1,26 @@ +''' +Problem: +Each new term in the Fibonacci sequence is generated by adding the previous two terms. By starting with 1 and 2, +the first 10 terms will be: + 1,2,3,5,8,13,21,34,55,89,.. +By considering the terms in the Fibonacci sequence whose values do not exceed n, find the sum of the even-valued terms. +e.g. for n=10, we have {2,8}, sum is 10. +''' +from __future__ import print_function + +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + +n = int(raw_input().strip()) +i=1 +j=2 +sum=0 +while(j<=n): + if((j&1)==0): #can also use (j%2==0) + sum+=j + temp=i + i=j + j=temp+i +print(sum) diff --git a/Project Euler/Problem 03/sol1.py b/Project Euler/Problem 03/sol1.py new file mode 100644 index 000000000000..ed83e87d9a5b --- /dev/null +++ b/Project Euler/Problem 03/sol1.py @@ -0,0 +1,39 @@ +''' +Problem: +The prime factors of 13195 are 5,7,13 and 29. What is the largest prime factor of a given number N? +e.g. for 10, largest prime factor = 5. For 17, largest prime factor = 17. 
+''' +from __future__ import print_function + +import math + +def isprime(no): + if(no==2): + return True + elif (no%2==0): + return False + sq = int(math.sqrt(no))+1 + for i in range(3,sq,2): + if(no%i==0): + return False + return True + +maxNumber = 0 +n=int(input()) +if(isprime(n)): + print(n) +else: + while (n%2==0): + n=n/2 + if(isprime(n)): + print(n) + else: + n1 = int(math.sqrt(n))+1 + for i in range(3,n1,2): + if(n%i==0): + if(isprime(n/i)): + maxNumber = n/i + break + elif(isprime(i)): + maxNumber = i + print(maxNumber) diff --git a/Project Euler/Problem 03/sol2.py b/Project Euler/Problem 03/sol2.py new file mode 100644 index 000000000000..af4365b7b18f --- /dev/null +++ b/Project Euler/Problem 03/sol2.py @@ -0,0 +1,17 @@ +''' +Problem: +The prime factors of 13195 are 5,7,13 and 29. What is the largest prime factor of a given number N? +e.g. for 10, largest prime factor = 5. For 17, largest prime factor = 17. +''' +from __future__ import print_function +n=int(input()) +prime=1 +i=2 +while(i*i<=n): + while(n%i==0): + prime=i + n/=i + i+=1 +if(n>1): + prime=n +print(prime) diff --git a/Project Euler/Problem 04/sol1.py b/Project Euler/Problem 04/sol1.py new file mode 100644 index 000000000000..77135ec7fc1a --- /dev/null +++ b/Project Euler/Problem 04/sol1.py @@ -0,0 +1,29 @@ +''' +Problem: +A palindromic number reads the same both ways. The largest palindrome made from the product of two 2-digit numbers is 9009 = 91 x 99. +Find the largest palindrome made from the product of two 3-digit numbers which is less than N. +''' +from __future__ import print_function +limit = int(input("limit? ")) + +# fetchs the next number +for number in range(limit-1,10000,-1): + + # converts number into string. + strNumber = str(number) + + # checks whether 'strNumber' is a palindrome. + if(strNumber == strNumber[::-1]): + + divisor = 999 + + # if 'number' is a product of two 3-digit numbers + # then number is the answer otherwise fetch next number. 
+ while(divisor != 99): + + if((number % divisor == 0) and (len(str(number / divisor)) == 3)): + + print(number) + exit(0) + + divisor -=1 \ No newline at end of file diff --git a/Project Euler/Problem 04/sol2.py b/Project Euler/Problem 04/sol2.py new file mode 100644 index 000000000000..e27e7d30471e --- /dev/null +++ b/Project Euler/Problem 04/sol2.py @@ -0,0 +1,19 @@ +''' +Problem: +A palindromic number reads the same both ways. The largest palindrome made from the product of two 2-digit numbers is 9009 = 91 x 99. +Find the largest palindrome made from the product of two 3-digit numbers which is less than N. +''' +from __future__ import print_function +arr = [] +for i in range(999,100,-1): + for j in range(999,100,-1): + t = str(i*j) + if t == t[::-1]: + arr.append(i*j) +arr.sort() + +n=int(input()) +for i in arr[::-1]: + if(i 1: + if number % 2 == 0: + number /=2 + counter += 1 + else: + number = (3*number)+1 + counter += 1 + + if counter > pre_counter: + largest_number = input1 + pre_counter = counter + +print(('Largest Number:',largest_number,'->',pre_counter,'digits')) diff --git a/Project Euler/Problem 16/sol1.py b/Project Euler/Problem 16/sol1.py new file mode 100644 index 000000000000..05c7916bd10a --- /dev/null +++ b/Project Euler/Problem 16/sol1.py @@ -0,0 +1,15 @@ +power = int(input("Enter the power of 2: ")) +num = 2**power + +string_num = str(num) + +list_num = list(string_num) + +sum_of_num = 0 + +print("2 ^",power,"=",num) + +for i in list_num: + sum_of_num += int(i) + +print("Sum of the digits are:",sum_of_num) diff --git a/Project Euler/Problem 20/sol1.py b/Project Euler/Problem 20/sol1.py new file mode 100644 index 000000000000..73e41d5cc8fa --- /dev/null +++ b/Project Euler/Problem 20/sol1.py @@ -0,0 +1,27 @@ +# Finding the factorial. +def factorial(n): + fact = 1 + for i in range(1,n+1): + fact *= i + return fact + +# Spliting the digits and adding it. 
+def split_and_add(number): + sum_of_digits = 0 + while(number>0): + last_digit = number % 10 + sum_of_digits += last_digit + number = int(number/10) # Removing the last_digit from the given number. + return sum_of_digits + +# Taking the user input. +number = int(input("Enter the Number: ")) + +# Assigning the factorial from the factorial function. +factorial = factorial(number) + +# Spliting and adding the factorial into answer. +answer = split_and_add(factorial) + +# Printing the answer. +print(answer) diff --git a/Project Euler/Problem 29/solution.py b/Project Euler/Problem 29/solution.py new file mode 100644 index 000000000000..9d6148da3d87 --- /dev/null +++ b/Project Euler/Problem 29/solution.py @@ -0,0 +1,34 @@ +def main(): + """ + Consider all integer combinations of ab for 2 <= a <= 5 and 2 <= b <= 5: + + 22=4, 23=8, 24=16, 25=32 + 32=9, 33=27, 34=81, 35=243 + 42=16, 43=64, 44=256, 45=1024 + 52=25, 53=125, 54=625, 55=3125 + If they are then placed in numerical order, with any repeats removed, we get the following sequence of 15 distinct terms: + + 4, 8, 9, 16, 25, 27, 32, 64, 81, 125, 243, 256, 625, 1024, 3125 + + How many distinct terms are in the sequence generated by ab for 2 <= a <= 100 and 2 <= b <= 100? + """ + + collectPowers = set() + + currentPow = 0 + + N = 101 # maximum limit + + for a in range(2,N): + + for b in range (2,N): + + currentPow = a**b # calculates the current power + collectPowers.add(currentPow) # adds the result to the set + + + print "Number of terms ", len(collectPowers) + + +if __name__ == '__main__': + main() diff --git a/Project Euler/Problem 9/sol1.py b/Project Euler/Problem 9/sol1.py new file mode 100644 index 000000000000..e54c543b4721 --- /dev/null +++ b/Project Euler/Problem 9/sol1.py @@ -0,0 +1,15 @@ +from __future__ import print_function +# Program to find the product of a,b,c which are Pythagorean Triplet that satisfice the following: +# 1. a < b < c +# 2. a**2 + b**2 = c**2 +# 3. 
a + b + c = 1000 + +print("Please Wait...") +for a in range(300): + for b in range(400): + for c in range(500): + if(a < b < c): + if((a**2) + (b**2) == (c**2)): + if((a+b+c) == 1000): + print(("Product of",a,"*",b,"*",c,"=",(a*b*c))) + break diff --git a/Project Euler/README.md b/Project Euler/README.md new file mode 100644 index 000000000000..9f77f719f0f1 --- /dev/null +++ b/Project Euler/README.md @@ -0,0 +1,58 @@ +# ProjectEuler + +Problems are taken from https://projecteuler.net/. + +Project Euler is a series of challenging mathematical/computer programming problems that will require more than just mathematical +insights to solve. Project Euler is ideal for mathematicians who are learning to code. + +Here the efficiency of your code is also checked. +I've tried to provide all the best possible solutions. + +PROBLEMS: + +1. If we list all the natural numbers below 10 that are multiples of 3 or 5, we get 3,5,6 and 9. The sum of these multiples is 23. + Find the sum of all the multiples of 3 or 5 below N. + +2. Each new term in the Fibonacci sequence is generated by adding the previous two terms. By starting with 1 and 2, + the first 10 terms will be: + 1,2,3,5,8,13,21,34,55,89,.. + By considering the terms in the Fibonacci sequence whose values do not exceed n, find the sum of the even-valued terms. + e.g. for n=10, we have {2,8}, sum is 10. + +3. The prime factors of 13195 are 5,7,13 and 29. What is the largest prime factor of a given number N? + e.g. for 10, largest prime factor = 5. For 17, largest prime factor = 17. + +4. A palindromic number reads the same both ways. The largest palindrome made from the product of two 2-digit numbers is 9009 = 91 × 99. + Find the largest palindrome made from the product of two 3-digit numbers which is less than N. + +5. 2520 is the smallest number that can be divided by each of the numbers from 1 to 10 without any remainder. 
+ What is the smallest positive number that is evenly divisible (divisible with no remainder) by all of the numbers from 1 to N? + +6. The sum of the squares of the first ten natural numbers is, + 1^2 + 2^2 + ... + 10^2 = 385 + The square of the sum of the first ten natural numbers is, + (1 + 2 + ... + 10)^2 = 55^2 = 3025 + Hence the difference between the sum of the squares of the first ten natural numbers and the square of the sum is 3025 − 385 = 2640. + Find the difference between the sum of the squares of the first N natural numbers and the square of the sum. + +7. By listing the first six prime numbers: 2, 3, 5, 7, 11, and 13, we can see that the 6th prime is 13. + What is the Nth prime number? + +9. A Pythagorean triplet is a set of three natural numbers, a < b < c, for which, + a^2 + b^2 = c^2 + There exists exactly one Pythagorean triplet for which a + b + c = 1000. + Find the product abc. + +14. The following iterative sequence is defined for the set of positive integers: + n → n/2 (n is even) + n → 3n + 1 (n is odd) + Using the rule above and starting with 13, we generate the following sequence: + 13 → 40 → 20 → 10 → 5 → 16 → 8 → 4 → 2 → 1 + Which starting number, under one million, produces the longest chain? + +16. 2^15 = 32768 and the sum of its digits is 3 + 2 + 7 + 6 + 8 = 26. + What is the sum of the digits of the number 2^1000? +20. n! means n × (n − 1) × ... × 3 × 2 × 1 + For example, 10! = 10 × 9 × ... × 3 × 2 × 1 = 3628800, + and the sum of the digits in the number 10! is 3 + 6 + 2 + 8 + 8 + 0 + 0 = 27. + Find the sum of the digits in the number 100!
diff --git a/README.md b/README.md index 1d8d3386ed91..70077e98fd5a 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# The Algorithms - Python [![Build Status](https://travis-ci.org/TheAlgorithms/Python.svg)](https://travis-ci.org/TheAlgorithms/Python) +# The Algorithms - Python ### All algorithms implemented in Python (for education) @@ -74,7 +74,7 @@ __Properties__ ### Shell ![alt text][shell-image] -From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywherem considereing every nth element gives a sorted list. Such a list is said to be h-sorted. Equivanelty, it can be thought of as h intterleaved lists, each individually sorted. +From [Wikipedia][shell-wiki]: Shellsort is a generalization of insertion sort that allows the exchange of items that are far apart. The idea is to arrange the list of elements so that, starting anywhere, considering every nth element gives a sorted list. Such a list is said to be h-sorted. Equivalently, it can be thought of as h interleaved lists, each individually sorted. __Properties__ * Worst case performance O(nlog2 2n) @@ -83,7 +83,7 @@ __Properties__ ###### View the algorithm in [action][shell-toptal] -###Time-Compexity Graphs +### Time-Compexity Graphs Comparing the complexity of sorting algorithms (Bubble Sort, Insertion Sort, Selection Sort) @@ -128,6 +128,13 @@ The method is named after **Julius Caesar**, who used it in his private correspo The encryption step performed by a Caesar cipher is often incorporated as part of more complex schemes, such as the Vigenère cipher, and still has modern application in the ROT13 system. As with all single-alphabet substitution ciphers, the Caesar cipher is easily broken and in modern practice offers essentially no communication security. 
###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Caesar_cipher) +### Vigenère +The **Vigenère cipher** is a method of encrypting alphabetic text by using a series of **interwoven Caesar ciphers** based on the letters of a keyword. It is **a form of polyalphabetic substitution**.
+The Vigenère cipher has been reinvented many times. The method was originally described by Giovan Battista Bellaso in his 1553 book La cifra del. Sig. Giovan Battista Bellaso; however, the scheme was later misattributed to Blaise de Vigenère in the 19th century, and is now widely known as the "Vigenère cipher".
+Though the cipher is easy to understand and implement, for three centuries it resisted all attempts to break it; this earned it the description **le chiffre indéchiffrable**(French for 'the indecipherable cipher'). +Many people have tried to implement encryption schemes that are essentially Vigenère ciphers. Friedrich Kasiski was the first to publish a general method of deciphering a Vigenère cipher in 1863. +###### Source: [Wikipedia](https://en.wikipedia.org/wiki/Vigen%C3%A8re_cipher) + ### Transposition In cryptography, a **transposition cipher** is a method of encryption by which the positions held by units of plaintext (which are commonly characters or groups of characters) are shifted according to a regular system, so that the ciphertext constitutes a permutation of the plaintext. That is, the order of the units is changed (the plaintext is reordered).
class XORCipher(object):
    """
    XOR cipher for strings and text files.

    Every character is XOR-ed with one integer key, so running the same
    operation twice with the same key restores the original text.

    Overview about methods

    - encrypt : list of char
    - decrypt : list of char
    - encrypt_string : str
    - decrypt_string : str
    - encrypt_file : boolean
    - decrypt_file : boolean

    In every method a 'key' of 0 (the default) means "use the key given to
    the constructor"; if that is 0 as well, the key 1 is used.
    """

    def __init__(self, key=0):
        """
        simple constructor that receives a key or uses
        default key = 0
        """
        # private field (name-mangled to _XORCipher__key)
        self.__key = key

    def _effective_key(self, key):
        """
        Resolve the key that is really applied.

        Falls back from 'key' to the constructor key to 1, then reduces
        values above 255 in steps of 255.  The reduction is kept exactly
        as it always was so previously written ciphertexts still decrypt.
        """
        key = key or self.__key or 1
        while key > 255:
            key -= 255
        return key

    def _xor_chars(self, content, key):
        """Return a list with every character of 'content' XOR-ed with the resolved key."""
        key = self._effective_key(key)
        return [chr(ord(ch) ^ key) for ch in content]

    def _transform_file(self, file, out_name, transform, key):
        """
        Stream 'file' line by line through 'transform' into 'out_name'.

        Returns True on success and False when reading, writing or the
        character conversion fails.
        """
        try:
            with open(file, "r") as fin:
                with open(out_name, "w+") as fout:
                    for line in fin:
                        fout.write(transform(line, key))
        except (IOError, OSError, ValueError):
            # I/O problems, undecodable input (UnicodeError is a ValueError)
            # or a key that produces an invalid code point.
            return False
        return True

    def encrypt(self, content, key=0):
        """
        input: 'content' of type string and 'key' of type int
        output: encrypted string 'content' as a list of chars
        if key not passed the method uses the key by the constructor.
        otherwise key = 1
        """
        # precondition
        assert (isinstance(key, int) and isinstance(content, str))
        return self._xor_chars(content, key)

    def decrypt(self, content, key=0):
        """
        input: 'content' of type list and 'key' of type int
        output: decrypted string 'content' as a list of chars
        if key not passed the method uses the key by the constructor.
        otherwise key = 1
        """
        # precondition
        assert (isinstance(key, int) and isinstance(content, list))
        return self._xor_chars(content, key)

    def encrypt_string(self, content, key=0):
        """
        input: 'content' of type string and 'key' of type int
        output: encrypted string 'content'
        if key not passed the method uses the key by the constructor.
        otherwise key = 1
        """
        # precondition
        assert (isinstance(key, int) and isinstance(content, str))
        return "".join(self._xor_chars(content, key))

    def decrypt_string(self, content, key=0):
        """
        input: 'content' of type string and 'key' of type int
        output: decrypted string 'content'
        if key not passed the method uses the key by the constructor.
        otherwise key = 1
        """
        # precondition
        assert (isinstance(key, int) and isinstance(content, str))
        return "".join(self._xor_chars(content, key))

    def encrypt_file(self, file, key=0):
        """
        input: filename (str) and a key (int)
        output: returns true if encrypt process was
                successful otherwise false
        if key not passed the method uses the key by the constructor.
        otherwise key = 1

        The result is written to "encrypt.out" in the working directory.
        """
        # precondition
        assert (isinstance(file, str) and isinstance(key, int))
        return self._transform_file(file, "encrypt.out", self.encrypt_string, key)

    def decrypt_file(self, file, key=0):
        """
        input: filename (str) and a key (int)
        output: returns true if decrypt process was
                successful otherwise false
        if key not passed the method uses the key by the constructor.
        otherwise key = 1

        The result is written to "decrypt.out" in the working directory.
        """
        # precondition
        assert (isinstance(file, str) and isinstance(key, int))
        return self._transform_file(file, "decrypt.out", self.decrypt_string, key)
def chunker(seq, size):
    """Yield consecutive tuples of up to ``size`` items taken from ``seq``."""
    it = iter(seq)
    while True:
        chunk = tuple(itertools.islice(it, size))
        if not chunk:
            return
        yield chunk


def prepare_input(dirty):
    """
    Prepare the plaintext by up-casing it, folding J into I
    and separating repeated letters with X's

    Non-letters are dropped.  The result always has an even length (an
    'X' is appended when needed) so it can be split into digrams; an
    input without any letters yields the empty string.
    """
    # The 5x5 table has no J, so J must become I before any lookup.
    letters = ''.join(c.upper() for c in dirty if c in string.ascii_letters)
    letters = letters.replace('J', 'I')
    if not letters:
        return letters

    pieces = []
    for current, following in zip(letters, letters[1:]):
        pieces.append(current)
        if current == following:
            pieces.append('X')
    pieces.append(letters[-1])

    # every element is one character, so the list length is the text length
    if len(pieces) % 2:
        pieces.append('X')

    return ''.join(pieces)


def generate_table(key):
    """
    Build the Playfair key square as a flat list of 25 letters.

    Letters of ``key`` come first (duplicates and non-letters ignored,
    J treated as I), followed by the remaining alphabet.
    """
    # I and J are used interchangeably to allow
    # us to use a 5x5 table (25 letters)
    alphabet = "ABCDEFGHIKLMNOPQRSTUVWXYZ"
    # we're using a list instead of a '2d' array because it makes the math
    # for setting up the table and doing the actual encoding/decoding simpler
    table = []

    # key letters first, then whatever part of the alphabet is still missing
    for char in key.upper().replace('J', 'I') + alphabet:
        if char in alphabet and char not in table:
            table.append(char)

    return table


def _translate(text, key, step):
    """
    Apply the Playfair digram rules to ``text``.

    ``step`` is +1 to encode (move right / down) and -1 to decode
    (move left / up); the rectangle rule is its own inverse.
    """
    table = generate_table(key)
    out = []

    # https://en.wikipedia.org/wiki/Playfair_cipher#Description
    for char1, char2 in chunker(text, 2):
        row1, col1 = divmod(table.index(char1), 5)
        row2, col2 = divmod(table.index(char2), 5)

        if row1 == row2:
            out.append(table[row1 * 5 + (col1 + step) % 5])
            out.append(table[row2 * 5 + (col2 + step) % 5])
        elif col1 == col2:
            out.append(table[((row1 + step) % 5) * 5 + col1])
            out.append(table[((row2 + step) % 5) * 5 + col2])
        else:  # rectangle
            out.append(table[row1 * 5 + col2])
            out.append(table[row2 * 5 + col1])

    return ''.join(out)


def encode(plaintext, key):
    """Return the Playfair ciphertext of ``plaintext`` (cleaned by prepare_input) under ``key``."""
    return _translate(prepare_input(plaintext), key, 1)


def decode(ciphertext, key):
    """
    Return the plaintext digrams for ``ciphertext`` under ``key``.

    ``ciphertext`` must be what encode() produces: upper-case table
    letters with an even length.  Anything else raises ValueError.
    """
    return _translate(ciphertext, key, -1)
def dencrypt(s, n):
    """
    Rotate every ASCII letter of ``s`` by ``n`` places within its own case.

    Characters outside A-Z / a-z are copied unchanged.  With n == 13 the
    function is its own inverse (ROT13).
    """
    def rotate(ch, base):
        # position inside the alphabet, shifted and wrapped, back to a char
        return chr(base + (ord(ch) - base + n) % 26)

    pieces = []
    for ch in s:
        if 'A' <= ch <= 'Z':
            pieces.append(rotate(ch, ord('A')))
        elif 'a' <= ch <= 'z':
            pieces.append(rotate(ch, ord('a')))
        else:
            pieces.append(ch)
    return ''.join(pieces)


def main():
    """Print 'HELLO' after one and after two ROT13 passes."""
    plain = 'HELLO'

    rotated = dencrypt(plain, 13)
    print(rotated)  # URYYB

    print(dencrypt(rotated, 13))  # HELLO


if __name__ == '__main__':
    main()
a/ciphers/transposition_cipher_encrypt-decrypt_file.py +++ b/ciphers/transposition_cipher_encrypt-decrypt_file.py @@ -1,3 +1,4 @@ +from __future__ import print_function import time, os, sys import transposition_cipher as transCipher @@ -29,7 +30,7 @@ def main(): outputObj.close() totalTime = round(time.time() - startTime, 2) - print('Done (', totalTime, 'seconds )') + print(('Done (', totalTime, 'seconds )')) if __name__ == '__main__': main() diff --git a/ciphers/vigenere_cipher.py b/ciphers/vigenere_cipher.py index 95eeb431109f..5d5be0792835 100644 --- a/ciphers/vigenere_cipher.py +++ b/ciphers/vigenere_cipher.py @@ -1,3 +1,4 @@ +from __future__ import print_function LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' def main(): diff --git a/data_structures/AVL/AVL.py b/data_structures/AVL/AVL.py new file mode 100644 index 000000000000..d01e8f825368 --- /dev/null +++ b/data_structures/AVL/AVL.py @@ -0,0 +1,181 @@ +""" +An AVL tree +""" +from __future__ import print_function + + +class Node: + + def __init__(self, label): + self.label = label + self._parent = None + self._left = None + self._right = None + self.height = 0 + + @property + def right(self): + return self._right + + @right.setter + def right(self, node): + if node is not None: + node._parent = self + self._right = node + + @property + def left(self): + return self._left + + @left.setter + def left(self, node): + if node is not None: + node._parent = self + self._left = node + + @property + def parent(self): + return self._parent + + @parent.setter + def parent(self, node): + if node is not None: + self._parent = node + self.height = self.parent.height + 1 + else: + self.height = 0 + + +class AVL: + + def __init__(self): + self.root = None + self.size = 0 + + def insert(self, value): + node = Node(value) + + if self.root is None: + self.root = node + self.root.height = 0 + self.size = 1 + else: + # Same as Binary Tree + dad_node = None + curr_node = self.root + + while True: + if curr_node is not None: + + dad_node 
= curr_node + + if node.label < curr_node.label: + curr_node = curr_node.left + else: + curr_node = curr_node.right + else: + node.height = dad_node.height + dad_node.height += 1 + if node.label < dad_node.label: + dad_node.left = node + else: + dad_node.right = node + self.rebalance(node) + self.size += 1 + break + + def rebalance(self, node): + n = node + + while n is not None: + height_right = n.height + height_left = n.height + + if n.right is not None: + height_right = n.right.height + + if n.left is not None: + height_left = n.left.height + + if abs(height_left - height_right) > 1: + if height_left > height_right: + left_child = n.left + if left_child is not None: + h_right = (left_child.right.height + if (left_child.right is not None) else 0) + h_left = (left_child.left.height + if (left_child.left is not None) else 0) + if (h_left > h_right): + self.rotate_left(n) + break + else: + self.double_rotate_right(n) + break + else: + right_child = n.right + if right_child is not None: + h_right = (right_child.right.height + if (right_child.right is not None) else 0) + h_left = (right_child.left.height + if (right_child.left is not None) else 0) + if (h_left > h_right): + self.double_rotate_left(n) + break + else: + self.rotate_right(n) + break + n = n.parent + + def rotate_left(self, node): + aux = node.parent.label + node.parent.label = node.label + node.parent.right = Node(aux) + node.parent.right.height = node.parent.height + 1 + node.parent.left = node.right + + + def rotate_right(self, node): + aux = node.parent.label + node.parent.label = node.label + node.parent.left = Node(aux) + node.parent.left.height = node.parent.height + 1 + node.parent.right = node.right + + def double_rotate_left(self, node): + self.rotate_right(node.getRight().getRight()) + self.rotate_left(node) + + def double_rotate_right(self, node): + self.rotate_left(node.getLeft().getLeft()) + self.rotate_right(node) + + def empty(self): + if self.root is None: + return True + return False + 
class FenwickTree:
    """
    Fenwick tree (binary indexed tree) over positions 1..SIZE.

    Supports adding a value at one position and asking for the sum of a
    prefix.  Positions are 1-based: the low-bit arithmetic used to walk
    the tree does not work for position 0.
    """

    def __init__(self, SIZE):  # create fenwick tree with size SIZE
        self.Size = SIZE
        # slot 0 is never used; SIZE + 1 slots make position SIZE itself valid
        self.ft = [0] * (SIZE + 1)

    def update(self, i, val):  # update data (adding) in index i in O(lg N)
        """
        Add ``val`` at position ``i``.

        Raises ValueError for i < 1 (0 has no low bit, so the walk would
        never advance).  Positions above SIZE are ignored, as before.
        """
        if i < 1:
            raise ValueError("FenwickTree positions start at 1, got %r" % (i,))
        while i <= self.Size:
            self.ft[i] += val
            i += i & (-i)  # jump to the next node that covers position i

    def query(self, i):  # query cumulative data from index 1 to i in O(lg N)
        """
        Return the sum of positions 1..i (0 when i <= 0).

        Raises IndexError when i is larger than SIZE.
        """
        if i > self.Size:
            raise IndexError("FenwickTree position out of range: %r" % (i,))
        ret = 0
        while i > 0:
            ret += self.ft[i]
            i -= i & (-i)  # drop the lowest set bit: move to the preceding range
        return ret


if __name__ == '__main__':
    f = FenwickTree(100)
    f.update(1, 20)
    f.update(4, 4)
    print(f.query(1))
    print(f.query(3))
    print(f.query(4))
    f.update(2, -5)
    print(f.query(1))
    print(f.query(3))
__init__(self, N): + self.N = N + self.st = [0 for i in range(0,4*N)] # approximate the overall size of segment tree with array N + self.lazy = [0 for i in range(0,4*N)] # create array to store lazy update + self.flag = [0 for i in range(0,4*N)] # flag for lazy update + + def left(self, idx): + return idx*2 + + def right(self, idx): + return idx*2 + 1 + + def build(self, idx, l, r, A): + if l==r: + self.st[idx] = A[l-1] + else : + mid = (l+r)//2 + self.build(self.left(idx),l,mid, A) + self.build(self.right(idx),mid+1,r, A) + self.st[idx] = max(self.st[self.left(idx)] , self.st[self.right(idx)]) + + # update with O(lg N) (Normal segment tree without lazy update will take O(Nlg N) for each update) + def update(self, idx, l, r, a, b, val): # update(1, 1, N, a, b, v) for update val v to [a,b] + if self.flag[idx] == True: + self.st[idx] = self.lazy[idx] + self.flag[idx] = False + if l!=r: + self.lazy[self.left(idx)] = self.lazy[idx] + self.lazy[self.right(idx)] = self.lazy[idx] + self.flag[self.left(idx)] = True + self.flag[self.right(idx)] = True + + if r < a or l > b: + return True + if l >= a and r <= b : + self.st[idx] = val + if l!=r: + self.lazy[self.left(idx)] = val + self.lazy[self.right(idx)] = val + self.flag[self.left(idx)] = True + self.flag[self.right(idx)] = True + return True + mid = (l+r)//2 + self.update(self.left(idx),l,mid,a,b,val) + self.update(self.right(idx),mid+1,r,a,b,val) + self.st[idx] = max(self.st[self.left(idx)] , self.st[self.right(idx)]) + return True + + # query with O(lg N) + def query(self, idx, l, r, a, b): #query(1, 1, N, a, b) for query max of [a,b] + if self.flag[idx] == True: + self.st[idx] = self.lazy[idx] + self.flag[idx] = False + if l != r: + self.lazy[self.left(idx)] = self.lazy[idx] + self.lazy[self.right(idx)] = self.lazy[idx] + self.flag[self.left(idx)] = True + self.flag[self.right(idx)] = True + if r < a or l > b: + return -math.inf + if l >= a and r <= b: + return self.st[idx] + mid = (l+r)//2 + q1 = 
class SegmentTree:
    """
    Maximum segment tree over N values addressed by 1-based positions.

    Nodes live heap-style in ``self.st``: the root is index 1 and the
    children of node ``idx`` are ``2*idx`` and ``2*idx + 1``.  Every public
    operation is started at the root, e.g. ``query(1, 1, N, a, b)``.
    """

    def __init__(self, N):
        self.N = N
        self.st = [0 for i in range(0, 4 * N)]  # approximate the overall size of segment tree with array N

    def left(self, idx):
        """Return the index of the left child of node ``idx``."""
        return idx * 2

    def right(self, idx):
        """Return the index of the right child of node ``idx``."""
        return idx * 2 + 1

    def build(self, idx, l, r, A):
        """Fill the subtree at ``idx`` covering positions l..r from the 0-based list ``A``."""
        if l == r:
            self.st[idx] = A[l - 1]  # positions are 1-based, A is 0-based
        else:
            mid = (l + r) // 2
            self.build(self.left(idx), l, mid, A)
            self.build(self.right(idx), mid + 1, r, A)
            self.st[idx] = max(self.st[self.left(idx)], self.st[self.right(idx)])

    def update(self, idx, l, r, a, b, val):  # update(1, 1, N, a, b, v) for update val v to [a,b]
        """Assign ``val`` to every position in [a, b]; always returns True."""
        if r < a or l > b:
            return True
        if l == r:
            self.st[idx] = val
            return True
        mid = (l + r) // 2
        self.update(self.left(idx), l, mid, a, b, val)
        self.update(self.right(idx), mid + 1, r, a, b, val)
        self.st[idx] = max(self.st[self.left(idx)], self.st[self.right(idx)])
        return True

    def query(self, idx, l, r, a, b):  # query(1, 1, N, a, b) for query max of [a,b]
        """Return the maximum over positions [a, b], or -infinity if the ranges do not meet."""
        if r < a or l > b:
            # float('-inf') equals -math.inf but also exists before Python 3.5
            return float('-inf')
        if l >= a and r <= b:
            return self.st[idx]
        mid = (l + r) // 2
        q1 = self.query(self.left(idx), l, mid, a, b)
        q2 = self.query(self.right(idx), mid + 1, r, a, b)
        return max(q1, q2)

    def showData(self):
        """Print the current value of every position and return them as a list."""
        # use the tree's own size; a module-level N only exists in the demo below
        showList = [self.query(1, 1, self.N, i, i) for i in range(1, self.N + 1)]
        print(showList)
        return showList


if __name__ == '__main__':
    A = [1, 2, -4, 7, 3, -5, 6, 11, -20, 9, 14, 15, 5, 2, -8]
    N = 15
    segt = SegmentTree(N)
    segt.build(1, 1, N, A)
    print(segt.query(1, 1, N, 4, 6))
    print(segt.query(1, 1, N, 7, 11))
    print(segt.query(1, 1, N, 7, 12))
    segt.update(1, 1, N, 1, 3, 111)
    print(segt.query(1, 1, N, 1, 15))
    segt.update(1, 1, N, 7, 8, 235)
    segt.showData()
= curr_node.getRight() + #We insert the new node in a leaf + if new_node.getLabel() < parent_node.getLabel(): + parent_node.setLeft(new_node) + else: + parent_node.setRight(new_node) + #Set parent to the new node + new_node.setParent(parent_node) + + def delete(self, label): + if (not self.empty()): + #Look for the node with that label + node = self.getNode(label) + #If the node exists + if(node is not None): + #If it has no children + if(node.getLeft() is None and node.getRight() is None): + self.__reassignNodes(node, None) + node = None + #Has only right children + elif(node.getLeft() is None and node.getRight() is not None): + self.__reassignNodes(node, node.getRight()) + #Has only left children + elif(node.getLeft() is not None and node.getRight() is None): + self.__reassignNodes(node, node.getLeft()) + #Has two children + else: + #Gets the max value of the left branch + tmpNode = self.getMax(node.getLeft()) + #Deletes the tmpNode + self.delete(tmpNode.getLabel()) + #Assigns the value to the node to delete and keesp tree structure + node.setLabel(tmpNode.getLabel()) + + def getNode(self, label): + curr_node = None + #If the tree is not empty + if(not self.empty()): + #Get tree root + curr_node = self.getRoot() + #While we don't find the node we look for + #I am using lazy evaluation here to avoid NoneType Attribute error + while curr_node is not None and curr_node.getLabel() is not label: + #If node label is less than current node + if label < curr_node.getLabel(): + #We go left + curr_node = curr_node.getLeft() + else: + #Else we go right + curr_node = curr_node.getRight() + return curr_node + + def getMax(self, root = None): + if(root is not None): + curr_node = root + else: + #We go deep on the right branch + curr_node = self.getRoot() + if(not self.empty()): + while(curr_node.getRight() is not None): + curr_node = curr_node.getRight() + return curr_node + + def getMin(self, root = None): + if(root is not None): + curr_node = root + else: + #We go deep on 
the left branch + curr_node = self.getRoot() + if(not self.empty()): + curr_node = self.getRoot() + while(curr_node.getLeft() is not None): + curr_node = curr_node.getLeft() + return curr_node + + def empty(self): + if self.root is None: + return True + return False + + def __InOrderTraversal(self, curr_node): + nodeList = [] + if curr_node is not None: + nodeList.insert(0, curr_node) + nodeList = nodeList + self.__InOrderTraversal(curr_node.getLeft()) + nodeList = nodeList + self.__InOrderTraversal(curr_node.getRight()) + return nodeList + + def getRoot(self): + return self.root + + def __isRightChildren(self, node): + if(node == node.getParent().getRight()): + return True + return False + + def __reassignNodes(self, node, newChildren): + if(newChildren is not None): + newChildren.setParent(node.getParent()) + if(node.getParent() is not None): + #If it is the Right Children + if(self.__isRightChildren(node)): + node.getParent().setRight(newChildren) + else: + #Else it is the left children + node.getParent().setLeft(newChildren) + + #This function traversal the tree. By default it returns an + #In order traversal list. 
You can pass a function to traversal + #The tree as needed by client code + def traversalTree(self, traversalFunction = None, root = None): + if(traversalFunction is None): + #Returns a list of nodes in preOrder by default + return self.__InOrderTraversal(self.root) + else: + #Returns a list of nodes in the order that the users wants to + return traversalFunction(self.root) + + #Returns an string of all the nodes labels in the list + #In Order Traversal + def __str__(self): + list = self.__InOrderTraversal(self.root) + str = "" + for x in list: + str = str + " " + x.getLabel().__str__() + return str + +def InPreOrder(curr_node): + nodeList = [] + if curr_node is not None: + nodeList = nodeList + InPreOrder(curr_node.getLeft()) + nodeList.insert(0, curr_node.getLabel()) + nodeList = nodeList + InPreOrder(curr_node.getRight()) + return nodeList + +def testBinarySearchTree(): + ''' + Example + 8 + / \ + 3 10 + / \ \ + 1 6 14 + / \ / + 4 7 13 + ''' + + ''' + Example After Deletion + 7 + / \ + 1 4 + + ''' + t = BinarySearchTree() + t.insert(8) + t.insert(3) + t.insert(6) + t.insert(1) + t.insert(10) + t.insert(14) + t.insert(13) + t.insert(4) + t.insert(7) + + #Prints all the elements of the list in order traversal + print(t.__str__()) + + if(t.getNode(6) is not None): + print("The label 6 exists") + else: + print("The label 6 doesn't exist") + + if(t.getNode(-1) is not None): + print("The label -1 exists") + else: + print("The label -1 doesn't exist") + + if(not t.empty()): + print(("Max Value: ", t.getMax().getLabel())) + print(("Min Value: ", t.getMin().getLabel())) + + t.delete(13) + t.delete(10) + t.delete(8) + t.delete(3) + t.delete(6) + t.delete(14) + + #Gets all the elements of the tree In pre order + #And it prints them + list = t.traversalTree(InPreOrder, t.root) + for x in list: + print(x) + +if __name__ == "__main__": + testBinarySearchTree() diff --git a/data_structures/Graph/BellmanFord.py b/data_structures/Graph/BellmanFord.py new file mode 100644 index 
def printDist(dist, V):
    """Print one line per vertex 0..V-1 with its distance, or INF when unreachable."""
    print("\nVertex Distance")
    for i in range(V):
        if dist[i] != float('inf'):
            print(i, "\t", int(dist[i]), end="\t")
        else:
            print(i, "\t", "INF", end="\t")
        print()


def BellmanFord(graph, V, E, src):
    """
    Single-source shortest paths with the Bellman-Ford algorithm.

    graph: list of E edges, each a dict with "src", "dst" and "weight"
    V:     number of vertices (labelled 0..V-1)
    E:     number of edges taken from ``graph``
    src:   source vertex

    Prints the distance table and returns the list of distances.  When a
    negative cycle is detected a message is printed and None is returned.
    """
    infinity = float('inf')
    mdist = [infinity for i in range(V)]
    mdist[src] = 0.0

    # The edge list has E entries; iterating it by V skipped edges
    # (E > V) or ran off the end (E < V).
    edges = [graph[j] for j in range(E)]

    # V-1 rounds suffice: a shortest path uses at most V-1 edges.
    for i in range(V - 1):
        for edge in edges:
            u = edge["src"]
            v = edge["dst"]
            w = edge["weight"]

            if mdist[u] != infinity and mdist[u] + w < mdist[v]:
                mdist[v] = mdist[u] + w

    # Any edge that can still be relaxed lies on a negative cycle.
    for edge in edges:
        u = edge["src"]
        v = edge["dst"]
        w = edge["weight"]

        if mdist[u] != infinity and mdist[u] + w < mdist[v]:
            print("Negative cycle found. Solution not possible.")
            return

    printDist(mdist, V)
    return mdist


def main():
    """Read a graph interactively and print the shortest distances from a chosen source."""
    V = int(input("Enter number of vertices: "))
    E = int(input("Enter number of edges: "))

    graph = []

    for i in range(E):
        print("\nEdge ", i + 1)
        src = int(input("Enter source:"))
        dst = int(input("Enter destination:"))
        weight = float(input("Enter weight:"))
        graph.append({"src": src, "dst": dst, "weight": weight})

    gsrc = int(input("\nEnter shortest path source:"))
    BellmanFord(graph, V, E, gsrc)


# only prompt when run as a script, not when the module is imported
if __name__ == '__main__':
    main()
class Graph():
    """
    Directed graph stored as ``{vertex: [successor, ...]}`` with a
    depth-first traversal.
    """

    def __init__(self):
        self.vertex = {}

    # for printing the Graph vertexes
    def printGraph(self):
        """Print the raw adjacency dict, then one 'v -> a -> b' line per source vertex."""
        print(self.vertex)
        for i in self.vertex.keys():
            print(i, ' -> ', ' -> '.join([str(j) for j in self.vertex[i]]))

    # for adding the edge between two vertexes
    def addEdge(self, fromVertex, toVertex):
        """Add the directed edge fromVertex -> toVertex, creating the source entry if needed."""
        self.vertex.setdefault(fromVertex, []).append(toVertex)

    def DFS(self):
        """
        Depth-first traversal of the whole graph.

        Starts from every not-yet-visited source vertex in insertion
        order, prints each vertex as it is reached and returns the
        visiting order as a list.
        """
        # Track every vertex, including ones that only appear as a
        # destination; a list sized by the number of sources misses them.
        visited = {}
        for source, targets in self.vertex.items():
            visited.setdefault(source, False)
            for target in targets:
                visited.setdefault(target, False)

        order = []
        # destination-only vertices are always reached from some source
        for start in self.vertex:
            if not visited[start]:
                self.DFSRec(start, visited, order)
        return order

    def DFSRec(self, startVertex, visited, order=None):
        """
        Visit ``startVertex`` and, recursively, its unvisited successors.

        visited: mapping (or index-addressable list) vertex -> bool,
                 updated in place
        order:   optional list that receives the vertices in visiting order
        """
        # mark start vertex as visited
        visited[startVertex] = True
        if order is not None:
            order.append(startVertex)

        print(startVertex, end=' ')

        # Recur only for the vertexes adjacent to this node
        for i in self.vertex.get(startVertex, []):
            if not visited[i]:
                self.DFSRec(i, visited, order)


if __name__ == '__main__':
    g = Graph()
    g.addEdge(0, 1)
    g.addEdge(0, 2)
    g.addEdge(1, 2)
    g.addEdge(2, 0)
    g.addEdge(2, 3)
    g.addEdge(3, 3)

    g.printGraph()
    print('DFS:')
    g.DFS()

    # OUTPUT:
    # 0  ->  1 -> 2
    # 1  ->  2
    # 2  ->  0 -> 3
    # 3  ->  3
    # DFS:
    # 0 1 2 3
def printDist(dist, V):
    """Print the V x V distance matrix row by row, showing INF for unreachable pairs."""
    print("\nThe shortest path matrix using Floyd Warshall algorithm\n")
    for i in range(V):
        for j in range(V):
            if dist[i][j] != float('inf'):
                print(int(dist[i][j]), end="\t")
            else:
                print("INF", end="\t")
        print()


def FloydWarshall(graph, V):
    """
    All-pairs shortest paths with the Floyd-Warshall algorithm.

    graph: V x V matrix where graph[i][j] is the weight of edge i -> j
           and float('inf') marks a missing edge
    V:     number of vertices

    The input matrix is left untouched.  The result is printed and also
    returned as a new V x V matrix.
    """
    infinity = float('inf')
    # work on a copy so the caller's matrix is not modified
    dist = [[graph[i][j] for j in range(V)] for i in range(V)]

    # allow vertex k as an intermediate stop, one k at a time
    for k in range(V):
        for i in range(V):
            for j in range(V):
                if (dist[i][k] != infinity and dist[k][j] != infinity
                        and dist[i][k] + dist[k][j] < dist[i][j]):
                    dist[i][j] = dist[i][k] + dist[k][j]

    printDist(dist, V)
    return dist


def main():
    """Read a weighted directed graph interactively and print its shortest-path matrix."""
    V = int(input("Enter number of vertices: "))
    E = int(input("Enter number of edges: "))

    graph = [[float('inf') for i in range(V)] for j in range(V)]

    for i in range(V):
        graph[i][i] = 0.0

    for i in range(E):
        print("\nEdge ", i + 1)
        src = int(input("Enter source:"))
        dst = int(input("Enter destination:"))
        weight = float(input("Enter weight:"))
        graph[src][dst] = weight

    FloydWarshall(graph, V)


# only prompt when run as a script, not when the module is imported
if __name__ == '__main__':
    main()
self.List[fromVertex].append(toVertex) + else: + self.List[fromVertex] = [toVertex] + + def printList(self): + for i in self.List: + print((i,'->',' -> '.join([str(j) for j in self.List[i]]))) + +if __name__ == '__main__': + al = AdjacencyList() + al.addEdge(0, 1) + al.addEdge(0, 4) + al.addEdge(4, 1) + al.addEdge(4, 3) + al.addEdge(1, 0) + al.addEdge(1, 4) + al.addEdge(1, 3) + al.addEdge(1, 2) + al.addEdge(2, 3) + al.addEdge(3, 4) + + al.printList() + + # OUTPUT: + # 0 -> 1 -> 4 + # 1 -> 0 -> 4 -> 3 -> 2 + # 2 -> 3 + # 3 -> 4 + # 4 -> 1 -> 3 diff --git a/data_structures/Graph/Graph_list.py b/data_structures/Graph/Graph_list.py new file mode 100644 index 000000000000..d67bc96c4a81 --- /dev/null +++ b/data_structures/Graph/Graph_list.py @@ -0,0 +1,31 @@ +from __future__ import print_function + + +class Graph: + def __init__(self, vertex): + self.vertex = vertex + self.graph = [[0] for i in range(vertex)] + + def add_edge(self, u, v): + self.graph[u - 1].append(v - 1) + + def show(self): + for i in range(self.vertex): + print('%d: '% (i + 1), end=' ') + for j in self.graph[i]: + print('%d-> '% (j + 1), end=' ') + print(' ') + + + +g = Graph(100) + +g.add_edge(1,3) +g.add_edge(2,3) +g.add_edge(3,4) +g.add_edge(3,5) +g.add_edge(4,5) + + +g.show() + diff --git a/data_structures/Graph/Graph_matrix.py b/data_structures/Graph/Graph_matrix.py new file mode 100644 index 000000000000..de25301d6dd1 --- /dev/null +++ b/data_structures/Graph/Graph_matrix.py @@ -0,0 +1,32 @@ +from __future__ import print_function + + +class Graph: + + def __init__(self, vertex): + self.vertex = vertex + self.graph = [[0] * vertex for i in range(vertex) ] + + def add_edge(self, u, v): + self.graph[u - 1][v - 1] = 1 + self.graph[v - 1][u - 1] = 1 + + def show(self): + + for i in self.graph: + for j in i: + print(j, end=' ') + print(' ') + + + + +g = Graph(100) + +g.add_edge(1,4) +g.add_edge(4,2) +g.add_edge(4,5) +g.add_edge(2,5) +g.add_edge(5,3) +g.show() + diff --git 
a/data_structures/Graph/dijkstra_algorithm.py b/data_structures/Graph/dijkstra_algorithm.py new file mode 100644 index 000000000000..985c7f6c1301 --- /dev/null +++ b/data_structures/Graph/dijkstra_algorithm.py @@ -0,0 +1,212 @@ +# Title: Dijkstra's Algorithm for finding single source shortest path from scratch +# Author: Shubham Malik +# References: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm + +from __future__ import print_function +import math +import sys +# For storing the vertex set to retreive node with the lowest distance + + +class PriorityQueue: + # Based on Min Heap + def __init__(self): + self.cur_size = 0 + self.array = [] + self.pos = {} # To store the pos of node in array + + def isEmpty(self): + return self.cur_size == 0 + + def min_heapify(self, idx): + lc = self.left(idx) + rc = self.right(idx) + if lc < self.cur_size and self.array(lc)[0] < self.array(idx)[0]: + smallest = lc + else: + smallest = idx + if rc < self.cur_size and self.array(rc)[0] < self.array(smallest)[0]: + smallest = rc + if smallest != idx: + self.swap(idx, smallest) + self.min_heapify(smallest) + + def insert(self, tup): + # Inserts a node into the Priority Queue + self.pos[tup[1]] = self.cur_size + self.cur_size += 1 + self.array.append((sys.maxsize, tup[1])) + self.decrease_key((sys.maxsize, tup[1]), tup[0]) + + def extract_min(self): + # Removes and returns the min element at top of priority queue + min_node = self.array[0][1] + self.array[0] = self.array[self.cur_size - 1] + self.cur_size -= 1 + self.min_heapify(1) + del self.pos[min_node] + return min_node + + def left(self, i): + # returns the index of left child + return 2 * i + 1 + + def right(self, i): + # returns the index of right child + return 2 * i + 2 + + def par(self, i): + # returns the index of parent + return math.floor(i / 2) + + def swap(self, i, j): + # swaps array elements at indices i and j + # update the pos{} + self.pos[self.array[i][1]] = j + self.pos[self.array[j][1]] = i + temp = 
self.array[i] + self.array[i] = self.array[j] + self.array[j] = temp + + def decrease_key(self, tup, new_d): + idx = self.pos[tup[1]] + # assuming the new_d is atmost old_d + self.array[idx] = (new_d, tup[1]) + while idx > 0 and self.array[self.par(idx)][0] > self.array[idx][0]: + self.swap(idx, self.par(idx)) + idx = self.par(idx) + + +class Graph: + def __init__(self, num): + self.adjList = {} # To store graph: u -> (v,w) + self.num_nodes = num # Number of nodes in graph + # To store the distance from source vertex + self.dist = [0] * self.num_nodes + self.par = [-1] * self.num_nodes # To store the path + + def add_edge(self, u, v, w): + # Edge going from node u to v and v to u with weight w + # u (w)-> v, v (w) -> u + # Check if u already in graph + if u in self.adjList.keys(): + self.adjList[u].append((v, w)) + else: + self.adjList[u] = [(v, w)] + + # Assuming undirected graph + if v in self.adjList.keys(): + self.adjList[v].append((u, w)) + else: + self.adjList[v] = [(u, w)] + + def show_graph(self): + # u -> v(w) + for u in self.adjList: + print(u, '->', ' -> '.join(str("{}({})".format(v, w)) + for v, w in self.adjList[u])) + + def dijkstra(self, src): + # Flush old junk values in par[] + self.par = [-1] * self.num_nodes + # src is the source node + self.dist[src] = 0 + Q = PriorityQueue() + Q.insert((0, src)) # (dist from src, node) + for u in self.adjList.keys(): + if u != src: + self.dist[u] = sys.maxsize # Infinity + self.par[u] = -1 + + while not Q.isEmpty(): + u = Q.extract_min() # Returns node with the min dist from source + # Update the distance of all the neighbours of u and + # if their prev dist was INFINITY then push them in Q + for v, w in self.adjList[u]: + new_dist = self.dist[u] + w + if self.dist[v] > new_dist: + if self.dist[v] == sys.maxsize: + Q.insert((new_dist, v)) + else: + Q.decrease_key((self.dist[v], v), new_dist) + self.dist[v] = new_dist + self.par[v] = u + + # Show the shortest distances from src + self.show_distances(src) + + def 
show_distances(self, src): + print("Distance from node: {}".format(src)) + for u in range(self.num_nodes): + print('Node {} has distance: {}'.format(u, self.dist[u])) + + def show_path(self, src, dest): + # To show the shortest path from src to dest + # WARNING: Use it *after* calling dijkstra + path = [] + cost = 0 + temp = dest + # Backtracking from dest to src + while self.par[temp] != -1: + path.append(temp) + if temp != src: + for v, w in self.adjList[temp]: + if v == self.par[temp]: + cost += w + break + temp = self.par[temp] + path.append(src) + path.reverse() + + print('----Path to reach {} from {}----'.format(dest, src)) + for u in path: + print('{}'.format(u), end=' ') + if u != dest: + print('-> ', end='') + + print('\nTotal cost of path: ', cost) + + +if __name__ == '__main__': + graph = Graph(9) + graph.add_edge(0, 1, 4) + graph.add_edge(0, 7, 8) + graph.add_edge(1, 2, 8) + graph.add_edge(1, 7, 11) + graph.add_edge(2, 3, 7) + graph.add_edge(2, 8, 2) + graph.add_edge(2, 5, 4) + graph.add_edge(3, 4, 9) + graph.add_edge(3, 5, 14) + graph.add_edge(4, 5, 10) + graph.add_edge(5, 6, 2) + graph.add_edge(6, 7, 1) + graph.add_edge(6, 8, 6) + graph.add_edge(7, 8, 7) + graph.show_graph() + graph.dijkstra(0) + graph.show_path(0, 4) + +# OUTPUT +# 0 -> 1(4) -> 7(8) +# 1 -> 0(4) -> 2(8) -> 7(11) +# 7 -> 0(8) -> 1(11) -> 6(1) -> 8(7) +# 2 -> 1(8) -> 3(7) -> 8(2) -> 5(4) +# 3 -> 2(7) -> 4(9) -> 5(14) +# 8 -> 2(2) -> 6(6) -> 7(7) +# 5 -> 2(4) -> 3(14) -> 4(10) -> 6(2) +# 4 -> 3(9) -> 5(10) +# 6 -> 5(2) -> 7(1) -> 8(6) +# Distance from node: 0 +# Node 0 has distance: 0 +# Node 1 has distance: 4 +# Node 2 has distance: 12 +# Node 3 has distance: 19 +# Node 4 has distance: 21 +# Node 5 has distance: 11 +# Node 6 has distance: 9 +# Node 7 has distance: 8 +# Node 8 has distance: 14 +# ----Path to reach 4 from 0---- +# 0 -> 7 -> 6 -> 5 -> 4 +# Total cost of path: 21 diff --git a/data_structures/Graph/even_tree.py b/data_structures/Graph/even_tree.py new file mode 100644 index 
000000000000..9383ea9a13c1 --- /dev/null +++ b/data_structures/Graph/even_tree.py @@ -0,0 +1,70 @@ +""" +You are given a tree(a simple connected graph with no cycles). The tree has N +nodes numbered from 1 to N and is rooted at node 1. + +Find the maximum number of edges you can remove from the tree to get a forest +such that each connected component of the forest contains an even number of +nodes. + +Constraints +2 <= 2 <= 100 + +Note: The tree input will be such that it can always be decomposed into +components containing an even number of nodes. +""" +from __future__ import print_function +# pylint: disable=invalid-name +from collections import defaultdict + + +def dfs(start): + """DFS traversal""" + # pylint: disable=redefined-outer-name + ret = 1 + visited[start] = True + for v in tree.get(start): + if v not in visited: + ret += dfs(v) + if ret % 2 == 0: + cuts.append(start) + return ret + + +def even_tree(): + """ + 2 1 + 3 1 + 4 3 + 5 2 + 6 1 + 7 2 + 8 6 + 9 8 + 10 8 + On removing edges (1,3) and (1,6), we can get the desired result 2. 
+ """ + dfs(1) + + +if __name__ == '__main__': + n, m = 10, 9 + tree = defaultdict(list) + visited = {} + cuts = [] + count = 0 + edges = [ + (2, 1), + (3, 1), + (4, 3), + (5, 2), + (6, 1), + (7, 2), + (8, 6), + (9, 8), + (10, 8), + ] + for u, v in edges: + tree[u].append(v) + tree[v].append(u) + even_tree() + print(len(cuts) - 1) diff --git a/data_structures/Heap/heap.py b/data_structures/Heap/heap.py new file mode 100644 index 000000000000..e66d02b6d99f --- /dev/null +++ b/data_structures/Heap/heap.py @@ -0,0 +1,90 @@ +#!/usr/bin/python + +from __future__ import print_function + +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + +class Heap: + def __init__(self): + self.h = [] + self.currsize = 0 + + def leftChild(self,i): + if 2*i+1 < self.currsize: + return 2*i+1 + return None + + def rightChild(self,i): + if 2*i+2 < self.currsize: + return 2*i+2 + return None + + def maxHeapify(self,node): + if node < self.currsize: + m = node + lc = self.leftChild(node) + rc = self.rightChild(node) + if lc is not None and self.h[lc] > self.h[m]: + m = lc + if rc is not None and self.h[rc] > self.h[m]: + m = rc + if m!=node: + temp = self.h[node] + self.h[node] = self.h[m] + self.h[m] = temp + self.maxHeapify(m) + + def buildHeap(self,a): + self.currsize = len(a) + self.h = list(a) + for i in range(self.currsize/2,-1,-1): + self.maxHeapify(i) + + def getMax(self): + if self.currsize >= 1: + me = self.h[0] + temp = self.h[0] + self.h[0] = self.h[self.currsize-1] + self.h[self.currsize-1] = temp + self.currsize -= 1 + self.maxHeapify(0) + return me + return None + + def heapSort(self): + size = self.currsize + while self.currsize-1 >= 0: + temp = self.h[0] + self.h[0] = self.h[self.currsize-1] + self.h[self.currsize-1] = temp + self.currsize -= 1 + self.maxHeapify(0) + self.currsize = size + + def insert(self,data): + self.h.append(data) + curr = self.currsize + self.currsize+=1 + while self.h[curr] > self.h[curr/2]: + temp = self.h[curr/2] + 
self.h[curr/2] = self.h[curr] + self.h[curr] = temp + curr = curr/2 + + def display(self): + print(self.h) + +def main(): + l = list(map(int, raw_input().split())) + h = Heap() + h.buildHeap(l) + h.heapSort() + h.display() + +if __name__=='__main__': + main() + + diff --git a/data_structures/LinkedList/DoublyLinkedList.py b/data_structures/LinkedList/DoublyLinkedList.py index d152e465b399..6f17b7c81033 100644 --- a/data_structures/LinkedList/DoublyLinkedList.py +++ b/data_structures/LinkedList/DoublyLinkedList.py @@ -3,6 +3,9 @@ - This is an example of a double ended, doubly linked list. - Each link references the next link and the previous one. ''' +from __future__ import print_function + + class LinkedList: def __init__(self): self.head = None @@ -70,4 +73,4 @@ class Link: def __init__(self, x): self.value = x def displayLink(self): - print("{}".format(self.value), end=" ") \ No newline at end of file + print("{}".format(self.value), end=" ") diff --git a/data_structures/LinkedList/singly_LinkedList.py b/data_structures/LinkedList/singly_LinkedList.py new file mode 100644 index 000000000000..7358b3c07f35 --- /dev/null +++ b/data_structures/LinkedList/singly_LinkedList.py @@ -0,0 +1,67 @@ +from __future__ import print_function +class Node:#create a Node + def __int__(self,data): + self.data=data#given data + self.next=None#given next to None +class Linked_List: + + pass + + def insert_tail(Head,data): + if(Head.next is None): + Head.next = Node(data) + else: + insert_tail(Head.next, data) + + def insert_head(Head,data): + tamp = Head + if (tamp == None): + newNod = Node()#create a new Node + newNod.data = data + newNod.next = None + Head = newNod#make new node to Head + else: + newNod = Node() + newNod.data = data + newNod.next = Head#put the Head at NewNode Next + Head=newNod#make a NewNode to Head + return Head + + def printList(Head):#print every node data + tamp=Head + while tamp!=None: + print(tamp.data) + tamp=tamp.next + + def delete_head(Head):#delete from 
head + if Head!=None: + Head=Head.next + return Head#return new Head + + def delete_tail(Head):#delete from tail + if Head!=None: + tamp = Node() + tamp = Head + while (tamp.next).next!= None:#find the 2nd last element + tamp = tamp.next + tamp.next=None#delete the last element by give next None to 2nd last Element + return Head + + def isEmpty(Head): + return Head is None #Return if Head is none + + def reverse(Head): + prev = None + current = Head + + while(current): + # Store the current node's next node. + next_node = current.next + # Make the current node's next point backwards + current.next = prev + # Make the previous node be the current node + prev = current + # Make the current node the next node (to progress iteration) + current = next_node + # Return prev in order to put the head at the end + Head = prev diff --git a/data_structures/Queue/DeQueue.py b/data_structures/Queue/DeQueue.py new file mode 100644 index 000000000000..fdee64eb6ae0 --- /dev/null +++ b/data_structures/Queue/DeQueue.py @@ -0,0 +1,40 @@ +from __future__ import print_function +# Python code to demonstrate working of +# extend(), extendleft(), rotate(), reverse() + +# importing "collections" for deque operations +import collections + +# initializing deque +de = collections.deque([1, 2, 3,]) + +# using extend() to add numbers to right end +# adds 4,5,6 to right end +de.extend([4,5,6]) + +# printing modified deque +print ("The deque after extending deque at end is : ") +print (de) + +# using extendleft() to add numbers to left end +# adds 7,8,9 to right end +de.extendleft([7,8,9]) + +# printing modified deque +print ("The deque after extending deque at beginning is : ") +print (de) + +# using rotate() to rotate the deque +# rotates by 3 to left +de.rotate(-3) + +# printing modified deque +print ("The deque after rotating deque is : ") +print (de) + +# using reverse() to reverse the deque +de.reverse() + +# printing modified deque +print ("The deque after reversing deque is : ") +print 
(de) diff --git a/data_structures/Queue/QueueOnList.py b/data_structures/Queue/QueueOnList.py index 3e1c79be1f47..c8d0b41de5d5 100644 --- a/data_structures/Queue/QueueOnList.py +++ b/data_structures/Queue/QueueOnList.py @@ -3,6 +3,7 @@ class Queue(): def __init__(self): self.entries = [] self.length = 0 + self.front=0 def __str__(self): printed = '<' + str(self.entries)[1:-1] + '>' @@ -22,8 +23,9 @@ def put(self, item): item that was dequeued""" def get(self): self.length = self.length - 1 - dequeued = self.entries[0] - self.entries = self.entries[1:] + dequeued = self.entries[self.front] + self.front-=1 + self.entries = self.entries[self.front:] return dequeued """Rotates the queue {@code rotation} times diff --git a/data_structures/Stacks/balanced_parentheses.py b/data_structures/Stacks/balanced_parentheses.py new file mode 100644 index 000000000000..8d99358bea87 --- /dev/null +++ b/data_structures/Stacks/balanced_parentheses.py @@ -0,0 +1,23 @@ +from __future__ import print_function +from __future__ import absolute_import +from .Stack import Stack + +__author__ = 'Omkar Pathak' + + +def balanced_parentheses(parentheses): + """ Use a stack to check if a string of parentheses are balanced.""" + stack = Stack(len(parentheses)) + for parenthesis in parentheses: + if parenthesis == '(': + stack.push(parenthesis) + elif parenthesis == ')': + stack.pop() + return not stack.is_empty() + + +if __name__ == '__main__': + examples = ['((()))', '((())'] + print('Balanced parentheses demonstration:\n') + for example in examples: + print(example + ': ' + str(balanced_parentheses(example))) diff --git a/data_structures/Stacks/infix_to_postfix_conversion.py b/data_structures/Stacks/infix_to_postfix_conversion.py new file mode 100644 index 000000000000..75211fed258d --- /dev/null +++ b/data_structures/Stacks/infix_to_postfix_conversion.py @@ -0,0 +1,64 @@ +from __future__ import print_function +from __future__ import absolute_import +import string + +from .Stack import Stack + 
+__author__ = 'Omkar Pathak' + + +def is_operand(char): + return char in string.ascii_letters or char in string.digits + + +def precedence(char): + """ Return integer value representing an operator's precedence, or + order of operation. + + https://en.wikipedia.org/wiki/Order_of_operations + """ + dictionary = {'+': 1, '-': 1, + '*': 2, '/': 2, + '^': 3} + return dictionary.get(char, -1) + + +def infix_to_postfix(expression): + """ Convert infix notation to postfix notation using the Shunting-yard + algorithm. + + https://en.wikipedia.org/wiki/Shunting-yard_algorithm + https://en.wikipedia.org/wiki/Infix_notation + https://en.wikipedia.org/wiki/Reverse_Polish_notation + """ + stack = Stack(len(expression)) + postfix = [] + for char in expression: + if is_operand(char): + postfix.append(char) + elif char not in {'(', ')'}: + while (not stack.is_empty() + and precedence(char) <= precedence(stack.peek())): + postfix.append(stack.pop()) + stack.push(char) + elif char == '(': + stack.push(char) + elif char == ')': + while not stack.is_empty() and stack.peek() != '(': + postfix.append(stack.pop()) + # Pop '(' from stack. If there is no '(', there is a mismatched + # parentheses. 
+ if stack.peek() != '(': + raise ValueError('Mismatched parentheses') + stack.pop() + while not stack.is_empty(): + postfix.append(stack.pop()) + return ' '.join(postfix) + + +if __name__ == '__main__': + expression = 'a+b*(c^d-e)^(f+g*h)-i' + + print('Infix to Postfix Notation demonstration:\n') + print('Infix notation: ' + expression) + print('Postfix notation: ' + infix_to_postfix(expression)) diff --git a/data_structures/Stacks/next.py b/data_structures/Stacks/next.py new file mode 100644 index 000000000000..bca83339592c --- /dev/null +++ b/data_structures/Stacks/next.py @@ -0,0 +1,17 @@ +from __future__ import print_function +# Function to print element and NGE pair for all elements of list +def printNGE(arr): + + for i in range(0, len(arr), 1): + + next = -1 + for j in range(i+1, len(arr), 1): + if arr[i] < arr[j]: + next = arr[j] + break + + print(str(arr[i]) + " -- " + str(next)) + +# Driver program to test above function +arr = [11,13,21,3] +printNGE(arr) diff --git a/data_structures/Stacks/stack.py b/data_structures/Stacks/stack.py new file mode 100644 index 000000000000..66af8c025d8c --- /dev/null +++ b/data_structures/Stacks/stack.py @@ -0,0 +1,69 @@ +from __future__ import print_function +__author__ = 'Omkar Pathak' + + +class Stack(object): + """ A stack is an abstract data type that serves as a collection of + elements with two principal operations: push() and pop(). push() adds an + element to the top of the stack, and pop() removes an element from the top + of a stack. The order in which elements come off of a stack are + Last In, First Out (LIFO). 
+ + https://en.wikipedia.org/wiki/Stack_(abstract_data_type) + """ + + def __init__(self, limit=10): + self.stack = [] + self.limit = limit + + def __bool__(self): + return not bool(self.stack) + + def __str__(self): + return str(self.stack) + + def push(self, data): + """ Push an element to the top of the stack.""" + if len(self.stack) >= self.limit: + raise StackOverflowError + self.stack.append(data) + + def pop(self): + """ Pop an element off of the top of the stack.""" + if self.stack: + return self.stack.pop() + else: + raise IndexError('pop from an empty stack') + + def peek(self): + """ Peek at the top-most element of the stack.""" + if self.stack: + return self.stack[-1] + + def is_empty(self): + """ Check if a stack is empty.""" + return not bool(self.stack) + + def size(self): + """ Return the size of the stack.""" + return len(self.stack) + + +class StackOverflowError(BaseException): + pass + + +if __name__ == '__main__': + stack = Stack() + for i in range(10): + stack.push(i) + + print('Stack demonstration:\n') + print('Initial stack: ' + str(stack)) + print('pop(): ' + str(stack.pop())) + print('After pop(), the stack is now: ' + str(stack)) + print('peek(): ' + str(stack.peek())) + stack.push(100) + print('After push(100), the stack is now: ' + str(stack)) + print('is_empty(): ' + str(stack.is_empty())) + print('size(): ' + str(stack.size())) diff --git a/data_structures/Trie/Trie.py b/data_structures/Trie/Trie.py new file mode 100644 index 000000000000..7c886144d1f4 --- /dev/null +++ b/data_structures/Trie/Trie.py @@ -0,0 +1,75 @@ +""" +A Trie/Prefix Tree is a kind of search tree used to provide quick lookup +of words/patterns in a set of words. A basic Trie however has O(n^2) space complexity +making it impractical in practice. It however provides O(max(search_string, length of longest word)) lookup +time making it an optimal approach when space is not an issue. 
+ +""" + + +class TrieNode: + def __init__(self): + self.nodes = dict() # Mapping from char to TrieNode + self.is_leaf = False + + def insert_many(self, words: [str]): + """ + Inserts a list of words into the Trie + :param words: list of string words + :return: None + """ + for word in words: + self.insert(word) + + def insert(self, word: str): + """ + Inserts a word into the Trie + :param word: word to be inserted + :return: None + """ + curr = self + for char in word: + if char not in curr.nodes: + curr.nodes[char] = TrieNode() + curr = curr.nodes[char] + curr.is_leaf = True + + def find(self, word: str) -> bool: + """ + Tries to find word in a Trie + :param word: word to look for + :return: Returns True if word is found, False otherwise + """ + curr = self + for char in word: + if char not in curr.nodes: + return False + curr = curr.nodes[char] + return curr.is_leaf + + +def print_words(node: TrieNode, word: str): + """ + Prints all the words in a Trie + :param node: root node of Trie + :param word: Word variable should be empty at start + :return: None + """ + if node.is_leaf: + print(word, end=' ') + + for key, value in node.nodes.items(): + print_words(value, word + key) + + +def test(): + words = ['banana', 'bananas', 'bandana', 'band', 'apple', 'all', 'beast'] + root = TrieNode() + root.insert_many(words) + # print_words(root, '') + assert root.find('banana') + assert not root.find('bandanas') + assert not root.find('apps') + assert root.find('apple') + +test() diff --git a/other/anagrams.txt b/data_structures/UnionFind/__init__.py similarity index 100% rename from other/anagrams.txt rename to data_structures/UnionFind/__init__.py diff --git a/data_structures/UnionFind/tests_union_find.py b/data_structures/UnionFind/tests_union_find.py new file mode 100644 index 000000000000..b0708778ddbd --- /dev/null +++ b/data_structures/UnionFind/tests_union_find.py @@ -0,0 +1,78 @@ +from __future__ import absolute_import +from .union_find import UnionFind +import 
unittest + + +class TestUnionFind(unittest.TestCase): + def test_init_with_valid_size(self): + uf = UnionFind(5) + self.assertEqual(uf.size, 5) + + def test_init_with_invalid_size(self): + with self.assertRaises(ValueError): + uf = UnionFind(0) + + with self.assertRaises(ValueError): + uf = UnionFind(-5) + + def test_union_with_valid_values(self): + uf = UnionFind(10) + + for i in range(11): + for j in range(11): + uf.union(i, j) + + def test_union_with_invalid_values(self): + uf = UnionFind(10) + + with self.assertRaises(ValueError): + uf.union(-1, 1) + + with self.assertRaises(ValueError): + uf.union(11, 1) + + def test_same_set_with_valid_values(self): + uf = UnionFind(10) + + for i in range(11): + for j in range(11): + if i == j: + self.assertTrue(uf.same_set(i, j)) + else: + self.assertFalse(uf.same_set(i, j)) + + uf.union(1, 2) + self.assertTrue(uf.same_set(1, 2)) + + uf.union(3, 4) + self.assertTrue(uf.same_set(3, 4)) + + self.assertFalse(uf.same_set(1, 3)) + self.assertFalse(uf.same_set(1, 4)) + self.assertFalse(uf.same_set(2, 3)) + self.assertFalse(uf.same_set(2, 4)) + + uf.union(1, 3) + self.assertTrue(uf.same_set(1, 3)) + self.assertTrue(uf.same_set(1, 4)) + self.assertTrue(uf.same_set(2, 3)) + self.assertTrue(uf.same_set(2, 4)) + + uf.union(4, 10) + self.assertTrue(uf.same_set(1, 10)) + self.assertTrue(uf.same_set(2, 10)) + self.assertTrue(uf.same_set(3, 10)) + self.assertTrue(uf.same_set(4, 10)) + + def test_same_set_with_invalid_values(self): + uf = UnionFind(10) + + with self.assertRaises(ValueError): + uf.same_set(-1, 1) + + with self.assertRaises(ValueError): + uf.same_set(11, 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/data_structures/UnionFind/union_find.py b/data_structures/UnionFind/union_find.py new file mode 100644 index 000000000000..40eea67ac944 --- /dev/null +++ b/data_structures/UnionFind/union_find.py @@ -0,0 +1,87 @@ +class UnionFind(): + """ + https://en.wikipedia.org/wiki/Disjoint-set_data_structure + + The 
union-find is a disjoint-set data structure + + You can merge two sets and tell if one set belongs to + another one. + + It's used on the Kruskal Algorithm + (https://en.wikipedia.org/wiki/Kruskal%27s_algorithm) + + The elements are in range [0, size] + """ + def __init__(self, size): + if size <= 0: + raise ValueError("size should be greater than 0") + + self.size = size + + # The below plus 1 is because we are using elements + # in range [0, size]. It makes more sense. + + # Every set begins with only itself + self.root = [i for i in range(size+1)] + + # This is used for heuristic union by rank + self.weight = [0 for i in range(size+1)] + + def union(self, u, v): + """ + Union of the sets u and v. + Complexity: log(n). + Amortized complexity: < 5 (it's very fast). + """ + + self._validate_element_range(u, "u") + self._validate_element_range(v, "v") + + if u == v: + return + + # Using union by rank will guarantee the + # log(n) complexity + rootu = self._root(u) + rootv = self._root(v) + weight_u = self.weight[rootu] + weight_v = self.weight[rootv] + if weight_u >= weight_v: + self.root[rootv] = rootu + if weight_u == weight_v: + self.weight[rootu] += 1 + else: + self.root[rootu] = rootv + + def same_set(self, u, v): + """ + Return true if the elements u and v belongs to + the same set + """ + + self._validate_element_range(u, "u") + self._validate_element_range(v, "v") + + return self._root(u) == self._root(v) + + def _root(self, u): + """ + Get the element set root. + This uses the heuristic path compression + See wikipedia article for more details. 
+ """ + + if u != self.root[u]: + self.root[u] = self._root(self.root[u]) + + return self.root[u] + + def _validate_element_range(self, u, element_name): + """ + Raises ValueError if element is not in range + """ + if u < 0 or u > self.size: + msg = ("element {0} with value {1} " + "should be in range [0~{2}]")\ + .format(element_name, u, self.size) + raise ValueError(msg) diff --git a/dynamic_programming/FloydWarshall.py b/dynamic_programming/FloydWarshall.py new file mode 100644 index 000000000000..038499ca03b6 --- /dev/null +++ b/dynamic_programming/FloydWarshall.py @@ -0,0 +1,37 @@ +import math + +class Graph: + + def __init__(self, N = 0): # a graph with Node 0,1,...,N-1 + self.N = N + self.W = [[math.inf for j in range(0,N)] for i in range(0,N)] # adjacency matrix for weight + self.dp = [[math.inf for j in range(0,N)] for i in range(0,N)] # dp[i][j] stores minimum distance from i to j + + def addEdge(self, u, v, w): + self.dp[u][v] = w + + def floyd_warshall(self): + for k in range(0,self.N): + for i in range(0,self.N): + for j in range(0,self.N): + self.dp[i][j] = min(self.dp[i][j], self.dp[i][k] + self.dp[k][j]) + + def showMin(self, u, v): + return self.dp[u][v] + +if __name__ == '__main__': + graph = Graph(5) + graph.addEdge(0,2,9) + graph.addEdge(0,4,10) + graph.addEdge(1,3,5) + graph.addEdge(2,3,7) + graph.addEdge(3,0,10) + graph.addEdge(3,1,2) + graph.addEdge(3,2,1) + graph.addEdge(3,4,6) + graph.addEdge(4,1,3) + graph.addEdge(4,2,4) + graph.addEdge(4,3,9) + graph.floyd_warshall() + graph.showMin(1,4) + graph.showMin(0,3) diff --git a/dynamic_programming/abbreviation.py b/dynamic_programming/abbreviation.py new file mode 100644 index 000000000000..44a1689809b8 --- /dev/null +++ b/dynamic_programming/abbreviation.py @@ -0,0 +1,29 @@ +""" +https://www.hackerrank.com/challenges/abbr/problem +You can perform the following operation on some string, : + +1. Capitalize zero or more of 's lowercase letters at some index i + (i.e., make them uppercase). +2. 
Delete all of the remaining lowercase letters in . + +Example: +a=daBcd and b="ABC" +daBcd -> capitalize a and c(dABCd) -> remove d (ABC) +""" +def abbr(a, b): + n = len(a) + m = len(b) + dp = [[False for _ in range(m + 1)] for _ in range(n + 1)] + dp[0][0] = True + for i in range(n): + for j in range(m + 1): + if dp[i][j]: + if j < m and a[i].upper() == b[j]: + dp[i + 1][j + 1] = True + if a[i].islower(): + dp[i + 1][j] = True + return dp[n][m] + + +if __name__ == "__main__": + print abbr("daBcd", "ABC") # expect True diff --git a/dynamic_programming/coin_change.py b/dynamic_programming/coin_change.py new file mode 100644 index 000000000000..0116df0c024e --- /dev/null +++ b/dynamic_programming/coin_change.py @@ -0,0 +1,26 @@ +""" +You have m types of coins available in infinite quantities +where the value of each coins is given in the array S=[S0,... Sm-1] +Can you determine number of ways of making change for n units using +the given types of coins? +https://www.hackerrank.com/challenges/coin-change/problem +""" +from __future__ import print_function +def dp_count(S, m, n): + table = [0] * (n + 1) + + # Base case (If given value is 0) + table[0] = 1 + + # Pick all coins one by one and update table[] values + # after the index greater than or equal to the value of the + # picked coin + for i in range(0, m): + for j in range(S[i], n + 1): + table[j] += table[j - S[i]] + + return table[n] + +if __name__ == '__main__': + print(dp_count([1, 2, 3], 3, 4)) # answer 4 + print(dp_count([2, 5, 3, 6], 4, 10)) # answer 5 diff --git a/dynamic_programming/edit_distance.py b/dynamic_programming/edit_distance.py index 05682d2c66e6..335e5196ed53 100644 --- a/dynamic_programming/edit_distance.py +++ b/dynamic_programming/edit_distance.py @@ -7,6 +7,8 @@ The problem is : Given two strings A and B. Find the minimum number of operations to string B such that A = B. The permitted operations are removal, insertion, and substitution. 
""" +from __future__ import print_function + class EditDistance: """ @@ -51,11 +53,10 @@ def solve(self, A, B): return self.__solveDP(len(A)-1, len(B)-1) if __name__ == '__main__': - import sys - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 solver = EditDistance() @@ -63,10 +64,10 @@ def solve(self, A, B): print() print("Enter the first string: ", end="") - S1 = input_function() + S1 = raw_input().strip() print("Enter the second string: ", end="") - S2 = input_function() + S2 = raw_input().strip() print() print("The minimum Edit Distance is: %d" % (solver.solve(S1, S2))) diff --git a/dynamic_programming/fastfibonacci.py b/dynamic_programming/fastfibonacci.py new file mode 100644 index 000000000000..cdfa2dd08084 --- /dev/null +++ b/dynamic_programming/fastfibonacci.py @@ -0,0 +1,43 @@ +""" +This program calculates the nth Fibonacci number in O(log(n)). +It's possible to calculate F(1000000) in less than a second. 
def fibonacci(n):
    """Return the n-th Fibonacci number F(n) using fast doubling.

    Raises ValueError for negative ``n``. The recursion depth grows with
    log2(n), so very large indices are fine.
    """
    # No function annotations: the repository's CI also runs Python 2.7,
    # where annotations are a syntax error.
    if n < 0:
        raise ValueError("Negative arguments are not supported")
    return _fib(n)[0]


def _fib(n):
    """Return the pair (F(n), F(n+1))."""
    if n == 0:
        # (F(0), F(1))
        return (0, 1)
    # With (a, b) = (F(k), F(k+1)) and k = n // 2:
    #   F(2k)   = F(k) * (2*F(k+1) - F(k))
    #   F(2k+1) = F(k+1)^2 + F(k)^2
    a, b = _fib(n // 2)
    c = a * (b * 2 - a)
    d = a * a + b * b
    if n % 2 == 0:
        return (c, d)
    return (d, c + d)


class Fibonacci:
    """Table of Fibonacci numbers F(0)..F(N) built bottom-up."""

    def __init__(self, N=None):
        # fib_array[i] holds F(i). It stays empty when no limit is given
        # or when the limit is negative.
        self.fib_array = []
        if N is None:
            return
        N = int(N)
        if N < 0:
            return
        self.fib_array.append(0)
        if N >= 1:
            self.fib_array.append(1)
        for i in range(2, N + 1):
            self.fib_array.append(self.fib_array[i - 1] + self.fib_array[i - 2])

    def get(self, sequence_no=None):
        """Print F(0)..F(sequence_no); prints a message if unavailable.

        Always returns None (the result is printed, not returned).
        """
        if sequence_no is None:
            print("Please specify a value")
        elif 0 <= sequence_no < len(self.fib_array):
            print(self.fib_array[:sequence_no + 1])
        else:
            print("Out of bound.")


if __name__ == "__main__":
    import sys

    args = sys.argv[1:]
    if len(args) != 1:
        print("Too few or too much parameters given.")
        sys.exit(1)
    try:
        n = int(args[0])
    except ValueError:
        print("Could not convert data to an integer.")
        sys.exit(1)
    print("F(%d) = %d" % (n, fibonacci(n)))
from random import choice, shuffle

from numpy import array


def TFKMeansCluster(vectors, noofclusters):
    """
    K-Means Clustering using the TensorFlow 1.x graph/session API.
    'vectors' should be a n*k 2-D NumPy array, where n is the number
    of vectors of dimensionality k.
    'noofclusters' should be an integer.

    Returns (centroids, assignments): the evaluated centroid vectors and,
    for every input vector, the index of the cluster it ended up in.
    """
    # Imported here so the module can be loaded without TensorFlow installed.
    import tensorflow as tf

    noofclusters = int(noofclusters)
    assert noofclusters < len(vectors)

    # Find out the dimensionality
    dim = len(vectors[0])

    # Will help select random centroids from among the available vectors
    vector_indices = list(range(len(vectors)))
    shuffle(vector_indices)

    # GRAPH OF COMPUTATION
    # A fresh graph per call keeps the default graph from filling up with
    # unused ops and Variables from previous calls.
    graph = tf.Graph()

    # The session is created while `graph` is the default graph and is
    # closed automatically when the block exits.
    with graph.as_default(), tf.Session() as sess:

        # One Variable per centroid, initialised to a randomly chosen input
        # vector.
        centroids = [tf.Variable(vectors[vector_indices[i]])
                     for i in range(noofclusters)]
        # Ops that overwrite a centroid Variable with a fed value.
        centroid_value = tf.placeholder("float64", [dim])
        cent_assigns = [tf.assign(centroid, centroid_value)
                        for centroid in centroids]

        # One Variable per input vector holding its cluster index
        # (initialised to 0).
        assignments = [tf.Variable(0) for _ in range(len(vectors))]
        assignment_value = tf.placeholder("int32")
        cluster_assigns = [tf.assign(assignment, assignment_value)
                           for assignment in assignments]

        # Node computing the mean of the fed vectors along dimension 0.
        mean_input = tf.placeholder("float", [None, dim])
        mean_op = tf.reduce_mean(mean_input, 0)

        # Node computing the Euclidean distance between two vectors.
        # tf.subtract replaces tf.sub, which no longer exists in TF 1.x.
        v1 = tf.placeholder("float", [dim])
        v2 = tf.placeholder("float", [dim])
        euclid_dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2)))

        # Node picking the index of the nearest centroid from a list of
        # distances.
        centroid_distances = tf.placeholder("float", [noofclusters])
        cluster_assignment = tf.argmin(centroid_distances, 0)

        # The initializer must be created after all Variables so that it
        # covers each of them.
        init_op = tf.global_variables_initializer()
        sess.run(init_op)

        # CLUSTERING ITERATIONS
        # A fixed number of Expectation-Maximization rounds is used instead
        # of a stopping criterion.
        noofiterations = 100
        for iteration_n in range(noofiterations):

            # EXPECTATION STEP: assign every vector to its nearest centroid.
            for vector_n in range(len(vectors)):
                vect = vectors[vector_n]
                # This list must not be named 'centroid_distances'; that
                # name is the placeholder fed just below.
                distances = [sess.run(euclid_dist, feed_dict={
                    v1: vect, v2: sess.run(centroid)})
                    for centroid in centroids]
                assignment = sess.run(cluster_assignment, feed_dict={
                    centroid_distances: distances})
                sess.run(cluster_assigns[vector_n], feed_dict={
                    assignment_value: assignment})

            # MAXIMIZATION STEP: move every centroid to the mean of the
            # vectors assigned to it.
            for cluster_n in range(noofclusters):
                assigned_vects = [vectors[i] for i in range(len(vectors))
                                  if sess.run(assignments[i]) == cluster_n]
                if not assigned_vects:
                    # An empty cluster has no mean; keep its old centroid
                    # rather than feeding an empty array to mean_op.
                    continue
                new_location = sess.run(mean_op, feed_dict={
                    mean_input: array(assigned_vects)})
                sess.run(cent_assigns[cluster_n], feed_dict={
                    centroid_value: new_location})

        # Return centroids and assignments
        centroids = sess.run(centroids)
        assignments = sess.run(assignments)
        return centroids, assignments
def lcs_dp(x, y):
    """Return ``(length, seq)`` for a longest common subsequence of x and y.

    ``length`` is the LCS length and ``seq`` is the list of elements of one
    such subsequence, in order (so ``len(seq) == length``).
    """
    m = len(x)
    n = len(y)

    # L[i][j] is the LCS length of x[:i] and y[:j]; row 0 / column 0 stay 0.
    L = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if x[i - 1] == y[j - 1]:
                L[i][j] = L[i - 1][j - 1] + 1
            else:
                L[i][j] = max(L[i - 1][j], L[i][j - 1])

    # Walk back from L[m][n] to recover one actual subsequence. Collecting
    # every matching cell while filling the table would gather characters
    # that are not part of any single common subsequence.
    seq = []
    i, j = m, n
    while i > 0 and j > 0:
        if x[i - 1] == y[j - 1]:
            seq.append(x[i - 1])
            i -= 1
            j -= 1
        elif L[i - 1][j] >= L[i][j - 1]:
            i -= 1
        else:
            j -= 1
    seq.reverse()

    return L[m][n], seq


if __name__ == '__main__':
    x = 'AGGTAB'
    y = 'GXTXAYB'
    print(lcs_dp(x, y))
def CeilIndex(v, l, r, key):
    """Binary-search ``v`` for the first index in (l, r] with v[i] >= key.

    ``l`` is an exclusive lower bound (may be -1) and ``r`` is returned
    when no smaller index qualifies.
    """
    while r - l > 1:
        # Floor division: plain "/" yields a float on Python 3, which
        # cannot be used as a list index.
        m = (l + r) // 2
        if v[m] >= key:
            r = m
        else:
            l = m

    return r


def LongestIncreasingSubsequenceLength(v):
    """Return the length of the longest strictly increasing subsequence."""
    if len(v) == 0:
        return 0

    # tail[k] is the smallest value that ends an increasing subsequence of
    # length k + 1 found so far; only tail[:length] is meaningful.
    tail = [0] * len(v)
    length = 1

    tail[0] = v[0]

    for i in range(1, len(v)):
        if v[i] < tail[0]:
            # New smallest value: best start for any future subsequence.
            tail[0] = v[i]
        elif v[i] > tail[length - 1]:
            # Extends the longest subsequence found so far.
            tail[length] = v[i]
            length += 1
        else:
            # Replaces the first tail value that is >= v[i].
            tail[CeilIndex(tail, -1, length - 1, v[i])] = v[i]

    return length


v = [2, 5, 3, 7, 11, 8, 10, 13, 6]
print(LongestIncreasingSubsequenceLength(v))
# Chaos Machine (K, t, m)
K = [0.33, 0.44, 0.55, 0.44, 0.33]
t = 3
m = 5

# Buffer Space (with Parameters Space)
buffer_space, params_space = [], []

# Machine Time
machine_time = 0


def push(seed):
    """Absorb one input value ``seed`` into every cell of the machine."""
    global machine_time

    # Choosing Dynamical Systems (All)
    for key, value in enumerate(buffer_space):
        # Evolution Parameter
        e = float(seed / value)

        # Control Theory: Orbit Change
        value = (buffer_space[(key + 1) % m] + e) % 1

        # Control Theory: Trajectory Change
        r = (params_space[key] + e) % 1 + 3

        # Modification (Transition Function) - Jumps
        buffer_space[key] = round(float(r * value * (1 - value)), 10)
        params_space[key] = r  # Saving to Parameters Space

    # Logistic Map
    assert max(buffer_space) < 1
    assert max(params_space) < 4

    # Machine Time
    machine_time += 1


def pull():
    """Advance one cell ``t`` steps and return a pseudo-random integer."""
    global machine_time

    # PRNG (Xorshift by George Marsaglia)
    def xorshift(X, Y):
        X ^= Y >> 13
        Y ^= X << 17
        X ^= Y >> 5
        return X

    # Choosing Dynamical Systems (Increment)
    key = machine_time % m

    # Evolution (Time Length)
    for _ in range(0, t):
        # Variables (Position + Parameters)
        r = params_space[key]
        value = buffer_space[key]

        # Modification (Transition Function) - Flow
        buffer_space[key] = round(float(r * value * (1 - value)), 10)
        params_space[key] = (machine_time * 0.01 + r * 1.01) % 1 + 3

    # Choosing Chaotic Data
    X = int(buffer_space[(key + 2) % m] * (10 ** 10))
    Y = int(buffer_space[(key - 2) % m] * (10 ** 10))

    # Machine Time
    machine_time += 1

    return xorshift(X, Y) % 0xFFFFFFFF


def reset():
    """Restore the machine to its initial state."""
    global buffer_space, params_space, machine_time

    # Copy K: push()/pull() modify the buffer in place, and sharing the
    # list with K would overwrite the constant, so a later reset() would
    # no longer restore the initial state.
    buffer_space = list(K)
    params_space = [0] * m
    machine_time = 0


#######################################

# Initialization
reset()

if __name__ == '__main__':
    # The demo never terminates, so it must not run on import.
    import random

    # Pushing Data (Input)
    message = random.sample(range(0xFFFFFFFF), 100)
    for chunk in message:
        push(chunk)

    # Pulling Data (Output)
    while True:
        print("%s" % format(pull(), '#04x'))
        print(buffer_space)
        print(params_space)
100644 index 000000000000..71849904ccf2 --- /dev/null +++ b/machine_learning/decision_tree.py @@ -0,0 +1,140 @@ +""" +Implementation of a basic regression decision tree. +Input data set: The input data set must be 1-dimensional with continuous labels. +Output: The decision tree maps a real number input to a real number output. +""" +from __future__ import print_function + +import numpy as np + +class Decision_Tree: + def __init__(self, depth = 5, min_leaf_size = 5): + self.depth = depth + self.decision_boundary = 0 + self.left = None + self.right = None + self.min_leaf_size = min_leaf_size + self.prediction = None + + def mean_squared_error(self, labels, prediction): + """ + mean_squared_error: + @param labels: a one dimensional numpy array + @param prediction: a floating point value + return value: mean_squared_error calculates the error if prediction is used to estimate the labels + """ + if labels.ndim != 1: + print("Error: Input labels must be one dimensional") + + return np.mean((labels - prediction) ** 2) + + def train(self, X, y): + """ + train: + @param X: a one dimensional numpy array + @param y: a one dimensional numpy array. + The contents of y are the labels for the corresponding X values + + train does not have a return value + """ + + """ + this section is to check that the inputs conform to our dimensionality constraints + """ + if X.ndim != 1: + print("Error: Input data set must be one dimensional") + return + if len(X) != len(y): + print("Error: X and y have different lengths") + return + if y.ndim != 1: + print("Error: Data set labels must be one dimensional") + return + + if len(X) < 2 * self.min_leaf_size: + self.prediction = np.mean(y) + return + + if self.depth == 1: + self.prediction = np.mean(y) + return + + best_split = 0 + min_error = self.mean_squared_error(X,np.mean(y)) * 2 + + + """ + loop over all possible splits for the decision tree. find the best split. 
+ if no split exists that is less than 2 * error for the entire array + then the data set is not split and the average for the entire array is used as the predictor + """ + for i in range(len(X)): + if len(X[:i]) < self.min_leaf_size: + continue + elif len(X[i:]) < self.min_leaf_size: + continue + else: + error_left = self.mean_squared_error(X[:i], np.mean(y[:i])) + error_right = self.mean_squared_error(X[i:], np.mean(y[i:])) + error = error_left + error_right + if error < min_error: + best_split = i + min_error = error + + if best_split != 0: + left_X = X[:best_split] + left_y = y[:best_split] + right_X = X[best_split:] + right_y = y[best_split:] + + self.decision_boundary = X[best_split] + self.left = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size) + self.right = Decision_Tree(depth = self.depth - 1, min_leaf_size = self.min_leaf_size) + self.left.train(left_X, left_y) + self.right.train(right_X, right_y) + else: + self.prediction = np.mean(y) + + return + + def predict(self, x): + """ + predict: + @param x: a floating point value to predict the label of + the prediction function works by recursively calling the predict function + of the appropriate subtrees based on the tree's decision boundary + """ + if self.prediction is not None: + return self.prediction + elif self.left or self.right is not None: + if x >= self.decision_boundary: + return self.right.predict(x) + else: + return self.left.predict(x) + else: + print("Error: Decision tree not yet trained") + return None + +def main(): + """ + In this demonstration we're generating a sample data set from the sin function in numpy. + We then train a decision tree on the data set and use the decision tree to predict the + label of 10 different test values. Then the mean squared error over this test is displayed. 
import numpy

# List of input, output pairs
train_data = (((5, 2, 3), 15), ((6, 5, 9), 25),
              ((11, 12, 13), 41), ((1, 1, 1), 8), ((11, 12, 13), 41))
test_data = (((515, 22, 13), 555), ((61, 35, 49), 150))
parameter_vector = [2, 4, 1, 5]
m = len(train_data)
LEARNING_RATE = 0.009


def _error(example_no, data_set='train'):
    """
    :param data_set: train data or test data
    :param example_no: example number whose error has to be checked
    :return: hypothesis value minus actual output for that example.
    """
    return calculate_hypothesis_value(example_no, data_set) - output(example_no, data_set)


def _hypothesis_value(data_input_tuple):
    """
    Calculates hypothesis function value for a given input
    :param data_input_tuple: Input tuple of a particular example
    :return: Value of hypothesis function at that point.
    The bias input is fixed at 1 and is not part of the input data, so its
    weight, parameter_vector[0], is added separately after the loop.
    """
    hyp_val = 0
    for i in range(len(parameter_vector) - 1):
        hyp_val += data_input_tuple[i] * parameter_vector[i + 1]
    hyp_val += parameter_vector[0]
    return hyp_val


def output(example_no, data_set):
    """
    :param data_set: 'test' or 'train'
    :param example_no: example whose output is to be fetched
    :return: output for that example (None for any other data_set value)
    """
    if data_set == 'train':
        return train_data[example_no][1]
    elif data_set == 'test':
        return test_data[example_no][1]


def calculate_hypothesis_value(example_no, data_set):
    """
    Calculates hypothesis value for a given example
    :param data_set: 'test' or 'train'
    :param example_no: example whose hypothesis value is to be calculated
    :return: hypothesis value for that example (None for any other data_set)
    """
    if data_set == "train":
        return _hypothesis_value(train_data[example_no][0])
    elif data_set == "test":
        return _hypothesis_value(test_data[example_no][0])


def summation_of_cost_derivative(index, end=m):
    """
    Calculates the sum of cost function derivative over the training data
    :param index: feature index the derivative is taken with respect to
    :param end: number of training examples to sum over, default is m
    :return: Returns the summation of cost derivative
    Note: index -1 selects the bias parameter.
    """
    summation_value = 0
    for i in range(end):
        if index == -1:
            summation_value += _error(i)
        else:
            summation_value += _error(i) * train_data[i][0][index]
    return summation_value


def get_cost_derivative(index):
    """
    :param index: feature index the derivative is taken with respect to
    :return: derivative wrt that index, averaged over the m examples
    Note: index -1 selects the bias parameter.
    """
    # float(m) keeps this a true division on Python 2 as well.
    return summation_of_cost_derivative(index, m) / float(m)


def run_gradient_descent():
    """Update the global parameter_vector until a step changes no parameter
    by more than the absolute tolerance, then print the iteration count."""
    global parameter_vector
    # Tune these values to set a tolerance value for predicted output
    absolute_error_limit = 0.000002
    relative_error_limit = 0
    j = 0
    while True:
        j += 1
        # All derivatives use the current parameter_vector (simultaneous
        # update); parameter i corresponds to feature index i - 1.
        temp_parameter_vector = [
            parameter_vector[i] - LEARNING_RATE * get_cost_derivative(i - 1)
            for i in range(len(parameter_vector))
        ]
        if numpy.allclose(parameter_vector, temp_parameter_vector,
                          atol=absolute_error_limit, rtol=relative_error_limit):
            break
        parameter_vector = temp_parameter_vector
    print("Number of iterations: %d" % j)


def test_gradient_descent():
    """Print actual and predicted outputs for every test example."""
    for i in range(len(test_data)):
        print("Actual output value: %s" % output(i, 'test'))
        print("Hypothesis output: %s" % calculate_hypothesis_value(i, 'test'))


if __name__ == '__main__':
    run_gradient_descent()
    print("\nTesting gradient descent for a linear hypothesis function.\n")
    test_gradient_descent()
import numpy as np

try:
    from sklearn.metrics import pairwise_distances
except ImportError:
    # scikit-learn is optional: centroid_pairwise_dist falls back to a
    # NumPy computation for dense arrays.
    pairwise_distances = None

TAG = 'K-MEANS-CLUST/ '


def get_initial_centroids(data, k, seed=None):
    '''Randomly choose k data points as initial centroids'''
    if seed is not None:  # useful for obtaining consistent results
        np.random.seed(seed)
    n = data.shape[0]  # number of data points

    # Pick k indices from [0, n). Indices may repeat; kmeans() copes with
    # the empty clusters that duplicate centroids cause.
    rand_indices = np.random.randint(0, n, k)

    centroids = data[rand_indices, :]

    return centroids


def centroid_pairwise_dist(X, centroids):
    '''Return the matrix of Euclidean distances between rows of X and centroids'''
    if pairwise_distances is not None:
        return pairwise_distances(X, centroids, metric='euclidean')
    X = np.asarray(X, dtype=float)
    centroids = np.asarray(centroids, dtype=float)
    diff = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.sqrt((diff ** 2).sum(axis=2))


def assign_clusters(data, centroids):
    '''Return, for every data point, the index of its nearest centroid'''
    distances_from_centroids = centroid_pairwise_dist(data, centroids)
    cluster_assignment = np.argmin(distances_from_centroids, axis=1)

    return cluster_assignment


def revise_centroids(data, k, cluster_assignment, prev_centroids=None):
    '''Return the mean of the points assigned to each of the k clusters.

    prev_centroids: (optional) centroids from the previous step. When given,
                    an empty cluster keeps its previous centroid; without it
                    the mean of an empty cluster is NaN.'''
    new_centroids = []
    for i in range(k):
        # Select all data points that belong to cluster i.
        member_data_points = data[cluster_assignment == i]
        if member_data_points.shape[0] == 0 and prev_centroids is not None:
            centroid = prev_centroids[i]
        else:
            centroid = member_data_points.mean(axis=0)
        new_centroids.append(centroid)
    new_centroids = np.array(new_centroids)

    return new_centroids


def compute_heterogeneity(data, k, centroids, cluster_assignment):
    '''Return the sum of squared distances of every point to its centroid'''
    heterogeneity = 0.0
    for i in range(k):

        # Select all data points that belong to cluster i.
        member_data_points = data[cluster_assignment == i, :]

        if member_data_points.shape[0] > 0:  # check if i-th cluster is non-empty
            distances = centroid_pairwise_dist(member_data_points, [centroids[i]])
            squared_distances = distances ** 2
            heterogeneity += np.sum(squared_distances)

    return heterogeneity


def plot_heterogeneity(heterogeneity, k):
    '''Plot the recorded heterogeneity values against the iteration number'''
    # Imported here so clustering itself does not require matplotlib.
    from matplotlib import pyplot as plt

    plt.figure(figsize=(7, 4))
    plt.plot(heterogeneity, linewidth=4)
    plt.xlabel('# Iterations')
    plt.ylabel('Heterogeneity')
    plt.title('Heterogeneity of clustering over time, K={0:d}'.format(k))
    plt.rcParams.update({'font.size': 16})
    plt.show()


def kmeans(data, k, initial_centroids, maxiter=500, record_heterogeneity=None, verbose=False):
    '''This function runs k-means on given data and initial set of centroids.
       maxiter: maximum number of iterations to run.(default=500)
       record_heterogeneity: (optional) a list, to store the history of heterogeneity as function of iterations
                             if None, do not store the history.
       verbose: if True, print how many data points changed their cluster labels in each iteration

       Returns (centroids, cluster_assignment).'''
    centroids = initial_centroids[:]
    prev_cluster_assignment = None

    for itr in range(maxiter):
        if verbose:
            print(itr, end='')

        # 1. Make cluster assignments using nearest centroids
        cluster_assignment = assign_clusters(data, centroids)

        # 2. Compute a new centroid for each of the k clusters; an empty
        #    cluster keeps its current centroid instead of becoming NaN.
        centroids = revise_centroids(data, k, cluster_assignment, centroids)

        # Check for convergence: if none of the assignments changed, stop
        if prev_cluster_assignment is not None and \
                (prev_cluster_assignment == cluster_assignment).all():
            break

        # Print number of new assignments
        if prev_cluster_assignment is not None:
            num_changed = np.sum(prev_cluster_assignment != cluster_assignment)
            if verbose:
                print(' {0:5d} elements changed their cluster assignment.'.format(num_changed))

        # Record heterogeneity convergence metric
        if record_heterogeneity is not None:
            score = compute_heterogeneity(data, k, centroids, cluster_assignment)
            record_heterogeneity.append(score)

        prev_cluster_assignment = cluster_assignment[:]

    return centroids, cluster_assignment


# Mock test below
if False:  # change to true to run this test case.
    import sklearn.datasets as ds
    dataset = ds.load_iris()
    k = 3
    heterogeneity = []
    initial_centroids = get_initial_centroids(dataset['data'], k, seed=0)
    centroids, cluster_assignment = kmeans(dataset['data'], k, initial_centroids, maxiter=400,
                                           record_heterogeneity=heterogeneity, verbose=True)
    plot_heterogeneity(heterogeneity, k)
+""" +from __future__ import print_function + +import requests +import numpy as np + + +def collect_dataset(): + """ Collect dataset of CSGO + The dataset contains ADR vs Rating of a Player + :return : dataset obtained from the link, as matrix + """ + response = requests.get('https://raw.githubusercontent.com/yashLadha/' + + 'The_Math_of_Intelligence/master/Week1/ADRvs' + + 'Rating.csv') + lines = response.text.splitlines() + data = [] + for item in lines: + item = item.split(',') + data.append(item) + data.pop(0) # This is for removing the labels from the list + dataset = np.matrix(data) + return dataset + + +def run_steep_gradient_descent(data_x, data_y, + len_data, alpha, theta): + """ Run steep gradient descent and updates the Feature vector accordingly_ + :param data_x : contains the dataset + :param data_y : contains the output associated with each data-entry + :param len_data : length of the data_ + :param alpha : Learning rate of the model + :param theta : Feature vector (weight's for our model) + ;param return : Updated Feature's, using + curr_features - alpha_ * gradient(w.r.t. 
feature) + """ + n = len_data + + prod = np.dot(theta, data_x.transpose()) + prod -= data_y.transpose() + sum_grad = np.dot(prod, data_x) + theta = theta - (alpha / n) * sum_grad + return theta + + +def sum_of_square_error(data_x, data_y, len_data, theta): + """ Return sum of square error for error calculation + :param data_x : contains our dataset + :param data_y : contains the output (result vector) + :param len_data : len of the dataset + :param theta : contains the feature vector + :return : sum of square error computed from given feature's + """ + error = 0.0 + prod = np.dot(theta, data_x.transpose()) + prod -= data_y.transpose() + sum_elem = np.sum(np.square(prod)) + error = sum_elem / (2 * len_data) + return error + + +def run_linear_regression(data_x, data_y): + """ Implement Linear regression over the dataset + :param data_x : contains our dataset + :param data_y : contains the output (result vector) + :return : feature for line of best fit (Feature vector) + """ + iterations = 100000 + alpha = 0.0001550 + + no_features = data_x.shape[1] + len_data = data_x.shape[0] - 1 + + theta = np.zeros((1, no_features)) + + for i in range(0, iterations): + theta = run_steep_gradient_descent(data_x, data_y, + len_data, alpha, theta) + error = sum_of_square_error(data_x, data_y, len_data, theta) + print('At Iteration %d - Error is %.5f ' % (i + 1, error)) + + return theta + + +def main(): + """ Driver function """ + data = collect_dataset() + + len_data = data.shape[0] + data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float) + data_y = data[:, -1].astype(float) + + theta = run_linear_regression(data_x, data_y) + len_result = theta.shape[1] + print('Resultant Feature vector : ') + for i in range(0, len_result): + print('%.5f' % (theta[0, i])) + + +if __name__ == '__main__': + main() diff --git a/machine_learning/perceptron.py b/machine_learning/perceptron.py new file mode 100644 index 000000000000..8ac3e8fc69e9 --- /dev/null +++ b/machine_learning/perceptron.py @@ 
-0,0 +1,124 @@ +''' + + Perceptron + w = w + N * (d(k) - y) * x(k) + + Using perceptron network for oil analysis, + with Measuring of 3 parameters that represent chemical characteristics we can classify the oil, in p1 or p2 + p1 = -1 + p2 = 1 + +''' +from __future__ import print_function + +import random + + +class Perceptron: + def __init__(self, sample, exit, learn_rate=0.01, epoch_number=1000, bias=-1): + self.sample = sample + self.exit = exit + self.learn_rate = learn_rate + self.epoch_number = epoch_number + self.bias = bias + self.number_sample = len(sample) + self.col_sample = len(sample[0]) + self.weight = [] + + def trannig(self): + for sample in self.sample: + sample.insert(0, self.bias) + + for i in range(self.col_sample): + self.weight.append(random.random()) + + self.weight.insert(0, self.bias) + + epoch_count = 0 + + while True: + erro = False + for i in range(self.number_sample): + u = 0 + for j in range(self.col_sample + 1): + u = u + self.weight[j] * self.sample[i][j] + y = self.sign(u) + if y != self.exit[i]: + + for j in range(self.col_sample + 1): + + self.weight[j] = self.weight[j] + self.learn_rate * (self.exit[i] - y) * self.sample[i][j] + erro = True + #print('Epoch: \n',epoch_count) + epoch_count = epoch_count + 1 + # if you want controle the epoch or just by erro + if erro == False: + print(('\nEpoch:\n',epoch_count)) + print('------------------------\n') + #if epoch_count > self.epoch_number or not erro: + break + + def sort(self, sample): + sample.insert(0, self.bias) + u = 0 + for i in range(self.col_sample + 1): + u = u + self.weight[i] * sample[i] + + y = self.sign(u) + + if y == -1: + print(('Sample: ', sample)) + print('classification: P1') + else: + print(('Sample: ', sample)) + print('classification: P2') + + def sign(self, u): + return 1 if u >= 0 else -1 + + +samples = [ + [-0.6508, 0.1097, 4.0009], + [-1.4492, 0.8896, 4.4005], + [2.0850, 0.6876, 12.0710], + [0.2626, 1.1476, 7.7985], + [0.6418, 1.0234, 7.0427], + [0.2569, 
0.6730, 8.3265], + [1.1155, 0.6043, 7.4446], + [0.0914, 0.3399, 7.0677], + [0.0121, 0.5256, 4.6316], + [-0.0429, 0.4660, 5.4323], + [0.4340, 0.6870, 8.2287], + [0.2735, 1.0287, 7.1934], + [0.4839, 0.4851, 7.4850], + [0.4089, -0.1267, 5.5019], + [1.4391, 0.1614, 8.5843], + [-0.9115, -0.1973, 2.1962], + [0.3654, 1.0475, 7.4858], + [0.2144, 0.7515, 7.1699], + [0.2013, 1.0014, 6.5489], + [0.6483, 0.2183, 5.8991], + [-0.1147, 0.2242, 7.2435], + [-0.7970, 0.8795, 3.8762], + [-1.0625, 0.6366, 2.4707], + [0.5307, 0.1285, 5.6883], + [-1.2200, 0.7777, 1.7252], + [0.3957, 0.1076, 5.6623], + [-0.1013, 0.5989, 7.1812], + [2.4482, 0.9455, 11.2095], + [2.0149, 0.6192, 10.9263], + [0.2012, 0.2611, 5.4631] + +] + +exit = [-1, -1, -1, 1, 1, -1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, 1, 1, 1, 1, -1, -1, 1, -1, 1] + +network = Perceptron(sample=samples, exit = exit, learn_rate=0.01, epoch_number=1000, bias=-1) + +network.trannig() + +while True: + sample = [] + for i in range(3): + sample.insert(i, float(input('value: '))) + network.sort(sample) \ No newline at end of file diff --git a/machine_learning/scoring_functions.py b/machine_learning/scoring_functions.py new file mode 100755 index 000000000000..861d45c1f210 --- /dev/null +++ b/machine_learning/scoring_functions.py @@ -0,0 +1,78 @@ +import numpy as np + +""" Here I implemented the scoring functions. + MAE, MSE, RMSE, RMSLE are included. + + Those are used for calculating differences between + predicted values and actual values. + + Metrics are slightly differentiated. Sometimes squared, rooted, + even log is used. + + Using log and roots can be perceived as tools for penalizing big + erors. 
However, using appropriate metrics depends on the situations, + and types of data +""" + +#Mean Absolute Error +def mae(predict, actual): + predict = np.array(predict) + actual = np.array(actual) + + difference = abs(predict - actual) + score = difference.mean() + + return score + +#Mean Squared Error +def mse(predict, actual): + predict = np.array(predict) + actual = np.array(actual) + + difference = predict - actual + square_diff = np.square(difference) + + score = square_diff.mean() + return score + +#Root Mean Squared Error +def rmse(predict, actual): + predict = np.array(predict) + actual = np.array(actual) + + difference = predict - actual + square_diff = np.square(difference) + mean_square_diff = square_diff.mean() + score = np.sqrt(mean_square_diff) + return score + +#Root Mean Square Logarithmic Error +def rmsle(predict, actual): + predict = np.array(predict) + actual = np.array(actual) + + log_predict = np.log(predict+1) + log_actual = np.log(actual+1) + + difference = log_predict - log_actual + square_diff = np.square(difference) + mean_square_diff = square_diff.mean() + + score = np.sqrt(mean_square_diff) + + return score + +#Mean Bias Deviation +def mbd(predict, actual): + predict = np.array(predict) + actual = np.array(actual) + + difference = predict - actual + numerator = np.sum(difference) / len(predict) + denumerator = np.sum(actual) / len(predict) + print str(numerator) + print str(denumerator) + + score = float(numerator) / denumerator * 100 + + return score \ No newline at end of file diff --git a/other/FindingPrimes.py b/other/FindingPrimes.py index 3cb10f701ab3..035a14f4a335 100644 --- a/other/FindingPrimes.py +++ b/other/FindingPrimes.py @@ -2,6 +2,9 @@ -The sieve of Eratosthenes is an algorithm used to find prime numbers, less than or equal to a given value. 
-Illustration: https://upload.wikimedia.org/wikipedia/commons/b/b9/Sieve_of_Eratosthenes_animation.gif ''' +from __future__ import print_function + + from math import sqrt def SOE(n): check = round(sqrt(n)) #Need not check for multiples past the square root of n diff --git a/other/LinearCongruentialGenerator.py b/other/LinearCongruentialGenerator.py new file mode 100644 index 000000000000..34abdf34eaf3 --- /dev/null +++ b/other/LinearCongruentialGenerator.py @@ -0,0 +1,35 @@ +from __future__ import print_function +__author__ = "Tobias Carryer" + +from time import time + +class LinearCongruentialGenerator(object): + """ + A pseudorandom number generator. + """ + + def __init__( self, multiplier, increment, modulo, seed=int(time()) ): + """ + These parameters are saved and used when nextNumber() is called. + + modulo is the largest number that can be generated (exclusive). The most + efficent values are powers of 2. 2^32 is a common value. + """ + self.multiplier = multiplier + self.increment = increment + self.modulo = modulo + self.seed = seed + + def next_number( self ): + """ + The smallest number that can be generated is zero. + The largest number that can be generated is modulo-1. modulo is set in the constructor. + """ + self.seed = (self.multiplier * self.seed + self.increment) % self.modulo + return self.seed + +if __name__ == "__main__": + # Show the LCG in action. 
+ lcg = LinearCongruentialGenerator(1664525, 1013904223, 2<<31) + while True : + print(lcg.next_number()) \ No newline at end of file diff --git a/other/anagrams.py b/other/anagrams.py index 6150ea8e6892..44cd96b75f62 100644 --- a/other/anagrams.py +++ b/other/anagrams.py @@ -1,3 +1,4 @@ +from __future__ import print_function import collections, pprint, time, os start_time = time.time() @@ -25,4 +26,4 @@ def anagram(myword): file.write(pprint.pformat(all_anagrams)) total_time = round(time.time() - start_time, 2) -print('Done [', total_time, 'seconds ]') +print(('Done [', total_time, 'seconds ]')) diff --git a/other/binary_exponentiation.py b/other/binary_exponentiation.py new file mode 100644 index 000000000000..1a30fb8fd266 --- /dev/null +++ b/other/binary_exponentiation.py @@ -0,0 +1,49 @@ +""" +* Binary Exponentiation for Powers +* This is a method to find a^b in a time complexity of O(log b) +* This is one of the most commonly used methods of finding powers. +* Also useful in cases where solution to (a^b)%c is required, +* where a,b,c can be numbers over the computers calculation limits. +* Done using iteration, can also be done using recursion + +* @author chinmoy159 +* @version 1.0 dated 10/08/2017 +""" + + +def b_expo(a, b): + res = 1 + while b > 0: + if b&1: + res *= a + + a *= a + b >>= 1 + + return res + + +def b_expo_mod(a, b, c): + res = 1 + while b > 0: + if b&1: + res = ((res%c) * (a%c)) % c + + a *= a + b >>= 1 + + return res + +""" +* Wondering how this method works ! +* It's pretty simple. +* Let's say you need to calculate a ^ b +* RULE 1 : a ^ b = (a*a) ^ (b/2) ---- example : 4 ^ 4 = (4*4) ^ (4/2) = 16 ^ 2 +* RULE 2 : IF b is ODD, then ---- a ^ b = a * (a ^ (b - 1)) :: where (b - 1) is even. +* Once b is even, repeat the process to get a ^ b +* Repeat the process till b = 1 OR b = 0, because a^1 = a AND a^0 = 1 +* +* As far as the modulo is concerned, +* the fact : (a*b) % c = ((a%c) * (b%c)) % c +* Now apply RULE 1 OR 2 whichever is required. 
+""" diff --git a/other/binary_exponentiation_2.py b/other/binary_exponentiation_2.py new file mode 100644 index 000000000000..217a616c99fb --- /dev/null +++ b/other/binary_exponentiation_2.py @@ -0,0 +1,50 @@ +""" +* Binary Exponentiation with Multiplication +* This is a method to find a*b in a time complexity of O(log b) +* This is one of the most commonly used methods of finding result of multiplication. +* Also useful in cases where solution to (a*b)%c is required, +* where a,b,c can be numbers over the computers calculation limits. +* Done using iteration, can also be done using recursion + +* @author chinmoy159 +* @version 1.0 dated 10/08/2017 +""" + + +def b_expo(a, b): + res = 0 + while b > 0: + if b&1: + res += a + + a += a + b >>= 1 + + return res + + +def b_expo_mod(a, b, c): + res = 0 + while b > 0: + if b&1: + res = ((res%c) + (a%c)) % c + + a += a + b >>= 1 + + return res + + +""" +* Wondering how this method works ! +* It's pretty simple. +* Let's say you need to calculate a ^ b +* RULE 1 : a * b = (a+a) * (b/2) ---- example : 4 * 4 = (4+4) * (4/2) = 8 * 2 +* RULE 2 : IF b is ODD, then ---- a * b = a + (a * (b - 1)) :: where (b - 1) is even. +* Once b is even, repeat the process to get a * b +* Repeat the process till b = 1 OR b = 0, because a*1 = a AND a*0 = 0 +* +* As far as the modulo is concerned, +* the fact : (a+b) % c = ((a%c) + (b%c)) % c +* Now apply RULE 1 OR 2, whichever is required. 
+""" diff --git a/other/euclidean_gcd.py b/other/euclidean_gcd.py new file mode 100644 index 000000000000..30853e172076 --- /dev/null +++ b/other/euclidean_gcd.py @@ -0,0 +1,19 @@ +from __future__ import print_function +# https://en.wikipedia.org/wiki/Euclidean_algorithm + +def euclidean_gcd(a, b): + while b: + t = b + b = a % b + a = t + return a + +def main(): + print("GCD(3, 5) = " + str(euclidean_gcd(3, 5))) + print("GCD(5, 3) = " + str(euclidean_gcd(5, 3))) + print("GCD(1, 3) = " + str(euclidean_gcd(1, 3))) + print("GCD(3, 6) = " + str(euclidean_gcd(3, 6))) + print("GCD(6, 3) = " + str(euclidean_gcd(6, 3))) + +if __name__ == '__main__': + main() diff --git a/other/game_of_life/game_o_life.py b/other/game_of_life/game_o_life.py new file mode 100644 index 000000000000..32ebe0fc1301 --- /dev/null +++ b/other/game_of_life/game_o_life.py @@ -0,0 +1,118 @@ +'''Conway's Game Of Life, Author Anurag Kumar(mailto:anuragkumarak95@gmail.com) + +Requirements: + - numpy + - random + - time + - matplotlib + +Python: + - 3.5 + +Usage: + - $python3 game_o_life + +Game-Of-Life Rules: + + 1. + Any live cell with fewer than two live neighbours + dies, as if caused by under-population. + 2. + Any live cell with two or three live neighbours lives + on to the next generation. + 3. + Any live cell with more than three live neighbours + dies, as if by over-population. + 4. + Any dead cell with exactly three live neighbours be- + comes a live cell, as if by reproduction. 
+ ''' +import numpy as np +import random, time, sys +from matplotlib import pyplot as plt +import matplotlib.animation as animation +from matplotlib.colors import ListedColormap + +usage_doc='Usage of script: script_nama ' + +choice = [0]*100 + [1]*10 +random.shuffle(choice) + +def create_canvas(size): + canvas = [ [False for i in range(size)] for j in range(size)] + return canvas + +def seed(canvas): + for i,row in enumerate(canvas): + for j,_ in enumerate(row): + canvas[i][j]=bool(random.getrandbits(1)) + +def run(canvas): + ''' This function runs the rules of game through all points, and changes their status accordingly.(in the same canvas) + @Args: + -- + canvas : canvas of population to run the rules on. + + @returns: + -- + None + ''' + canvas = np.array(canvas) + next_gen_canvas = np.array(create_canvas(canvas.shape[0])) + for r, row in enumerate(canvas): + for c, pt in enumerate(row): + # print(r-1,r+2,c-1,c+2) + next_gen_canvas[r][c] = __judge_point(pt,canvas[r-1:r+2,c-1:c+2]) + + canvas = next_gen_canvas + del next_gen_canvas # cleaning memory as we move on. + return canvas.tolist() + +def __judge_point(pt,neighbours): + dead = 0 + alive = 0 + # finding dead or alive neighbours count. + for i in neighbours: + for status in i: + if status: alive+=1 + else: dead+=1 + + # handling duplicate entry for focus pt. + if pt : alive-=1 + else : dead-=1 + + # running the rules of game here. + state = pt + if pt: + if alive<2: + state=False + elif alive==2 or alive==3: + state=True + elif alive>3: + state=False + else: + if alive==3: + state=True + + return state + + +if __name__=='__main__': + if len(sys.argv) != 2: raise Exception(usage_doc) + + canvas_size = int(sys.argv[1]) + # main working structure of this module. + c=create_canvas(canvas_size) + seed(c) + fig, ax = plt.subplots() + fig.show() + cmap = ListedColormap(['w','k']) + try: + while True: + c = run(c) + ax.matshow(c,cmap=cmap) + fig.canvas.draw() + ax.cla() + except KeyboardInterrupt: + # do nothing. 
+ pass diff --git a/other/game_of_life/sample.gif b/other/game_of_life/sample.gif new file mode 100644 index 000000000000..0bf2ae1f95e4 Binary files /dev/null and b/other/game_of_life/sample.gif differ diff --git a/other/nested_brackets.py b/other/nested_brackets.py index ea681ded554f..76677d56439a 100644 --- a/other/nested_brackets.py +++ b/other/nested_brackets.py @@ -13,33 +13,26 @@ returns true if S is nested and false otherwise. ''' +from __future__ import print_function def is_balanced(S): stack = [] - + open_brackets = set({'(', '[', '{'}) + closed_brackets = set({')', ']', '}'}) + open_to_closed = dict({'{':'}', '[':']', '(':')'}) + for i in range(len(S)): - - if S[i] == '(' or S[i] == '{' or S[i] == '[': + + if S[i] in open_brackets: stack.append(S[i]) - - else: - - if len(stack) > 0: - - pair = stack.pop() + S[i] - - if pair != '[]' and pair != '()' and pair != '{}': - return False - - else: + + elif S[i] in closed_brackets: + if len(stack) == 0 or (len(stack) > 0 and open_to_closed[stack.pop()] != S[i]): return False - - if len(stack) == 0: - return True - - return False + + return len(stack) == 0 def main(): @@ -47,10 +40,10 @@ def main(): S = input("Enter sequence of brackets: ") if is_balanced(S): - print(S, "is balanced") - + print((S, "is balanced")) + else: - print(S, "is not balanced") + print((S, "is not balanced")) if __name__ == "__main__": diff --git a/other/password_generator.py b/other/password_generator.py index 10ba77088eac..8916079fc758 100644 --- a/other/password_generator.py +++ b/other/password_generator.py @@ -1,3 +1,4 @@ +from __future__ import print_function import string import random @@ -12,3 +13,23 @@ password = ''.join(random.choice(chars) for x in range(random.randint(min_length, max_length))) print('Password: ' + password) print('[ If you are thinking of using this passsword, You better save it. 
]') + + +# ALTERNATIVE METHODS +# ctbi= characters that must be in password +# i= how many letters or characters the password length will be +def password_generator(ctbi, i): + # Password generator = full boot with random_number, random_letters, and random_character FUNCTIONS + pass # Put your code here... + + +def random_number(ctbi, i): + pass # Put your code here... + + +def random_letters(ctbi, i): + pass # Put your code here... + + +def random_characters(ctbi, i): + pass # Put your code here... diff --git a/other/primelib.py b/other/primelib.py new file mode 100644 index 000000000000..16c44a0938a5 --- /dev/null +++ b/other/primelib.py @@ -0,0 +1,605 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Oct 5 16:44:23 2017 + +@author: Christian Bender + +This python library contains some useful functions to deal with +prime numbers and whole numbers. + +Overview: + +isPrime(number) +sieveEr(N) +getPrimeNumbers(N) +primeFactorization(number) +greatestPrimeFactor(number) +smallestPrimeFactor(number) +getPrime(n) +getPrimesBetween(pNumber1, pNumber2) + +---- + +isEven(number) +isOdd(number) +gcd(number1, number2) // greatest common divisor +kgV(number1, number2) // least common multiple +getDivisors(number) // all divisors of 'number' inclusive 1, number +isPerfectNumber(number) + +NEW-FUNCTIONS + +simplifyFraction(numerator, denominator) +factorial (n) // n! +fib (n) // calculate the n-th fibonacci term. + +----- + +goldbach(number) // Goldbach's assumption + +""" + +def isPrime(number): + """ + input: positive integer 'number' + returns true if 'number' is prime otherwise false. + """ + import math # for function sqrt + + # precondition + assert isinstance(number,int) and (number >= 0) , \ + "'number' must been an int and positive" + + status = True + + # 0 and 1 are none primes. 
+ if number <= 1: + status = False + + for divisor in range(2,int(round(math.sqrt(number)))+1): + + # if 'number' divisible by 'divisor' then sets 'status' + # of false and break up the loop. + if number % divisor == 0: + status = False + break + + # precondition + assert isinstance(status,bool), "'status' must been from type bool" + + return status + +# ------------------------------------------ + +def sieveEr(N): + """ + input: positive integer 'N' > 2 + returns a list of prime numbers from 2 up to N. + + This function implements the algorithm called + sieve of erathostenes. + + """ + + # precondition + assert isinstance(N,int) and (N > 2), "'N' must been an int and > 2" + + # beginList: conatins all natural numbers from 2 upt to N + beginList = [x for x in range(2,N+1)] + + ans = [] # this list will be returns. + + # actual sieve of erathostenes + for i in range(len(beginList)): + + for j in range(i+1,len(beginList)): + + if (beginList[i] != 0) and \ + (beginList[j] % beginList[i] == 0): + beginList[j] = 0 + + # filters actual prime numbers. 
+ ans = [x for x in beginList if x != 0] + + # precondition + assert isinstance(ans,list), "'ans' must been from type list" + + return ans + + +# -------------------------------- + +def getPrimeNumbers(N): + """ + input: positive integer 'N' > 2 + returns a list of prime numbers from 2 up to N (inclusive) + This function is more efficient as function 'sieveEr(...)' + """ + + # precondition + assert isinstance(N,int) and (N > 2), "'N' must been an int and > 2" + + ans = [] + + # iterates over all numbers between 2 up to N+1 + # if a number is prime then appends to list 'ans' + for number in range(2,N+1): + + if isPrime(number): + + ans.append(number) + + # precondition + assert isinstance(ans,list), "'ans' must been from type list" + + return ans + + +# ----------------------------------------- + +def primeFactorization(number): + """ + input: positive integer 'number' + returns a list of the prime number factors of 'number' + """ + + import math # for function sqrt + + # precondition + assert isinstance(number,int) and number >= 0, \ + "'number' must been an int and >= 0" + + ans = [] # this list will be returns of the function. + + # potential prime number factors. 
+ + factor = 2 + + quotient = number + + + if number == 0 or number == 1: + + ans.append(number) + + # if 'number' not prime then builds the prime factorization of 'number' + elif not isPrime(number): + + while (quotient != 1): + + if isPrime(factor) and (quotient % factor == 0): + ans.append(factor) + quotient /= factor + else: + factor += 1 + + else: + ans.append(number) + + # precondition + assert isinstance(ans,list), "'ans' must been from type list" + + return ans + + +# ----------------------------------------- + +def greatestPrimeFactor(number): + """ + input: positive integer 'number' >= 0 + returns the greatest prime number factor of 'number' + """ + + # precondition + assert isinstance(number,int) and (number >= 0), \ + "'number' bust been an int and >= 0" + + ans = 0 + + # prime factorization of 'number' + primeFactors = primeFactorization(number) + + ans = max(primeFactors) + + # precondition + assert isinstance(ans,int), "'ans' must been from type int" + + return ans + + +# ---------------------------------------------- + + +def smallestPrimeFactor(number): + """ + input: integer 'number' >= 0 + returns the smallest prime number factor of 'number' + """ + + # precondition + assert isinstance(number,int) and (number >= 0), \ + "'number' bust been an int and >= 0" + + ans = 0 + + # prime factorization of 'number' + primeFactors = primeFactorization(number) + + ans = min(primeFactors) + + # precondition + assert isinstance(ans,int), "'ans' must been from type int" + + return ans + + +# ---------------------- + +def isEven(number): + """ + input: integer 'number' + returns true if 'number' is even, otherwise false. + """ + + # precondition + assert isinstance(number, int), "'number' must been an int" + assert isinstance(number % 2 == 0, bool), "compare bust been from type bool" + + return number % 2 == 0 + +# ------------------------ + +def isOdd(number): + """ + input: integer 'number' + returns true if 'number' is odd, otherwise false. 
+ """ + + # precondition + assert isinstance(number, int), "'number' must been an int" + assert isinstance(number % 2 != 0, bool), "compare bust been from type bool" + + return number % 2 != 0 + +# ------------------------ + + +def goldbach(number): + """ + Goldbach's assumption + input: a even positive integer 'number' > 2 + returns a list of two prime numbers whose sum is equal to 'number' + """ + + # precondition + assert isinstance(number,int) and (number > 2) and isEven(number), \ + "'number' must been an int, even and > 2" + + ans = [] # this list will returned + + # creates a list of prime numbers between 2 up to 'number' + primeNumbers = getPrimeNumbers(number) + lenPN = len(primeNumbers) + + # run variable for while-loops. + i = 0 + j = 1 + + # exit variable. for break up the loops + loop = True + + while (i < lenPN and loop): + + j = i+1; + + + while (j < lenPN and loop): + + if primeNumbers[i] + primeNumbers[j] == number: + loop = False + ans.append(primeNumbers[i]) + ans.append(primeNumbers[j]) + + j += 1; + + + i += 1 + + # precondition + assert isinstance(ans,list) and (len(ans) == 2) and \ + (ans[0] + ans[1] == number) and isPrime(ans[0]) and isPrime(ans[1]), \ + "'ans' must contains two primes. And sum of elements must been eq 'number'" + + return ans + +# ---------------------------------------------- + +def gcd(number1,number2): + """ + Greatest common divisor + input: two positive integer 'number1' and 'number2' + returns the greatest common divisor of 'number1' and 'number2' + """ + + # precondition + assert isinstance(number1,int) and isinstance(number2,int) \ + and (number1 >= 0) and (number2 >= 0), \ + "'number1' and 'number2' must been positive integer." 
+ + rest = 0 + + while number2 != 0: + + rest = number1 % number2 + number1 = number2 + number2 = rest + + # precondition + assert isinstance(number1,int) and (number1 >= 0), \ + "'number' must been from type int and positive" + + return number1 + +# ---------------------------------------------------- + +def kgV(number1, number2): + """ + Least common multiple + input: two positive integer 'number1' and 'number2' + returns the least common multiple of 'number1' and 'number2' + """ + + # precondition + assert isinstance(number1,int) and isinstance(number2,int) \ + and (number1 >= 1) and (number2 >= 1), \ + "'number1' and 'number2' must been positive integer." + + ans = 1 # actual answer that will be return. + + # for kgV (x,1) + if number1 > 1 and number2 > 1: + + # builds the prime factorization of 'number1' and 'number2' + primeFac1 = primeFactorization(number1) + primeFac2 = primeFactorization(number2) + + elif number1 == 1 or number2 == 1: + + primeFac1 = [] + primeFac2 = [] + ans = max(number1,number2) + + count1 = 0 + count2 = 0 + + done = [] # captured numbers int both 'primeFac1' and 'primeFac2' + + # iterates through primeFac1 + for n in primeFac1: + + if n not in done: + + if n in primeFac2: + + count1 = primeFac1.count(n) + count2 = primeFac2.count(n) + + for i in range(max(count1,count2)): + ans *= n + + else: + + count1 = primeFac1.count(n) + + for i in range(count1): + ans *= n + + done.append(n) + + # iterates through primeFac2 + for n in primeFac2: + + if n not in done: + + count2 = primeFac2.count(n) + + for i in range(count2): + ans *= n + + done.append(n) + + # precondition + assert isinstance(ans,int) and (ans >= 0), \ + "'ans' must been from type int and positive" + + return ans + +# ---------------------------------- + +def getPrime(n): + """ + Gets the n-th prime number. 
+ input: positive integer 'n' >= 0 + returns the n-th prime number, beginning at index 0 + """ + + # precondition + assert isinstance(n,int) and (n >= 0), "'number' must been a positive int" + + index = 0 + ans = 2 # this variable holds the answer + + while index < n: + + index += 1 + + ans += 1 # counts to the next number + + # if ans not prime then + # runs to the next prime number. + while not isPrime(ans): + ans += 1 + + # precondition + assert isinstance(ans,int) and isPrime(ans), \ + "'ans' must been a prime number and from type int" + + return ans + +# --------------------------------------------------- + +def getPrimesBetween(pNumber1, pNumber2): + """ + input: prime numbers 'pNumber1' and 'pNumber2' + pNumber1 < pNumber2 + returns a list of all prime numbers between 'pNumber1' (exclusiv) + and 'pNumber2' (exclusiv) + """ + + # precondition + assert isPrime(pNumber1) and isPrime(pNumber2) and (pNumber1 < pNumber2), \ + "The arguments must been prime numbers and 'pNumber1' < 'pNumber2'" + + number = pNumber1 + 1 # jump to the next number + + ans = [] # this list will be returns. + + # if number is not prime then + # fetch the next prime number. + while not isPrime(number): + number += 1 + + while number < pNumber2: + + ans.append(number) + + number += 1 + + # fetch the next prime number. + while not isPrime(number): + number += 1 + + # precondition + assert isinstance(ans,list) and ans[0] != pNumber1 \ + and ans[len(ans)-1] != pNumber2, \ + "'ans' must been a list without the arguments" + + # 'ans' contains not 'pNumber1' and 'pNumber2' ! + return ans + +# ---------------------------------------------------- + +def getDivisors(n): + """ + input: positive integer 'n' >= 1 + returns all divisors of n (inclusive 1 and 'n') + """ + + # precondition + assert isinstance(n,int) and (n >= 1), "'n' must been int and >= 1" + + from math import sqrt + + ans = [] # will be returned. 
+ + for divisor in range(1,n+1): + + if n % divisor == 0: + ans.append(divisor) + + + #precondition + assert ans[0] == 1 and ans[len(ans)-1] == n, \ + "Error in function getDivisiors(...)" + + + return ans + + +# ---------------------------------------------------- + + +def isPerfectNumber(number): + """ + input: positive integer 'number' > 1 + returns true if 'number' is a perfect number otherwise false. + """ + + # precondition + assert isinstance(number,int) and (number > 1), \ + "'number' must been an int and >= 1" + + divisors = getDivisors(number) + + # precondition + assert isinstance(divisors,list) and(divisors[0] == 1) and \ + (divisors[len(divisors)-1] == number), \ + "Error in help-function getDivisiors(...)" + + # summed all divisors up to 'number' (exclusive), hence [:-1] + return sum(divisors[:-1]) == number + +# ------------------------------------------------------------ + +def simplifyFraction(numerator, denominator): + """ + input: two integer 'numerator' and 'denominator' + assumes: 'denominator' != 0 + returns: a tuple with simplify numerator and denominator. + """ + + # precondition + assert isinstance(numerator, int) and isinstance(denominator,int) \ + and (denominator != 0), \ + "The arguments must been from type int and 'denominator' != 0" + + # build the greatest common divisor of numerator and denominator. + gcdOfFraction = gcd(abs(numerator), abs(denominator)) + + # precondition + assert isinstance(gcdOfFraction, int) and (numerator % gcdOfFraction == 0) \ + and (denominator % gcdOfFraction == 0), \ + "Error in function gcd(...,...)" + + return (numerator // gcdOfFraction, denominator // gcdOfFraction) + +# ----------------------------------------------------------------- + +def factorial(n): + """ + input: positive integer 'n' + returns the factorial of 'n' (n!) + """ + + # precondition + assert isinstance(n,int) and (n >= 0), "'n' must been a int and >= 0" + + ans = 1 # this will be return. 
+ + for factor in range(1,n+1): + ans *= factor + + return ans + +# ------------------------------------------------------------------- + +def fib(n): + """ + input: positive integer 'n' + returns the n-th fibonacci term , indexing by 0 + """ + + # precondition + assert isinstance(n, int) and (n >= 0), "'n' must been an int and >= 0" + + tmp = 0 + fib1 = 1 + ans = 1 # this will be return + + for i in range(n-1): + + tmp = ans + ans += fib1 + fib1 = tmp + + return ans diff --git a/other/sierpinski_triangle.py b/other/sierpinski_triangle.py new file mode 100644 index 000000000000..e566f693f63b --- /dev/null +++ b/other/sierpinski_triangle.py @@ -0,0 +1,64 @@ +'''Author Anurag Kumar | anuragkumarak95@gmail.com | git/anuragkumarak95 + +Simple example of Fractal generation using recursive function. + +What is Sierpinski Triangle? +>>The Sierpinski triangle (also with the original orthography Sierpinski), also called the Sierpinski gasket or the Sierpinski Sieve, +is a fractal and attractive fixed set with the overall shape of an equilateral triangle, subdivided recursively into smaller +equilateral triangles. Originally constructed as a curve, this is one of the basic examples of self-similar sets, i.e., +it is a mathematically generated pattern that can be reproducible at any magnification or reduction. It is named after +the Polish mathematician Wacław Sierpinski, but appeared as a decorative pattern many centuries prior to the work of Sierpinski. 
+ +Requirements(pip): + - turtle + +Python: + - 2.6 + +Usage: + - $python sierpinski_triangle.py + +Credits: This code was written by editing the code from http://www.lpb-riannetrujillo.com/blog/python-fractal/ + +''' +import turtle +import sys +PROGNAME = 'Sierpinski Triangle' +if len(sys.argv) !=2: + raise Exception('right format for using this script: $python fractals.py ') + +myPen = turtle.Turtle() +myPen.ht() +myPen.speed(5) +myPen.pencolor('red') + +points = [[-175,-125],[0,175],[175,-125]] #size of triangle + +def getMid(p1,p2): + return ( (p1[0]+p2[0]) / 2, (p1[1] + p2[1]) / 2) #find midpoint + +def triangle(points,depth): + + myPen.up() + myPen.goto(points[0][0],points[0][1]) + myPen.down() + myPen.goto(points[1][0],points[1][1]) + myPen.goto(points[2][0],points[2][1]) + myPen.goto(points[0][0],points[0][1]) + + if depth>0: + triangle([points[0], + getMid(points[0], points[1]), + getMid(points[0], points[2])], + depth-1) + triangle([points[1], + getMid(points[0], points[1]), + getMid(points[1], points[2])], + depth-1) + triangle([points[2], + getMid(points[2], points[1]), + getMid(points[0], points[2])], + depth-1) + + +triangle(points,int(sys.argv[1])) \ No newline at end of file diff --git a/other/tower_of_hanoi.py b/other/tower_of_hanoi.py index de0cb6218705..dc15b2ce8e58 100644 --- a/other/tower_of_hanoi.py +++ b/other/tower_of_hanoi.py @@ -1,3 +1,4 @@ +from __future__ import print_function def moveTower(height, fromPole, toPole, withPole): ''' >>> moveTower(3, 'A', 'B', 'C') @@ -15,7 +16,7 @@ def moveTower(height, fromPole, toPole, withPole): moveTower(height-1, withPole, toPole, fromPole) def moveDisk(fp,tp): - print('moving disk from', fp, 'to', tp) + print(('moving disk from', fp, 'to', tp)) def main(): height = int(input('Height of hanoi: ')) diff --git a/other/two-sum.py b/other/two-sum.py new file mode 100644 index 000000000000..d4484aa85505 --- /dev/null +++ b/other/two-sum.py @@ -0,0 +1,29 @@ +""" +Given an array of integers, return indices 
of the two numbers such that they add up to a specific target. + +You may assume that each input would have exactly one solution, and you may not use the same element twice. + +Example: +Given nums = [2, 7, 11, 15], target = 9, + +Because nums[0] + nums[1] = 2 + 7 = 9, +return [0, 1]. +""" +from __future__ import print_function + +def twoSum(nums, target): + """ + :type nums: List[int] + :type target: int + :rtype: List[int] + """ + chk_map = {} + for index, val in enumerate(nums): + compl = target - val + if compl in chk_map: + indices = [chk_map[compl], index] + print(indices) + return [indices] + else: + chk_map[val] = index + return False diff --git a/other/word_patterns.py b/other/word_patterns.py index 827c9fa8b412..c33d520087f7 100644 --- a/other/word_patterns.py +++ b/other/word_patterns.py @@ -1,3 +1,4 @@ +from __future__ import print_function import pprint, time def getWordPattern(word): @@ -32,7 +33,7 @@ def main(): fo.write(pprint.pformat(allPatterns)) totalTime = round(time.time() - startTime, 2) - print('Done! [', totalTime, 'seconds ]') + print(('Done! 
[', totalTime, 'seconds ]')) if __name__ == '__main__': main() diff --git a/searches/binary_search.py b/searches/binary_search.py index f9e27d3ff85b..93bf189cc08f 100644 --- a/searches/binary_search.py +++ b/searches/binary_search.py @@ -12,6 +12,11 @@ from __future__ import print_function import bisect +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + def binary_search(sorted_collection, item): """Pure implementation of binary search algorithm in Python @@ -80,7 +85,40 @@ def binary_search_std_lib(sorted_collection, item): return index return None +def binary_search_by_recursion(sorted_collection, item, left, right): + + """Pure implementation of binary search algorithm in Python by recursion + + Be careful collection must be sorted, otherwise result will be + unpredictable + First recursion should be started with left=0 and right=(len(sorted_collection)-1) + + :param sorted_collection: some sorted collection with comparable items + :param item: item value to search + :return: index of found item or None if item is not found + + Examples: + >>> binary_search_std_lib([0, 5, 7, 10, 15], 0) + 0 + + >>> binary_search_std_lib([0, 5, 7, 10, 15], 15) + 4 + >>> binary_search_std_lib([0, 5, 7, 10, 15], 5) + 1 + + >>> binary_search_std_lib([0, 5, 7, 10, 15], 6) + + """ + midpoint = left + (right - left) // 2 + + if sorted_collection[midpoint] == item: + return midpoint + elif sorted_collection[midpoint] > item: + return binary_search_by_recursion(sorted_collection, item, left, midpoint-1) + else: + return binary_search_by_recursion(sorted_collection, item, midpoint+1, right) + def __assert_sorted(collection): """Check if collection is sorted, if not - raises :py:class:`ValueError` @@ -104,23 +142,14 @@ def __assert_sorted(collection): if __name__ == '__main__': import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - 
input_function = raw_input - else: - input_function = input - - user_input = input_function('Enter numbers separated by coma:\n') + user_input = raw_input('Enter numbers separated by comma:\n').strip() collection = [int(item) for item in user_input.split(',')] try: __assert_sorted(collection) except ValueError: sys.exit('Sequence must be sorted to apply binary search') - target_input = input_function( - 'Enter a single number to be found in the list:\n' - ) + target_input = raw_input('Enter a single number to be found in the list:\n') target = int(target_input) result = binary_search(collection, target) if result is not None: diff --git a/searches/interpolation_search.py b/searches/interpolation_search.py new file mode 100644 index 000000000000..7b765c454d06 --- /dev/null +++ b/searches/interpolation_search.py @@ -0,0 +1,99 @@ +""" +This is pure python implementation of interpolation search algorithm +""" +from __future__ import print_function +import bisect + +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + + +def interpolation_search(sorted_collection, item): + """Pure implementation of interpolation search algorithm in Python + Be careful collection must be sorted, otherwise result will be + unpredictable + :param sorted_collection: some sorted collection with comparable items + :param item: item value to search + :return: index of found item or None if item is not found + """ + left = 0 + right = len(sorted_collection) - 1 + + while left <= right: + point = left + ((item - sorted_collection[left]) * (right - left)) // (sorted_collection[right] - sorted_collection[left]) + + #out of range check + if point<0 or point>=len(sorted_collection): + return None + + current_item = sorted_collection[point] + if current_item == item: + return point + else: + if item < current_item: + right = point - 1 + else: + left = point + 1 + return None + + +def interpolation_search_by_recursion(sorted_collection, item, left, right): + + """Pure 
implementation of interpolation search algorithm in Python by recursion + Be careful collection must be sorted, otherwise result will be + unpredictable + First recursion should be started with left=0 and right=(len(sorted_collection)-1) + :param sorted_collection: some sorted collection with comparable items + :param item: item value to search + :return: index of found item or None if item is not found + """ + point = left + ((item - sorted_collection[left]) * (right - left)) // (sorted_collection[right] - sorted_collection[left]) + + #out of range check + if point<0 or point>=len(sorted_collection): + return None + + if sorted_collection[point] == item: + return point + elif sorted_collection[point] > item: + return interpolation_search_by_recursion(sorted_collection, item, left, point-1) + else: + return interpolation_search_by_recursion(sorted_collection, item, point+1, right) + +def __assert_sorted(collection): + """Check if collection is sorted, if not - raises :py:class:`ValueError` + :param collection: collection + :return: True if collection is sorted + :raise: :py:class:`ValueError` if collection is not sorted + Examples: + >>> __assert_sorted([0, 1, 2, 4]) + True + >>> __assert_sorted([10, -1, 5]) + Traceback (most recent call last): + ... 
+ ValueError: Collection must be sorted + """ + if collection != sorted(collection): + raise ValueError('Collection must be sorted') + return True + + +if __name__ == '__main__': + import sys + + user_input = raw_input('Enter numbers separated by comma:\n').strip() + collection = [int(item) for item in user_input.split(',')] + try: + __assert_sorted(collection) + except ValueError: + sys.exit('Sequence must be sorted to apply interpolation search') + + target_input = raw_input('Enter a single number to be found in the list:\n') + target = int(target_input) + result = interpolation_search(collection, target) + if result is not None: + print('{} found at positions: {}'.format(target, result)) + else: + print('Not found') diff --git a/searches/jump_search.py b/searches/jump_search.py new file mode 100644 index 000000000000..4cff92bb585c --- /dev/null +++ b/searches/jump_search.py @@ -0,0 +1,26 @@ +from __future__ import print_function +import math +def jump_search(arr, x): + n = len(arr) + step = math.floor(math.sqrt(n)) + prev = 0 + while arr[min(step, n)-1] < x: + prev = step + step += math.floor(math.sqrt(n)) + if prev >= n: + return -1 + + while arr[prev] < x: + prev = prev + 1 + if prev == min(step, n): + return -1 + if arr[prev] == x: + return prev + return -1 + + + +arr = [ 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610] +x = 55 +index = jump_search(arr, x) +print("\nNumber " + str(x) +" is at index " + str(index)); \ No newline at end of file diff --git a/searches/linear_search.py b/searches/linear_search.py index 24479e45b6af..50c6eaad5e9b 100644 --- a/searches/linear_search.py +++ b/searches/linear_search.py @@ -11,6 +11,10 @@ """ from __future__ import print_function +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 def linear_search(sequence, target): """Pure implementation of linear search algorithm in Python @@ -39,21 +43,10 @@ def linear_search(sequence, target): if __name__ == '__main__': - import sys - - # For 
python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input - - user_input = input_function('Enter numbers separated by coma:\n') + user_input = raw_input('Enter numbers separated by coma:\n').strip() sequence = [int(item) for item in user_input.split(',')] - target_input = input_function( - 'Enter a single number to be found in the list:\n' - ) + target_input = raw_input('Enter a single number to be found in the list:\n') target = int(target_input) result = linear_search(sequence, target) if result is not None: diff --git a/searches/quick_select.py b/searches/quick_select.py new file mode 100644 index 000000000000..e5e2ce99c682 --- /dev/null +++ b/searches/quick_select.py @@ -0,0 +1,47 @@ +import collections +import sys +import random +import time +import math +""" +A python implementation of the quick select algorithm, which is efficient for calculating the value that would appear in the index of a list if it would be sorted, even if it is not already sorted +https://en.wikipedia.org/wiki/Quickselect +""" +def _partition(data, pivot): + """ + Three way partition the data into smaller, equal and greater lists, + in relationship to the pivot + :param data: The data to be sorted (a list) + :param pivot: The value to partition the data on + :return: Three list: smaller, equal and greater + """ + less, equal, greater = [], [], [] + for element in data: + if element.address < pivot.address: + less.append(element) + elif element.address > pivot.address: + greater.append(element) + else: + equal.append(element) + return less, equal, greater + + def quickSelect(list, k): + #k = len(list) // 2 when trying to find the median (index that value would be when list is sorted) + smaller = [] + larger = [] + pivot = random.randint(0, len(list) - 1) + pivot = list[pivot] + count = 0 + smaller, equal, larger =_partition(list, 
pivot) + count = len(equal) + m = len(smaller) + + #k is the pivot + if m <= k < m + count: + return pivot + # must be in smaller + elif m > k: + return quickSelect(smaller, k) + #must be in larger + else: + return quickSelect(larger, k - (m + count)) diff --git a/searches/ternary_search.py b/searches/ternary_search.py new file mode 100644 index 000000000000..c610f9b3c6da --- /dev/null +++ b/searches/ternary_search.py @@ -0,0 +1,107 @@ +''' +This is a type of divide and conquer algorithm which divides the search space into +3 parts and finds the target value based on the property of the array or list +(usually monotonic property). + +Time Complexity : O(log3 N) +Space Complexity : O(1) +''' +from __future__ import print_function + +import sys + +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + +# This is the precision for this function which can be altered. +# It is recommended for users to keep this number greater than or equal to 10. +precision = 10 + +# This is the linear search that will occur after the search space has become smaller. +def lin_search(left, right, A, target): + for i in range(left, right+1): + if(A[i] == target): + return i + +# This is the iterative method of the ternary search algorithm. 
+def ite_ternary_search(A, target): + left = 0 + right = len(A) - 1; + while(True): + if(left maxValue: + maxValue = myList[i] + + # Initialize buckets + bucketCount = math.floor((maxValue - minValue) / bucketSize) + 1 + buckets = [] + for i in range(0, bucketCount): + buckets.append([]) + + # For putting values in buckets + for i in range(0, len(myList)): + buckets[math.floor((myList[i] - minValue) / bucketSize)].append(myList[i]) + + # Sort buckets and place back into input array + sortedArray = [] + for i in range(0, len(buckets)): + insertionSort(buckets[i]) + for j in range(0, len(buckets[i])): + sortedArray.append(buckets[i][j]) + + return sortedArray + +if __name__ == '__main__': + sortedArray = bucketSort([12, 23, 4, 5, 3, 2, 12, 81, 56, 95]) + print(sortedArray) diff --git a/sorts/cocktail_shaker_sort.py b/sorts/cocktail_shaker_sort.py index a2122463274e..8ad3383bbe9f 100644 --- a/sorts/cocktail_shaker_sort.py +++ b/sorts/cocktail_shaker_sort.py @@ -21,16 +21,12 @@ def cocktail_shaker_sort(unsorted): return unsorted if __name__ == '__main__': - import sys - - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 - user_input = input_function('Enter numbers separated by a comma:\n') + user_input = raw_input('Enter numbers separated by a comma:\n').strip() unsorted = [int(item) for item in user_input.split(',')] cocktail_shaker_sort(unsorted) - print(unsorted) \ No newline at end of file + print(unsorted) diff --git a/sorts/counting_sort.py b/sorts/counting_sort.py new file mode 100644 index 000000000000..4ca682b13cca --- /dev/null +++ b/sorts/counting_sort.py @@ -0,0 +1,69 @@ +""" +This is pure python implementation of counting sort algorithm +For doctests run following command: +python -m doctest -v 
counting_sort.py +or +python3 -m doctest -v counting_sort.py +For manual testing run: +python counting_sort.py +""" + +from __future__ import print_function + + +def counting_sort(collection): + """Pure implementation of counting sort algorithm in Python + :param collection: some mutable ordered collection with heterogeneous + comparable items inside + :return: the same collection ordered by ascending + Examples: + >>> counting_sort([0, 5, 3, 2, 2]) + [0, 2, 2, 3, 5] + >>> counting_sort([]) + [] + >>> counting_sort([-2, -5, -45]) + [-45, -5, -2] + """ + # if the collection is empty, returns empty + if collection == []: + return [] + + # get some information about the collection + coll_len = len(collection) + coll_max = max(collection) + coll_min = min(collection) + + # create the counting array + counting_arr_length = coll_max + 1 - coll_min + counting_arr = [0] * counting_arr_length + + # count how much a number appears in the collection + for number in collection: + counting_arr[number - coll_min] += 1 + + # sum each position with it's predecessors. 
now, counting_arr[i] tells + # us how many elements <= i has in the collection + for i in range(1, counting_arr_length): + counting_arr[i] = counting_arr[i] + counting_arr[i-1] + + # create the output collection + ordered = [0] * coll_len + + # place the elements in the output, respecting the original order (stable + # sort) from end to begin, updating counting_arr + for i in reversed(range(0, coll_len)): + ordered[counting_arr[collection[i] - coll_min]-1] = collection[i] + counting_arr[collection[i] - coll_min] -= 1 + + return ordered + + +if __name__ == '__main__': + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 + + user_input = raw_input('Enter numbers separated by a comma:\n').strip() + unsorted = [int(item) for item in user_input.split(',')] + print(counting_sort(unsorted)) diff --git a/sorts/countingsort.py b/sorts/countingsort.py new file mode 100644 index 000000000000..18ee8b851fd7 --- /dev/null +++ b/sorts/countingsort.py @@ -0,0 +1,42 @@ +from __future__ import print_function +# Python program for counting sort + +# This is the main function that sort the given string arr[] in +# in the alphabetical order +def countSort(arr): + + # The output character array that will have sorted arr + output = [0 for i in range(256)] + + # Create a count array to store count of inidividul + # characters and initialize count array as 0 + count = [0 for i in range(256)] + + # For storing the resulting answer since the + # string is immutable + ans = ["" for _ in arr] + + # Store count of each character + for i in arr: + count[ord(i)] += 1 + + # Change count[i] so that count[i] now contains actual + # position of this character in output array + for i in range(256): + count[i] += count[i-1] + + # Build the output character array + for i in range(len(arr)): + output[count[ord(arr[i])]-1] = arr[i] + count[ord(arr[i])] -= 1 + + # Copy the output array to arr, so that arr now + # contains sorted characters + for i in range(len(arr)): + ans[i] = 
output[i] + return ans + +# Driver program to test above function +arr = "thisisthestring" +ans = countSort(arr) +print ("Sorted string array is %s" %("".join(ans))) diff --git a/sorts/cyclesort.py b/sorts/cyclesort.py new file mode 100644 index 000000000000..ee19a1ade360 --- /dev/null +++ b/sorts/cyclesort.py @@ -0,0 +1,60 @@ +# Code contributed by Honey Sharma +from __future__ import print_function + + +def cycle_sort(array): + ans = 0 + + # Pass through the array to find cycles to rotate. + for cycleStart in range(0, len(array) - 1): + item = array[cycleStart] + + # finding the position for putting the item. + pos = cycleStart + for i in range(cycleStart + 1, len(array)): + if array[i] < item: + pos += 1 + + # If the item is already present-not a cycle. + if pos == cycleStart: + continue + + # Otherwise, put the item there or right after any duplicates. + while item == array[pos]: + pos += 1 + array[pos], item = item, array[pos] + ans += 1 + + # Rotate the rest of the cycle. + while pos != cycleStart: + + # Find where to put the item. + pos = cycleStart + for i in range(cycleStart + 1, len(array)): + if array[i] < item: + pos += 1 + + # Put the item there or right after any duplicates. 
+ while item == array[pos]: + pos += 1 + array[pos], item = item, array[pos] + ans += 1 + + return ans + + +# Main Code starts here +if __name__ == '__main__': + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 + +user_input = raw_input('Enter numbers separated by a comma:\n') +unsorted = [int(item) for item in user_input.split(',')] +n = len(unsorted) +cycle_sort(unsorted) + +print("After sort : ") +for i in range(0, n): + print(unsorted[i], end=' ') diff --git a/sorts/external-sort.py b/sorts/external-sort.py new file mode 100644 index 000000000000..6c4adc94c0f0 --- /dev/null +++ b/sorts/external-sort.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python + +# +# Sort large text files in a minimum amount of memory +# +import os +import sys +import argparse + +class FileSplitter(object): + BLOCK_FILENAME_FORMAT = 'block_{0}.dat' + + def __init__(self, filename): + self.filename = filename + self.block_filenames = [] + + def write_block(self, data, block_number): + filename = self.BLOCK_FILENAME_FORMAT.format(block_number) + file = open(filename, 'w') + file.write(data) + file.close() + self.block_filenames.append(filename) + + def get_block_filenames(self): + return self.block_filenames + + def split(self, block_size, sort_key=None): + file = open(self.filename, 'r') + i = 0 + + while True: + lines = file.readlines(block_size) + + if lines == []: + break + + if sort_key is None: + lines.sort() + else: + lines.sort(key=sort_key) + + self.write_block(''.join(lines), i) + i += 1 + + def cleanup(self): + map(lambda f: os.remove(f), self.block_filenames) + + +class NWayMerge(object): + def select(self, choices): + min_index = -1 + min_str = None + + for i in range(len(choices)): + if min_str is None or choices[i] < min_str: + min_index = i + + return min_index + + +class FilesArray(object): + def __init__(self, files): + self.files = files + self.empty = set() + self.num_buffers = len(files) + self.buffers = {i: None for i in range(self.num_buffers)} 
+ + def get_dict(self): + return {i: self.buffers[i] for i in range(self.num_buffers) if i not in self.empty} + + def refresh(self): + for i in range(self.num_buffers): + if self.buffers[i] is None and i not in self.empty: + self.buffers[i] = self.files[i].readline() + + if self.buffers[i] == '': + self.empty.add(i) + + if len(self.empty) == self.num_buffers: + return False + + return True + + def unshift(self, index): + value = self.buffers[index] + self.buffers[index] = None + + return value + + +class FileMerger(object): + def __init__(self, merge_strategy): + self.merge_strategy = merge_strategy + + def merge(self, filenames, outfilename, buffer_size): + outfile = open(outfilename, 'w', buffer_size) + buffers = FilesArray(self.get_file_handles(filenames, buffer_size)) + + while buffers.refresh(): + min_index = self.merge_strategy.select(buffers.get_dict()) + outfile.write(buffers.unshift(min_index)) + + def get_file_handles(self, filenames, buffer_size): + files = {} + + for i in range(len(filenames)): + files[i] = open(filenames[i], 'r', buffer_size) + + return files + + + +class ExternalSort(object): + def __init__(self, block_size): + self.block_size = block_size + + def sort(self, filename, sort_key=None): + num_blocks = self.get_number_blocks(filename, self.block_size) + splitter = FileSplitter(filename) + splitter.split(self.block_size, sort_key) + + merger = FileMerger(NWayMerge()) + buffer_size = self.block_size / (num_blocks + 1) + merger.merge(splitter.get_block_filenames(), filename + '.out', buffer_size) + + splitter.cleanup() + + def get_number_blocks(self, filename, block_size): + return (os.stat(filename).st_size / block_size) + 1 + + +def parse_memory(string): + if string[-1].lower() == 'k': + return int(string[:-1]) * 1024 + elif string[-1].lower() == 'm': + return int(string[:-1]) * 1024 * 1024 + elif string[-1].lower() == 'g': + return int(string[:-1]) * 1024 * 1024 * 1024 + else: + return int(string) + + + +def main(): + parser = 
argparse.ArgumentParser() + parser.add_argument('-m', + '--mem', + help='amount of memory to use for sorting', + default='100M') + parser.add_argument('filename', + metavar='', + nargs=1, + help='name of file to sort') + args = parser.parse_args() + + sorter = ExternalSort(parse_memory(args.mem)) + sorter.sort(args.filename[0]) + + +if __name__ == '__main__': + main() diff --git a/sorts/gnome_sort.py b/sorts/gnome_sort.py index b353e31aab56..2927b097f11d 100644 --- a/sorts/gnome_sort.py +++ b/sorts/gnome_sort.py @@ -19,16 +19,12 @@ def gnome_sort(unsorted): i = 1 if __name__ == '__main__': - import sys - - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 - user_input = input_function('Enter numbers separated by a comma:\n') + user_input = raw_input('Enter numbers separated by a comma:\n').strip() unsorted = [int(item) for item in user_input.split(',')] gnome_sort(unsorted) - print(unsorted) \ No newline at end of file + print(unsorted) diff --git a/sorts/heap_sort.py b/sorts/heap_sort.py index 2d9dd844d0fa..3c72abca8059 100644 --- a/sorts/heap_sort.py +++ b/sorts/heap_sort.py @@ -54,12 +54,11 @@ def heap_sort(unsorted): return unsorted if __name__ == '__main__': - import sys - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 - user_input = input_function('Enter numbers separated by a comma:\n') + user_input = raw_input('Enter numbers separated by a comma:\n').strip() unsorted = [int(item) for item in user_input.split(',')] print(heap_sort(unsorted)) diff --git a/sorts/insertion_sort.py b/sorts/insertion_sort.py index caaa9305c968..b7a4aa7a3c33 100644 --- a/sorts/insertion_sort.py +++ 
b/sorts/insertion_sort.py @@ -39,15 +39,11 @@ def insertion_sort(collection): if __name__ == '__main__': - import sys + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input - - user_input = input_function('Enter numbers separated by a comma:\n') + user_input = raw_input('Enter numbers separated by a comma:\n').strip() unsorted = [int(item) for item in user_input.split(',')] print(insertion_sort(unsorted)) diff --git a/sorts/merge_sort.py b/sorts/merge_sort.py index 92a6780165ac..ca4d319fa7f1 100644 --- a/sorts/merge_sort.py +++ b/sorts/merge_sort.py @@ -62,15 +62,11 @@ def merge_sort(collection): if __name__ == '__main__': - import sys + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input - - user_input = input_function('Enter numbers separated by a comma:\n') + user_input = raw_input('Enter numbers separated by a comma:\n').strip() unsorted = [int(item) for item in user_input.split(',')] print(merge_sort(unsorted)) diff --git a/sorts/normal_distribution_QuickSort_README.md b/sorts/normal_distribution_QuickSort_README.md new file mode 100644 index 000000000000..635262bfdf7d --- /dev/null +++ b/sorts/normal_distribution_QuickSort_README.md @@ -0,0 +1,76 @@ +# Normal Distribution QuickSort + + +Algorithm implementing QuickSort Algorithm where the pivot element is chosen randomly between first and last elements of the array and the array elements are taken from a Standard Normal Distribution. 
+This is different from the ordinary quicksort in the sense, that it applies more to real life problems , where elements usually follow a normal distribution. Also the pivot is randomized to make it a more generic one. + + +## Array Elements + +The array elements are taken from a Standard Normal Distribution , having mean = 0 and standard deviation 1. + +#### The code + +```python + +>>> import numpy as np +>>> from tempfile import TemporaryFile +>>> outfile = TemporaryFile() +>>> p = 100 # 100 elements are to be sorted +>>> mu, sigma = 0, 1 # mean and standard deviation +>>> X = np.random.normal(mu, sigma, p) +>>> np.save(outfile, X) +>>> print('The array is') +>>> print(X) + +``` + +------ + +#### The Distribution of the Array elements. + +```python +>>> mu, sigma = 0, 1 # mean and standard deviation +>>> s = np.random.normal(mu, sigma, p) +>>> count, bins, ignored = plt.hist(s, 30, normed=True) +>>> plt.plot(bins , 1/(sigma * np.sqrt(2 * np.pi)) *np.exp( - (bins - mu)**2 / (2 * sigma**2) ),linewidth=2, color='r') +>>> plt.show() + +``` + + +----- + + + + +![](https://www.mathsisfun.com/data/images/normal-distrubution-large.gif) + +--- + +--------------------- + +-- + +## Plotting the function for Checking 'The Number of Comparisons' taking place between Normal Distribution QuickSort and Ordinary QuickSort + +```python +>>>import matplotlib.pyplot as plt + + + # Normal Disrtibution QuickSort is red +>>> plt.plot([1,2,4,16,32,64,128,256,512,1024,2048],[1,1,6,15,43,136,340,800,2156,6821,16325],linewidth=2, color='r') + + #Ordinary QuickSort is green +>>> plt.plot([1,2,4,16,32,64,128,256,512,1024,2048],[1,1,4,16,67,122,362,949,2131,5086,12866],linewidth=2, color='g') + +>>> plt.show() + +``` + + +---- + + +------------------ + diff --git a/sorts/quick_sort.py b/sorts/quick_sort.py index 26b92fd3b7bf..136cbc021669 100644 --- a/sorts/quick_sort.py +++ b/sorts/quick_sort.py @@ -10,15 +10,9 @@ python quick_sort.py """ from __future__ import print_function -from random 
import shuffle -def sort(collection): - shuffle(collection) - return quick_sort(collection) - - -def quick_sort(collection): +def quick_sort(ARRAY): """Pure implementation of quick sort algorithm in Python :param collection: some mutable ordered collection with heterogeneous @@ -35,39 +29,22 @@ def quick_sort(collection): >>> quick_sort([-2, -5, -45]) [-45, -5, -2] """ - total_elements = len(collection) - - if total_elements <= 1: - return collection - less = [] - equal = [] - greater = [] - pivot = collection[0] - - equal.append(pivot) - - for i in range(1, total_elements): - element = collection[i] - - if element < pivot: - less.append(element) - elif element == pivot: - equal.append(element) - else: - greater.append(element) - return quick_sort(less) + equal + quick_sort(greater) + ARRAY_LENGTH = len(ARRAY) + if( ARRAY_LENGTH <= 1): + return ARRAY + else: + PIVOT = ARRAY[0] + GREATER = [ element for element in ARRAY[1:] if element > PIVOT ] + LESSER = [ element for element in ARRAY[1:] if element <= PIVOT ] + return quick_sort(LESSER) + [PIVOT] + quick_sort(GREATER) if __name__ == '__main__': - import sys - - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input - - user_input = input_function('Enter numbers separated by a comma:\n') - unsorted = [int(item) for item in user_input.split(',')] - print(sort(unsorted)) + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 + + user_input = raw_input('Enter numbers separated by a comma:\n').strip() + unsorted = [ int(item) for item in user_input.split(',') ] + print( quick_sort(unsorted) ) diff --git a/sorts/radix_sort.py b/sorts/radix_sort.py new file mode 100644 index 000000000000..b0b4b41ab24f --- /dev/null +++ b/sorts/radix_sort.py @@ -0,0 +1,28 @@ +def radixsort(lst): + RADIX = 10 + maxLength = False + tmp , placement = 
-1, 1 + + while not maxLength: + maxLength = True + # declare and initialize buckets + buckets = [list() for _ in range( RADIX )] + + # split lst between lists + for i in lst: + tmp = int((i / placement) % RADIX) + buckets[tmp].append(i) + + if maxLength and tmp > 0: + maxLength = False + + # empty lists into lst array + a = 0 + for b in range( RADIX ): + buck = buckets[b] + for i in buck: + lst[a] = i + a += 1 + + # move to next + placement *= RADIX diff --git a/sorts/random_normaldistribution_quicksort.py b/sorts/random_normaldistribution_quicksort.py index 19b180578ad6..bd730b3b1e6d 100644 --- a/sorts/random_normaldistribution_quicksort.py +++ b/sorts/random_normaldistribution_quicksort.py @@ -1,3 +1,4 @@ +from __future__ import print_function from random import randint from tempfile import TemporaryFile import numpy as np diff --git a/sorts/selection_sort.py b/sorts/selection_sort.py index 14bc804637c5..432d14090b12 100644 --- a/sorts/selection_sort.py +++ b/sorts/selection_sort.py @@ -43,14 +43,11 @@ def selection_sort(collection): if __name__ == '__main__': - import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input - - user_input = input_function('Enter numbers separated by a comma:\n') + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 + + user_input = raw_input('Enter numbers separated by a comma:\n').strip() unsorted = [int(item) for item in user_input.split(',')] print(selection_sort(unsorted)) diff --git a/sorts/shell_sort.py b/sorts/shell_sort.py index fdb98a570d9f..dc1846758243 100644 --- a/sorts/shell_sort.py +++ b/sorts/shell_sort.py @@ -44,14 +44,11 @@ def shell_sort(collection): return collection if __name__ == '__main__': - import sys - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin 
function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input - - user_input = input_function('Enter numbers separated by a comma:\n') + try: + raw_input # Python 2 + except NameError: + raw_input = input # Python 3 + + user_input = raw_input('Enter numbers separated by a comma:\n').strip() unsorted = [int(item) for item in user_input.split(',')] print(shell_sort(unsorted)) diff --git a/sorts/timsort.py b/sorts/timsort.py new file mode 100644 index 000000000000..80c5cd1e8d3f --- /dev/null +++ b/sorts/timsort.py @@ -0,0 +1,82 @@ +from __future__ import print_function +def binary_search(lst, item, start, end): + if start == end: + if lst[start] > item: + return start + else: + return start + 1 + if start > end: + return start + + mid = (start + end) // 2 + if lst[mid] < item: + return binary_search(lst, item, mid + 1, end) + elif lst[mid] > item: + return binary_search(lst, item, start, mid - 1) + else: + return mid + + +def insertion_sort(lst): + length = len(lst) + + for index in range(1, length): + value = lst[index] + pos = binary_search(lst, value, 0, index - 1) + lst = lst[:pos] + [value] + lst[pos:index] + lst[index+1:] + + return lst + + +def merge(left, right): + if not left: + return right + + if not right: + return left + + if left[0] < right[0]: + return [left[0]] + merge(left[1:], right) + + return [right[0]] + merge(left, right[1:]) + + +def timsort(lst): + runs, sorted_runs = [], [] + length = len(lst) + new_run = [lst[0]] + sorted_array = [] + + for i in range(1, length): + if i == length - 1: + new_run.append(lst[i]) + runs.append(new_run) + break + + if lst[i] < lst[i - 1]: + if not new_run: + runs.append([lst[i - 1]]) + new_run.append(lst[i]) + else: + runs.append(new_run) + new_run = [] + else: + new_run.append(lst[i]) + + for run in runs: + sorted_runs.append(insertion_sort(run)) + + for run in sorted_runs: + sorted_array = merge(sorted_array, run) + + return sorted_array + + +def main(): + 
+ lst = [5,9,10,3,-4,5,178,92,46,-18,0,7] + sorted_lst = timsort(lst) + print(sorted_lst) + +if __name__ == '__main__': + main() diff --git a/sorts/topological_sort.py b/sorts/topological_sort.py new file mode 100644 index 000000000000..52dc34f4f733 --- /dev/null +++ b/sorts/topological_sort.py @@ -0,0 +1,33 @@ +from __future__ import print_function +# a +# / \ +# b c +# / \ +# d e +edges = {'a': ['c', 'b'], 'b': ['d', 'e'], 'c': [], 'd': [], 'e': []} +vertices = ['a', 'b', 'c', 'd', 'e'] + + +def topological_sort(start, visited, sort): + """Perform topological sort on a directed acyclic graph.""" + current = start + # add current to visited + visited.append(current) + neighbors = edges[current] + for neighbor in neighbors: + # if neighbor not in visited, visit + if neighbor not in visited: + sort = topological_sort(neighbor, visited, sort) + # if all neighbors visited add current to sort + sort.append(current) + # if not all vertices have been visited, select a new one to visit + if len(visited) != len(vertices): + for vertice in vertices: + if vertice not in visited: + sort = topological_sort(vertice, visited, sort) + # return sort + return sort + + +sort = topological_sort('a', [], []) +print(sort) diff --git a/strings/knuth-morris-pratt.py b/strings/knuth-morris-pratt.py new file mode 100644 index 000000000000..4553944284be --- /dev/null +++ b/strings/knuth-morris-pratt.py @@ -0,0 +1,80 @@ +def kmp(pattern, text): + """ + The Knuth-Morris-Pratt Algorithm for finding a pattern within a piece of text + with complexity O(n + m) + + 1) Preprocess pattern to identify any suffixes that are identical to prefixes + + This tells us where to continue from if we get a mismatch between a character in our pattern + and the text.
+ + 2) Step through the text one character at a time and compare it to a character in the pattern + updating our location within the pattern if necessary + + """ + + # 1) Construct the failure array + failure = get_failure_array(pattern) + + # 2) Step through text searching for pattern + i, j = 0, 0 # index into text, pattern + while i < len(text): + if pattern[j] == text[i]: + if j == (len(pattern) - 1): + return True + j += 1 + + # if this is a prefix in our pattern + # just go back far enough to continue + elif j > 0: + j = failure[j - 1] + continue + i += 1 + return False + + +def get_failure_array(pattern): + """ + Calculates the new index we should go to if we fail a comparison + :param pattern: + :return: + """ + failure = [0] + i = 0 + j = 1 + while j < len(pattern): + if pattern[i] == pattern[j]: + i += 1 + elif i > 0: + i = failure[i-1] + continue + j += 1 + failure.append(i) + return failure + + +if __name__ == '__main__': + # Test 1) + pattern = "abc1abc12" + text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc" + text2 = "alskfjaldsk23adsfabcabc" + assert kmp(pattern, text1) and not kmp(pattern, text2) + + # Test 2) + pattern = "ABABX" + text = "ABABZABABYABABX" + assert kmp(pattern, text) + + # Test 3) + pattern = "AAAB" + text = "ABAAAAAB" + assert kmp(pattern, text) + + # Test 4) + pattern = "abcdabcy" + text = "abcxabcdabxabcdabcdabcy" + assert kmp(pattern, text) + + # Test 5) + pattern = "aabaabaaa" + assert get_failure_array(pattern) == [0, 1, 0, 1, 2, 3, 4, 5, 2] diff --git a/strings/rabin-karp.py b/strings/rabin-karp.py new file mode 100644 index 000000000000..04a849266ead --- /dev/null +++ b/strings/rabin-karp.py @@ -0,0 +1,50 @@ +def rabin_karp(pattern, text): + """ + + The Rabin-Karp Algorithm for finding a pattern within a piece of text + with complexity O(nm), most efficient when it is used with multiple patterns + as it is able to check if any of a set of patterns match a section of text in o(1) given the precomputed hashes. 
+ + This will be the simple version which only assumes one pattern is being searched for but it's not hard to modify + + 1) Calculate pattern hash + + 2) Step through the text one character at a time passing a window with the same length as the pattern + calculating the hash of the text within the window compare it with the hash of the pattern. Only testing + equality if the hashes match + + """ + p_len = len(pattern) + p_hash = hash(pattern) + + for i in range(0, len(text) - (p_len - 1)): + + # written like this t + text_hash = hash(text[i:i + p_len]) + if text_hash == p_hash and \ + text[i:i + p_len] == pattern: + return True + return False + + +if __name__ == '__main__': + # Test 1) + pattern = "abc1abc12" + text1 = "alskfjaldsabc1abc1abc12k23adsfabcabc" + text2 = "alskfjaldsk23adsfabcabc" + assert rabin_karp(pattern, text1) and not rabin_karp(pattern, text2) + + # Test 2) + pattern = "ABABX" + text = "ABABZABABYABABX" + assert rabin_karp(pattern, text) + + # Test 3) + pattern = "AAAB" + text = "ABAAAAAB" + assert rabin_karp(pattern, text) + + # Test 4) + pattern = "abcdabcy" + text = "abcxabcdabxabcdabcdabcy" + assert rabin_karp(pattern, text) diff --git a/traverals/binary_tree_traversals.py b/traversals/binary_tree_traversals.py similarity index 63% rename from traverals/binary_tree_traversals.py rename to traversals/binary_tree_traversals.py index 591142b278c7..cbcaf08b7b03 100644 --- a/traverals/binary_tree_traversals.py +++ b/traversals/binary_tree_traversals.py @@ -1,9 +1,14 @@ """ This is pure python implementation of tree traversal algorithms """ - +from __future__ import print_function import queue +try: + raw_input # Python 2 +except NameError: + raw_input = input # Python 3 + class TreeNode: def __init__(self, data): @@ -13,34 +18,37 @@ def __init__(self, data): def build_tree(): + print("\n********Press N to stop entering at any point of time********\n") print("Enter the value of the root node: ", end="") - data = eval(input()) - if data < 0: - 
return None - else: - q = queue.Queue() - tree_node = TreeNode(data) - q.put(tree_node) - while not q.empty(): - node_found = q.get() - print("Enter the left node of %s: " % node_found.data, end="") - left_data = eval(input()) - if left_data >= 0: - left_node = TreeNode(left_data) - node_found.left = left_node - q.put(left_node) - print("Enter the right node of %s: " % node_found.data, end="") - right_data = eval(input()) - if right_data >= 0: - right_node = TreeNode(right_data) - node_found.right = right_node - q.put(right_node) - return tree_node + check = raw_input().strip().lower() + if check == 'n': + return None + data = int(check) + q = queue.Queue() + tree_node = TreeNode(data) + q.put(tree_node) + while not q.empty(): + node_found = q.get() + print("Enter the left node of %s: " % node_found.data, end="") + check = raw_input().strip().lower() + if check == 'n': + return tree_node + left_data = int(check) + left_node = TreeNode(left_data) + node_found.left = left_node + q.put(left_node) + print("Enter the right node of %s: " % node_found.data, end="") + check = raw_input().strip().lower() + if check == 'n': + return tree_node + right_data = int(check) + right_node = TreeNode(right_data) + node_found.right = right_node + q.put(right_node) def pre_order(node): if not isinstance(node, TreeNode) or not node: - print("Invalid input") return print(node.data, end=" ") pre_order(node.left) @@ -78,15 +86,7 @@ def level_order(node): if __name__ == '__main__': - import sys - print("\n********* Binary Tree Traversals ************\n") - # For python 2.x and 3.x compatibility: 3.x has not raw_input builtin - # otherwise 2.x's input builtin function is too "smart" - if sys.version_info.major < 3: - input_function = raw_input - else: - input_function = input node = build_tree() print("\n********* Pre Order Traversal ************")