From 6db3ad59a2c476b6ba945861e740e10dd0f65b95 Mon Sep 17 00:00:00 2001 From: anzai <872790417@qq.com> Date: Fri, 27 Mar 2020 18:05:18 +0800 Subject: [PATCH] x --- "000.\347\273\203\344\271\240/1.txt" | 3 + "000.\347\273\203\344\271\240/2.txt" | 2 + "000.\347\273\203\344\271\240/3.txt" | 3 + "000.\347\273\203\344\271\240/4.txt" | 2 + "000.\347\273\203\344\271\240/5.txt" | 2 + .../searchEngine.py" | 156 ++++++++++++++++++ "000.\347\273\203\344\271\240/sort.py" | 125 ++++++++++++++ "000.\347\273\203\344\271\240/test.py" | 102 ++++++++++++ 8 files changed, 395 insertions(+) create mode 100644 "000.\347\273\203\344\271\240/1.txt" create mode 100644 "000.\347\273\203\344\271\240/2.txt" create mode 100644 "000.\347\273\203\344\271\240/3.txt" create mode 100644 "000.\347\273\203\344\271\240/4.txt" create mode 100644 "000.\347\273\203\344\271\240/5.txt" create mode 100644 "000.\347\273\203\344\271\240/searchEngine.py" create mode 100644 "000.\347\273\203\344\271\240/sort.py" create mode 100644 "000.\347\273\203\344\271\240/test.py" diff --git "a/000.\347\273\203\344\271\240/1.txt" "b/000.\347\273\203\344\271\240/1.txt" new file mode 100644 index 0000000..b94ec8c --- /dev/null +++ "b/000.\347\273\203\344\271\240/1.txt" @@ -0,0 +1,3 @@ + +# 1.txt +I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character. I have a dream today. diff --git "a/000.\347\273\203\344\271\240/2.txt" "b/000.\347\273\203\344\271\240/2.txt" new file mode 100644 index 0000000..1aadc1e --- /dev/null +++ "b/000.\347\273\203\344\271\240/2.txt" @@ -0,0 +1,2 @@ +# 2.txt +I have a dream that one day down in Alabama, with its vicious racists, . . . one day right there in Alabama little black boys and black girls will be able to join hands with little white boys and white girls as sisters and brothers. I have a dream today. \ No newline at end of file diff --git "a/000.\347\273\203\344\271\240/3.txt" "b/000.\347\273\203\344\271\240/3.txt" new file mode 100644 index 0000000..f64516b --- /dev/null +++ "b/000.\347\273\203\344\271\240/3.txt" @@ -0,0 +1,3 @@ +# 3.txt +I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight, and the glory of the Lord shall be revealed, and all flesh shall see it together. + \ No newline at end of file diff --git "a/000.\347\273\203\344\271\240/4.txt" "b/000.\347\273\203\344\271\240/4.txt" new file mode 100644 index 0000000..f8c658f --- /dev/null +++ "b/000.\347\273\203\344\271\240/4.txt" @@ -0,0 +1,2 @@ +# 4.txt +This is our hope. . . With this faith we will be able to hew out of the mountain of despair a stone of hope. With this faith we will be able to transform the jangling discords of our nation into a beautiful symphony of brotherhood. With this faith we will be able to work together, to pray together, to struggle together, to go to jail together, to stand up for freedom together, knowing that we will be free one day. . . . \ No newline at end of file diff --git "a/000.\347\273\203\344\271\240/5.txt" "b/000.\347\273\203\344\271\240/5.txt" new file mode 100644 index 0000000..497f82c --- /dev/null +++ "b/000.\347\273\203\344\271\240/5.txt" @@ -0,0 +1,2 @@ +# 5.txt +And when this happens, and when we allow freedom ring, when we let it ring from every village and every hamlet, from every state and every city, we will be able to speed up that day when all of God's children, black men and white men, Jews and Gentiles, Protestants and Catholics, will be able to join hands and sing in the words of the old Negro spiritual: "Free at last! Free at last! Thank God Almighty, we are free at last!" \ No newline at end of file diff --git "a/000.\347\273\203\344\271\240/searchEngine.py" "b/000.\347\273\203\344\271\240/searchEngine.py" new file mode 100644 index 0000000..d26dea1 --- /dev/null +++ "b/000.\347\273\203\344\271\240/searchEngine.py" @@ -0,0 +1,156 @@ +# -*- coding: UTF-8 -*- + +import re + + +class SearchEngineBase(object): + def __init__(self): + pass + + def add_corpus(self, file_path): + filename = '/Users/tonyzeng/Documents/LeetCode-Python/000.练习/' + file_path + with open(filename, 'r') as fin: + text = fin.read() + self.process_corpus(file_path, text) + + def process_corpus(self, id, text): + raise Exception('process_corpus not implemented.') + + def search(self, query): + raise Exception('search not implemented.') + + +def main(search_engine): + for file_path in ['1.txt', '2.txt', '3.txt', '4.txt', '5.txt']: + search_engine.add_corpus(file_path) + + while True: + query = input() + results = search_engine.search(query) + print('found {} result(s):'.format(len(results))) + for result in results: + print(result) + + +class SimpleEngine(SearchEngineBase): + def __init__(self): + super(SimpleEngine, self).__init__() + self.__id_to_texts = {} + + def process_corpus(self, id, text): + self.__id_to_texts[id] = text + + def search(self, query): + results = [] + for id, text in self.__id_to_texts.items(): + if query in text: + results.append(id) + return results + + +# 分词搜索(初级版本) +class BOWEngine(SearchEngineBase): + def __init__(self): + super(BOWEngine, self).__init__() + self.__id_to_words = {} + + def process_corpus(self, id, text): + self.__id_to_words[id] = self.parse_text_to_words(text) + + def search(self, query): + query_words = self.parse_text_to_words(query) + results = [] + for id, words in self.__id_to_words.items(): + if self.query_match(query_words, words): + results.append(id) + return results + + @staticmethod + def query_match(query_words, words): + for query_word in query_words: + if query_word not in words: + return False + return True + + @staticmethod + def parse_text_to_words(text): + # 使用正则表达式去除标点符号和换行符 + text = re.sub(r'[^\w ]', ' ', text) + # 转为小写 + text = text.lower() + # 生成所有单词的列表 + word_list = text.split(' ') + # 去除空白单词 + word_list = filter(None, word_list) + # 返回单词的 set + return set(word_list) + +# BOW模型扩展 +class BOWInvertedIndexEngine(SearchEngineBase): + def __init__(self): + super(BOWInvertedIndexEngine, self).__init__() + self.inverted_index = {} + + def process_corpus(self, id, text): + words = self.parse_text_to_words(text) + for word in words: + if word not in self.inverted_index: + self.inverted_index[word] = [] + self.inverted_index[word].append(id) + # print(self.inverted_index) + + def search(self, query): + query_words = list(self.parse_text_to_words(query)) + query_words_index = list() + for query_word in query_words: + query_words_index.append(0) + print('query_words_index:',query_words_index) + # 如果某一个查询单词的倒序索引为空,我们就立刻返回 + for query_word in query_words: + if query_word not in self.inverted_index: + return [] + + result = [] + while True: + + # 首先,获得当前状态下所有倒序索引的 index + current_ids = [] + + for idx, query_word in enumerate(query_words): + current_index = query_words_index[idx] + current_inverted_list = self.inverted_index[query_word] + + # 已经遍历到了某一个倒序索引的末尾,结束 search + if current_index >= len(current_inverted_list): + return result + + current_ids.append(current_inverted_list[current_index]) + print('current_ids:', current_ids) + # 然后,如果 current_ids 的所有元素都一样,那么表明这个单词在这个元素对应的文档中都出现了 + if all(x == current_ids[0] for x in current_ids): + result.append(current_ids[0]) + query_words_index = [x + 1 for x in query_words_index] + continue + + # 如果不是,我们就把最小的元素加一 + min_val = min(current_ids) + min_val_pos = current_ids.index(min_val) + query_words_index[min_val_pos] += 1 + + @staticmethod + def parse_text_to_words(text): + # 使用正则表达式去除标点符号和换行符 + text = re.sub(r'[^\w ]', ' ', text) + # 转为小写 + text = text.lower() + # 生成所有单词的列表 + word_list = text.split(' ') + # 去除空白单词 + word_list = filter(None, word_list) + # 返回单词的 set + return set(word_list) + + +search_engine = BOWInvertedIndexEngine() +# search_engine = BOWEngine() +main(search_engine) \ No newline at end of file diff --git "a/000.\347\273\203\344\271\240/sort.py" "b/000.\347\273\203\344\271\240/sort.py" new file mode 100644 index 0000000..619f206 --- /dev/null +++ "b/000.\347\273\203\344\271\240/sort.py" @@ -0,0 +1,125 @@ +# -*- coding: UTF-8 -*- + +# 冒泡排序 时间 O(n^2) +def bubble_sort(list): + n = len(list) + if n<2: return list + for i in range(n-1, -1, -1): + print('i:{}'.format(i)) + for j in range(i): + if j+1>=n: break + if list[j+1] 0: + for i in range(gap, n): + # 每个步长进行插入排序 + temp = list[i] + print('gap:{}; temp:{}'.format(gap, temp)) + j = i + # 插入排序 + while j >= gap and list[j - gap] > temp: + list[j] = list[j - gap] + j -= gap + list[j] = temp + # 得到新的步长 + gap = gap // 2 + return list + +# 归并排序 +def merge_sort(li): + #这里接收两个列表 + def merge(left, right): + # 从两个有顺序的列表里边依次取数据比较后放入result + # 每次我们分别拿出两个列表中最小的数比较,把较小的放入result + result = [] + while len(left) > 0 and len(right) > 0: + #为了保持稳定性,当遇到相等的时候优先把左侧的数放进结果列表,因为left本来也是大数列中比较靠左的 + if left[0] <= right[0]: + result.append(left.pop(0)) + else: + result.append(right.pop(0)) + #while循环出来之后 说明其中一个数组没有数据了,我们把另一个数组添加到结果数组后面 + if left: result += left + if right: result += right + return result + + #不断递归调用自己一直到拆分成成单个元素的时候就返回这个元素,不再拆分了 + if len(li) == 1: + return li + + #取拆分的中间位置 + mid = len(li) // 2 + #拆分过后左右两侧子串 + left = li[:mid] + right = li[mid:] + + #对拆分过后的左右再拆分 一直到只有一个元素为止 + #最后一次递归时候ll和lr都会接到一个元素的列表 + # 最后一次递归之前的ll和rl会接收到排好序的子序列 + ll = merge_sort(left) + rl = merge_sort(right) + + # 我们对返回的两个拆分结果进行排序后合并再返回正确顺序的子列表 + # 这里我们调用拎一个函数帮助我们按顺序合并ll和lr + return merge(ll, rl) + +# 快速排序 +def quick_sort(arr): + less = [] + greater = [] + if len(arr) <= 1: + return arr + else: + pivot = arr[0] + for x in arr[1:]: + if x < pivot: + less.append(x) + else: + greater.append(x) + return quick_sort(less) + [pivot] + quick_sort(greater) + +# 堆排序 +def heap_sort(list): + # 最大堆调整 + def sift_down(lst, start, end): + root = start + while True: + child = 2 * root + 1 + # 子节点为最后一个了 + if child > end: + break + # 左孩子小于右孩子 + if child + 1 <= end and lst[child] < lst[child + 1]: + child += 1 + # 根节点root小于俩孩子中最大的那个child,交换root和child + if lst[root] < lst[child]: + lst[root], lst[child] = lst[child], lst[root] + root = child + else: + break + + # 创建最大堆 + # start代表有孩子的节点 + for start in range((len(list) - 2) // 2, -1, -1): + print("Start:{}; Value:{}".format(start, list[start])) + sift_down(list, start, len(list) - 1) + + # 堆排序 + for end in range(len(list) - 1, 0, -1): + print("End:{}; Value:{}".format(end, list[start])) + list[0], list[end] = list[end], list[0] + sift_down(list, 0, end - 1) + return list + + +unsort_list = [80, 25, 15, 0, 9, 3, 102, 22, 12, 56, 89] +# print(shell_sort(unsort_list)) +print(bubble_sort(unsort_list)) \ No newline at end of file diff --git "a/000.\347\273\203\344\271\240/test.py" "b/000.\347\273\203\344\271\240/test.py" new file mode 100644 index 0000000..e6b449c --- /dev/null +++ "b/000.\347\273\203\344\271\240/test.py" @@ -0,0 +1,102 @@ +# -*- coding: UTF-8 -*- +def pow(x, n): + pow = 1 + while n: + print('n当前的值:{}'.format(n)) + # 奇数 + if n & 1: + pow *= x + x *= x + n >>= 1 + return pow + + +# print(pow(int(input('X:')), int(input('n:')))) +import re + + +# str = ' ¥%6767xffgdgf fgfd' +# str = re.findall('^[\+\-]?\d+', str.lstrip()) +# print(int(*str)) +# +# +# def is_palindrome(num): +# temp = num +# total = 0 +# while temp > 0: +# total = total * 10 + temp % 10 +# temp //= 10 +# print(temp, total) +# return total == num +# +# +# print(is_palindrome(1234321)) +# +# +# def dfs(root): +# def _dfs(node, level): +# if not node: return +# if len(result) < level + 1: +# result.append([]) +# result[level].append(node.val) +# _dfs(node.left, level + 1) +# _dfs(node.right, level + 1) +# +# if not root: return [] +# result = [] +# _dfs(root, 0) +# return result +def mySqrt(x): + r = 100 + round = 0 + while r * r > x: + r = (r + x / r) / 2 + round += 1 + print(round) + return r + +import json +def test(): + root = {} + node = root + for i in 'abcdef': + node = node.setdefault(i, {}) + + # if not node: break + # for k,j in node.iteritems(): + # print(k,j) + + print(json.dumps(root)) + +# my_set = [1,2,3] +def testFunc(x,y): + print("X:{}; Y:{}".format(x,y)) + return x*y + +from collections import OrderedDict +cacheDict = OrderedDict() +cacheDict[2] = 123 +cacheDict[3] = 234 +cacheDict[1] = 111111 + +# cacheDict.move_to_end(3) +# for k,v in cacheDict.items(): +# print(k, v) +# mylist = [2,3,4,5,6] +# if not 2 in mylist: +# print('Yes') +# else: +# print('Nope') +def power(a, n): + if n==0: + return 1 + result, tmp = 1, a + while n: + print('N:', n) + if n&1: + result *= tmp + n >>= 1 + tmp *= tmp + return result + +print(power(3, 7))