Skip to content

Commit 6db3ad5

Browse files
committed
x
1 parent 86e9e7f commit 6db3ad5

File tree

8 files changed

+395
-0
lines changed

8 files changed

+395
-0
lines changed

000.练习/1.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2+
# 1.txt
3+
I have a dream that my four little children will one day live in a nation where they will not be judged by the color of their skin but by the content of their character. I have a dream today.

000.练习/2.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# 2.txt
2+
I have a dream that one day down in Alabama, with its vicious racists, . . . one day right there in Alabama little black boys and black girls will be able to join hands with little white boys and white girls as sisters and brothers. I have a dream today.

000.练习/3.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# 3.txt
2+
I have a dream that one day every valley shall be exalted, every hill and mountain shall be made low, the rough places will be made plain, and the crooked places will be made straight, and the glory of the Lord shall be revealed, and all flesh shall see it together.
3+

000.练习/4.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# 4.txt
2+
This is our hope. . . With this faith we will be able to hew out of the mountain of despair a stone of hope. With this faith we will be able to transform the jangling discords of our nation into a beautiful symphony of brotherhood. With this faith we will be able to work together, to pray together, to struggle together, to go to jail together, to stand up for freedom together, knowing that we will be free one day. . . .

000.练习/5.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# 5.txt
2+
And when this happens, and when we allow freedom ring, when we let it ring from every village and every hamlet, from every state and every city, we will be able to speed up that day when all of God's children, black men and white men, Jews and Gentiles, Protestants and Catholics, will be able to join hands and sing in the words of the old Negro spiritual: "Free at last! Free at last! Thank God Almighty, we are free at last!"

000.练习/searchEngine.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
# -*- coding: UTF-8 -*-
2+
3+
import re
4+
5+
6+
class SearchEngineBase(object):
    """Common skeleton for the search engines defined in this file.

    Subclasses provide ``process_corpus`` (index one document) and
    ``search`` (answer one query). ``add_corpus`` reads a document from
    disk and hands its text to ``process_corpus``.
    """

    # Prefix prepended verbatim (plain string concatenation) to every name
    # passed to add_corpus. Kept as a class attribute so a subclass or an
    # instance can point the engine at another directory; the default is the
    # location the original code used.
    corpus_dir = '/Users/tonyzeng/Documents/LeetCode-Python/000.练习/'

    def __init__(self):
        pass

    def add_corpus(self, file_path):
        """Read ``corpus_dir + file_path`` and index its contents.

        The file name itself is used as the document id passed to
        ``process_corpus``.
        """
        filename = self.corpus_dir + file_path
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(filename, 'r', encoding='utf-8') as fin:
            text = fin.read()
        self.process_corpus(file_path, text)

    def process_corpus(self, id, text):
        """Index ``text`` under document id ``id``. Must be overridden."""
        raise NotImplementedError('process_corpus not implemented.')

    def search(self, query):
        """Return the ids of documents matching ``query``. Must be overridden."""
        raise NotImplementedError('search not implemented.')
21+
22+
23+
def main(search_engine):
    """Index the five sample documents, then answer queries read from stdin.

    ``search_engine`` must provide ``add_corpus(file_path)`` and
    ``search(query)``. Each line read from standard input is treated as one
    query; the number of hits and then each hit are printed. The loop ends
    when standard input is exhausted.
    """
    for file_path in ['1.txt', '2.txt', '3.txt', '4.txt', '5.txt']:
        search_engine.add_corpus(file_path)

    while True:
        try:
            query = input()
        except EOFError:
            # stdin was closed (piped input finished, or Ctrl-D): stop
            # quietly instead of dying with a traceback.
            return
        results = search_engine.search(query)
        print('found {} result(s):'.format(len(results)))
        for result in results:
            print(result)
33+
34+
35+
class SimpleEngine(SearchEngineBase):
    """Substring search: keeps every document's full text in a dict."""

    def __init__(self):
        super(SimpleEngine, self).__init__()
        # document id -> raw document text
        self.__id_to_texts = {}

    def process_corpus(self, id, text):
        """Store ``text`` under ``id``, replacing any earlier entry."""
        self.__id_to_texts[id] = text

    def search(self, query):
        """Return the ids of all documents whose text contains ``query``."""
        return [
            doc_id
            for doc_id, body in self.__id_to_texts.items()
            if query in body
        ]
49+
50+
51+
# 分词搜索(初级版本)
52+
class BOWEngine(SearchEngineBase):
    """Bag-of-words search: each document is reduced to a set of words."""

    def __init__(self):
        super(BOWEngine, self).__init__()
        # document id -> set of lower-cased words in that document
        self.__id_to_words = {}

    def process_corpus(self, id, text):
        """Tokenise ``text`` and remember its word set under ``id``."""
        self.__id_to_words[id] = self.parse_text_to_words(text)

    def search(self, query):
        """Return ids of documents that contain every word of ``query``."""
        wanted = self.parse_text_to_words(query)
        return [
            doc_id
            for doc_id, vocabulary in self.__id_to_words.items()
            if self.query_match(wanted, vocabulary)
        ]

    @staticmethod
    def query_match(query_words, words):
        """Return True when every item of ``query_words`` is in ``words``."""
        return all(term in words for term in query_words)

    @staticmethod
    def parse_text_to_words(text):
        """Split ``text`` into a set of lower-cased words.

        Every character that is neither a word character nor a space
        (punctuation, newlines, ...) is replaced by a space first, and
        empty tokens produced by consecutive spaces are dropped.
        """
        cleaned = re.sub(r'[^\w ]', ' ', text).lower()
        return {token for token in cleaned.split(' ') if token}
87+
88+
# BOW模型扩展
89+
class BOWInvertedIndexEngine(SearchEngineBase):
    """Bag-of-words search backed by an inverted index.

    ``inverted_index`` maps each word to the list of ids of the documents
    that contain it.
    """

    def __init__(self):
        super(BOWInvertedIndexEngine, self).__init__()
        # word -> list of document ids containing that word
        self.inverted_index = {}

    def process_corpus(self, id, text):
        """Add document ``id`` to the posting list of each word in ``text``."""
        for word in self.parse_text_to_words(text):
            self.inverted_index.setdefault(word, []).append(id)

    def search(self, query):
        """Return the sorted ids of documents containing every query word.

        Returns an empty list when the query has no words at all or when
        any query word is absent from the index.
        """
        query_words = self.parse_text_to_words(query)
        # A query with no words (empty or punctuation only) matches nothing;
        # without this guard there would be no posting list to intersect.
        if not query_words:
            return []

        posting_lists = []
        for word in query_words:
            postings = self.inverted_index.get(word)
            # One missing word is enough to rule out every document.
            if not postings:
                return []
            posting_lists.append(postings)

        # Intersect starting from the shortest list so the working set is as
        # small as possible. Set intersection does not depend on the order
        # in which documents were indexed.
        posting_lists.sort(key=len)
        matches = set(posting_lists[0])
        for postings in posting_lists[1:]:
            matches.intersection_update(postings)
            if not matches:
                return []
        return sorted(matches)

    @staticmethod
    def parse_text_to_words(text):
        """Split ``text`` into a set of lower-cased words.

        Every character that is neither a word character nor a space is
        replaced by a space first; empty tokens are dropped.
        """
        # Strip punctuation and newlines.
        text = re.sub(r'[^\w ]', ' ', text)
        text = text.lower()
        # Splitting on a single space can yield empty strings; filter them.
        return set(filter(None, text.split(' ')))
152+
153+
154+
# Engine driven by the interactive loop below; BOWEngine() exposes the same
# add_corpus/search methods and can be substituted here.
search_engine = BOWInvertedIndexEngine()
main(search_engine)

000.练习/sort.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
# -*- coding: UTF-8 -*-
2+
3+
# 冒泡排序 时间 O(n^2)
4+
def bubble_sort(list):
    """Sort ``list`` in place in ascending order and return it.

    Classic bubble sort: each pass moves the largest remaining element to
    the end of the unsorted prefix. Stops early as soon as a pass performs
    no swap, since the sequence is then already sorted.
    """
    n = len(list)
    if n < 2:
        return list
    # ``last`` is the index of the final slot of the unsorted prefix.
    for last in range(n - 1, 0, -1):
        swapped = False
        for j in range(last):
            if list[j + 1] < list[j]:
                list[j], list[j + 1] = list[j + 1], list[j]
                swapped = True
        if not swapped:
            break
    return list
14+
15+
16+
# Shell sort. With the n/2, n/4, ... gap sequence used below the worst case is O(n^2), not O(n log n).
17+
def shell_sort(list):
    """Sort ``list`` in place in ascending order and return it.

    Shell sort with the gap sequence n//2, n//4, ..., 1: for each gap, a
    gapped insertion sort is run over the sequence.
    """
    n = len(list)
    # Initial gap.
    gap = n // 2
    while gap > 0:
        for i in range(gap, n):
            # Insert list[i] into the already gap-sorted elements before it.
            temp = list[i]
            j = i
            while j >= gap and list[j - gap] > temp:
                list[j] = list[j - gap]
                j -= gap
            list[j] = temp
        # Next, smaller gap.
        gap //= 2
    return list
35+
36+
# 归并排序
37+
def merge_sort(li):
    """Return the elements of ``li`` sorted in ascending order.

    Top-down merge sort. The input is not modified. A list with zero or one
    element is returned as is; otherwise a new list is returned.
    """

    def merge(left, right):
        """Merge two ascending lists into one new ascending list."""
        result = []
        i = j = 0
        while i < len(left) and j < len(right):
            # On ties take from ``left`` first: it held the elements that
            # came earlier in the input, which keeps the sort stable.
            if left[i] <= right[j]:
                result.append(left[i])
                i += 1
            else:
                result.append(right[j])
                j += 1
        # One side is exhausted; the rest of the other is already sorted.
        result.extend(left[i:])
        result.extend(right[j:])
        return result

    # Zero or one element: nothing to split. The ``<=`` matters, because an
    # empty list would otherwise be split into two empty lists forever.
    if len(li) <= 1:
        return li

    mid = len(li) // 2
    sorted_left = merge_sort(li[:mid])
    sorted_right = merge_sort(li[mid:])
    return merge(sorted_left, sorted_right)
73+
74+
# 快速排序
75+
def quick_sort(arr):
    """Return the elements of ``arr`` sorted in ascending order.

    Out-of-place quicksort. The input is not modified. A sequence with zero
    or one element is returned as is; otherwise a new list is returned.

    The pivot is the middle element and elements equal to it are set aside
    rather than recursed on, so already-sorted, reverse-sorted and
    all-equal inputs recurse only to logarithmic depth.
    """
    if len(arr) <= 1:
        return arr

    pivot = arr[len(arr) // 2]
    less, equal, greater = [], [], []
    for x in arr:
        if x < pivot:
            less.append(x)
        elif pivot < x:
            greater.append(x)
        else:
            equal.append(x)
    return quick_sort(less) + equal + quick_sort(greater)
88+
89+
# 堆排序
90+
def heap_sort(list):
    """Sort ``list`` in place in ascending order and return it.

    Builds a max-heap over the whole sequence, then repeatedly swaps the
    root (current maximum) with the last element of the heap and restores
    the heap property on the shrunken prefix.
    """

    def sift_down(lst, start, end):
        """Restore the max-heap property for the subtree rooted at ``start``.

        Only indices up to and including ``end`` belong to the heap.
        """
        root = start
        while True:
            child = 2 * root + 1
            # No children inside the heap: done.
            if child > end:
                break
            # Pick the larger of the two children.
            if child + 1 <= end and lst[child] < lst[child + 1]:
                child += 1
            # Swap down if the root is smaller than its larger child.
            if lst[root] < lst[child]:
                lst[root], lst[child] = lst[child], lst[root]
                root = child
            else:
                break

    last = len(list) - 1

    # Build the max-heap, starting from the last node that has a child.
    for start in range((last - 1) // 2, -1, -1):
        sift_down(list, start, last)

    # Move the current maximum to the end, then repair the smaller heap.
    for end in range(last, 0, -1):
        list[0], list[end] = list[end], list[0]
        sift_down(list, 0, end - 1)
    return list
121+
122+
123+
# Sample data for a quick manual run; shell_sort(unsort_list) can be called
# here in place of bubble_sort to try the other implementation.
unsort_list = [80, 25, 15, 0, 9, 3, 102, 22, 12, 56, 89]
print(bubble_sort(unsort_list))

0 commit comments

Comments
 (0)