diff --git a/content_processor.py b/content_processor.py
index 22ce2e3..63ab058 100644
--- a/content_processor.py
+++ b/content_processor.py
@@ -1,5 +1,5 @@
 from multiprocessing import Pool
-import re, sys, logging
+import re, sys, logging, string
 
 from ready_queue import ready_queue
 
@@ -9,13 +9,21 @@ def rankKeywords(text):
 	invalid_keywords = ['', ' ', "i", "a", "an", "and", "the", "for", "be", "to", "or", "too", "also"]
 	ranks = {}
 	text = text.split(' ')
+	exclude = set(string.punctuation)
 	for t in text:
+		#remove every punctuation character from the token (anywhere in it, not only at the edges)
+		temp = t
+		t = ''
+		for i in range(len(temp)):
+			if(temp[i] not in exclude):
+				t += temp[i]
+		t = t.strip()
 		if t in invalid_keywords:
 			continue
 		if not ranks.has_key(t):
 			ranks[t] = 1
 		else:
-			ranks[t] += 1
+			ranks[t] += 1 
 	return ranks
 
 def stripPunctuation(text):
@@ -83,13 +91,18 @@ def processBody(self):
 			offset = 0
 			i = 0
 			l = []
-			while True:
+			cont = True
+			while cont:
+				#this divides the text into chunks of roughly 500 words each
+				#j is findnth's result for the text from i onward, i.e. an offset relative to i (presumably of the 500th space -- confirm against findnth)
 				j = self.findnth(self.text[i:],' ',500)
-				offset += j
+				#j == -1 makes this the final pass (presumably fewer than 500 spaces are left -- confirm against findnth)
 				if j == -1:
-					break
-				l.append(self.text[i:j])
-				i = offset + j+1
+					cont = False
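+				#Intended: append a 500-word string to l on every pass except the last, and the remaining text (500 words or fewer) on the last pass
+				#NOTE(review): on the last pass j == -1, so text[i:i+j] is text[i:i-1] -- empty when i > 0, and missing the final character when i == 0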
+				l.append(self.text[i:i+j])
+				i += j+1
 			logger.debug("processing with %i threads" % len(l))
 			try:
 				if len(l) == 0:
@@ -136,4 +149,4 @@ def getDataDict(self):
 		for k,v in self.keywords.items():
 			if v < 3:
 				del self.keywords[k]
-		return {"address":self.url, "title":self.title, "status":self.status, "size":self.size, "keywords":self.keywords}
\ No newline at end of file
+		return {"address":self.url, "title":self.title, "status":self.status, "size":self.size, "keywords":self.keywords}
diff --git a/query.py b/query.py
index 7d079fc..064f799 100644
--- a/query.py
+++ b/query.py
@@ -56,7 +56,7 @@ def enqueue(self, urls):
 			return False
 		if len(urls) == 0:
 			return True
-		args = [{'address':unicode(u)} for u in urls]
+		args = [{'address':u.decode("utf8")} for u in urls]
 		result = self.connection.execute(self.queue_table.insert(), args)
 		if result:
 			return True
@@ -81,7 +81,7 @@ def dequeue(self):
 		return False
 	
 	def checkCrawled(self, url):
-		s =  select([self.crawl_table]).where(self.crawl_table.c.address == unicode(url))
+		s =  select([self.crawl_table]).where(self.crawl_table.c.address == url.decode("utf8"))
 		result = self.connection.execute(s)
 		if len(result.fetchall()) > 0:
 			result.close()
@@ -116,4 +116,4 @@ def addPage(self, data):
 		return True
 
 	def close(self):
-		self.connection.close()
\ No newline at end of file
+		self.connection.close()