Train a classifier on the full text of the Bible, the Quran and the Torah, then ask it which of the three books a given speech or essay most resembles.
CODE 1:
from pattern.web import PDF
from pattern.db import Datasheet

# Build the corpus: one (full text, label) row per book.
ds = Datasheet()

# pattern.web.PDF takes the raw bytes of a .pdf file and exposes the
# extracted plain text as .string.
pdf = PDF(open('Bible.pdf', 'rb').read())
ds.append((pdf.string, 'Bible'))

pdf = PDF(open('quran.pdf', 'rb').read())
ds.append((pdf.string, 'Quran'))

ds.save('bible_quran.csv')
print 'saved!'
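
CODE 2 below trains from bible_quran_torah.csv, so a third row for the Torah was presumably appended in the same way. A minimal sketch, assuming the source file is called torah.pdf and the label is 'Torah' (neither appears in the original):

from pattern.web import PDF
from pattern.db import Datasheet

ds = Datasheet.load('bible_quran.csv')                             # the two rows saved above
ds.append((PDF(open('torah.pdf', 'rb').read()).string, 'Torah'))   # assumed filename and label
ds.save('bible_quran_torah.csv')                                   # the file CODE 2 reads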
CODE 2:
from pattern.web import plaintext
from pattern.vector import Document, NB, KNN, SLP, SVM, POLYNOMIAL  # NB/KNN/SLP: alternative classifiers, not used below
from pattern.db import csv
from pattern.en import parse

# classifier = SVM(kernel=POLYNOMIAL, degree=10)
classifier = SVM()

print 'TRAINING:'
for text, book in csv('bible_quran_torah.csv'):
    # Alternative experiment (left commented out): split each book into
    # 10 chunks and train on every chunk as a separate document.
    # part_len = len(text) // 10
    # for i in xrange(1, 10):
    #     s = text[i*part_len : i*part_len + part_len]
    #     v = Document(parse(s, tokenize=True, lemmata=True, tags=False,
    #                        relations=False, chunks=False), type=book, stopwords=True)
    #     classifier.train(v)
    #
    # Here each book is trained as a single document of lemmatised words.
    v = Document(parse(text, tokenize=True, lemmata=True, tags=False,
                       relations=False, chunks=False), type=book, stopwords=True)
    classifier.train(v)

print 'CLASSES:', classifier.classes
print 'RESULTS\n======'
return_discrete = True

# Every speech/essay gets the same preprocessing as the training data,
# then is classified as the book it most resembles.
speeches = [
    ('OBAMA',        'speech_obama.txt'),
    ('OSAMA',        'speech_osama.txt'),
    ('MALCOLM X',    'speech_malcolmx'),
    ('ANITA',        'essay_anita.txt'),
    ('POPE',         'speech_pope.txt'),
    ('NETANYAHU',    'speech_netanyahu.txt'),
    ('LUTHER KING',  'speech_luther-king.txt'),
    ('CQRRELATIONS', 'cqrrelations.txt'),
]
for name, path in speeches:
    print name
    s = open(path).read().replace('\n', ' ')
    s = parse(plaintext(s), tokenize=True, lemmata=True, tags=False,
              relations=False, chunks=False)
    print classifier.classify(Document(s), discrete=return_discrete)
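
The script imports NB, KNN and SLP as well, so other classifiers were presumably tried; swapping one in only changes the constructor. With discrete=False, classify() returns a probability per class instead of a single label. A rough sketch, not part of the original run, using Naive Bayes and the Obama speech as the example:

from pattern.web import plaintext
from pattern.vector import Document, NB
from pattern.db import csv
from pattern.en import parse

nb = NB()
for text, book in csv('bible_quran_torah.csv'):
    nb.train(Document(parse(text, tokenize=True, lemmata=True, tags=False,
                            relations=False, chunks=False), type=book, stopwords=True))

s = parse(plaintext(open('speech_obama.txt').read()),
          tokenize=True, lemmata=True, tags=False, relations=False, chunks=False)
print nb.classify(Document(s), discrete=True)    # single label
print nb.classify(Document(s), discrete=False)   # {class: probability} distribution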
------------------------