wdhbsikejbfskjefbskjbfdskbibl


The Bible is the truth

and was written by ..




adabjdbakj


[[pot]]


CODE 1:
from pattern.web import PDF
from pattern.en import sentiment, parse
from pattern.db import Datasheet

# Extract the text of each PDF and store it as one (text, label) row of a
# datasheet, then write the datasheet to CSV.
# NOTE(review): the training script further down reads
# 'bible_quran_torah.csv', not the file saved here -- confirm which
# dataset is the intended one.
ds = Datasheet()

for pdf_path, label in (('Bible.pdf', 'Bible'), ('quran.pdf', 'Quran')):
    # PDFs are binary, so open in 'rb'; the `with` block closes the handle
    # once the text has been extracted instead of leaking it.
    with open(pdf_path, 'rb') as f:
        pdf = PDF(f)
        ds.append((pdf.string, label))

ds.save('bible_quran.csv')

[[bashjbdhabdhabd]]
# Tell the user the script reached its end.
print('saved!')

CODE 2:
from pattern.web import URL, plaintext
from pattern.vector import Document, NB, KNN, SLP, SVM, POLYNOMIAL
from pattern.db import csv
from pattern.en import parse
import math

# Train a classifier on labelled scripture texts, then print the predicted
# class for a series of speeches and essays.
# Alternative configuration tried earlier: SVM(kernel=POLYNOMIAL, degree=10)
classifier = SVM()

# Keyword arguments shared by every parse() call, so training texts and
# texts to classify are preprocessed the same way.
# NOTE(review): `lemata` is reproduced exactly from the original calls. The
# keyword documented by pattern.en is spelled `lemmata`, so this flag
# presumably has no effect -- confirm before renaming it, because enabling
# lemmatization changes the format of the parsed output.
PARSE_OPTIONS = dict(tokenize=True, lemata=True, tags=False,
                     relations=False, chunks=False)

print('TRAINING:')
# An earlier, disabled experiment trained on fixed-size slices of each text;
# the current version trains on one document per CSV row.
for text, book in csv('bible_quran_torah.csv'):
    v = Document(parse(text, **PARSE_OPTIONS), type=book, stopwords=True)
    classifier.train(v)

print('CLASSES: %s' % (classifier.classes,))

print('RESULTS\n======')

return_discrete = True


def _classify_file(path):
    """Read the text file at `path` and return the classifier's verdict.

    The file content is flattened to a single line, cleaned with
    plaintext(), parsed with PARSE_OPTIONS and wrapped in a Document before
    being passed to classifier.classify(). The module-level
    `return_discrete` flag is forwarded as the `discrete` argument.

    Raises IOError if the file cannot be opened.
    """
    # `with` closes the handle; the original open(...).read() leaked it.
    with open(path) as handle:
        # Newlines become spaces (not ''), otherwise the last word of a
        # line is fused with the first word of the next line.
        raw = handle.read().replace('\n', ' ')
    parsed = parse(plaintext(raw), **PARSE_OPTIONS)
    # NOTE(review): training documents are built with stopwords=True while
    # these documents use the default -- confirm the difference is intended.
    return classifier.classify(Document(parsed), discrete=return_discrete)


# (heading printed before the result, file to classify)
# NOTE(review): 'speech_malcolmx' has no '.txt' extension, unlike every
# other entry -- confirm the file name on disk.
SAMPLES = (
    ("OBAMA", "speech_obama.txt"),
    ("OSAMA", "speech_osama.txt"),
    ("MALCOLM X", "speech_malcolmx"),
    ("ANITA", "essay_anita.txt"),
    ("POPE", "speech_pope.txt"),
    ("NETANYAHU", "speech_netanyahu.txt"),
    ("LUTHER KING", "speech_luther-king.txt"),
    ("CQRRELATIONS", "cqrrelations.txt"),
)

for label, path in SAMPLES:
    print(label)
    print(_classify_file(path))








------------------------