Project Gutenberg sources
- J. B. Bury, The Idea Of Progress, 1920, http://www.gutenberg.org/cache/epub/4557/pg4557.txt
- Maud Churton Braby, Modern Marriage and How To Bear It, 1908, https://www.gutenberg.org/files/31529/31529-0.txt
- Harriet Martineau, How to Observe Morals and Manners, 1838, http://www.gutenberg.org/cache/epub/33944/pg33944.txt
- Irwin Edman, Human Traits and their Social Significance, 1920, http://www.gutenberg.org/cache/epub/22306/pg22306.txt
- James Hayden Tufts, The Ethics of Cooperation, 1918, http://www.gutenberg.org/cache/epub/29508/pg29508.txt
- James Harvey Robinson, The Mind in the Making: The Relation of Intelligence to Social Reform, 1921, http://www.gutenberg.org/cache/epub/8077/pg8077.txt
- Helen Kendrick Johnson, Woman And The Republic, 1897, https://www.gutenberg.org/cache/epub/7300/pg7300.txt
- Charles Darwin, On the Origin of species, 1859, http://www.gutenberg.org/cache/epub/1228/pg1228.txt
- Emma Goldman, Anarchism and other essays, 1910, http://www.gutenberg.org/cache/epub/2162/pg2162.txt
- John F. Hume, The Abolitionists (Together With Personal Memories Of The Struggle For Human Rights), 1830-1864, http://www.gutenberg.org/cache/epub/13176/pg13176.txt
Wikipedia sources
- Mining : https://en.wikipedia.org/wiki/Mining FS
- Textile Industry : https://en.wikipedia.org/wiki/Textile_industry FS
- History of computing hardware : https://en.wikipedia.org/wiki/History_of_computing_hardware MB
- Marissa Mayer : https://en.wikipedia.org/wiki/Marissa_Mayer MB
- Larry Page : https://en.wikipedia.org/wiki/Larry_Page CL
- Liberty : https://en.wikipedia.org/wiki/Liberty CL
- Choice : https://en.wikipedia.org/wiki/Choice CC
- Sabotage : http://en.wikipedia.org/wiki/Sabotage CC
- Social Darwinism : http://en.wikipedia.org/wiki/Social_Darwinism JBT
- Anarchism : https://en.wikipedia.org/wiki/Anarchism JBT
(Wikipedia - pattern import)
#!/usr/bin/env python
# Dump every section of the Wikipedia article on sociology: for each section
# print a separator, then its title and its content, each passed through
# repr() and prefixed with one space per nesting level.
from pattern.web import Wikipedia

article = Wikipedia().search('sociology')

# NOTE(review): pattern's search() is expected to return None when no article
# is found -- confirm against the installed version. Without this guard the
# loop below would fail with an opaque AttributeError on None.
if article is None:
    raise SystemExit("no Wikipedia article found for 'sociology'")

for section in article.sections:
    # Deeper (sub)sections get a longer prefix so the nesting stays visible.
    indent = ' ' * section.level
    print(repr(' '))
    print(repr(indent + section.title))
    print(repr(indent + section.content))
[[wikipedia-sociology-scraped-content]]
Roels Random Wiki Section Script
#!/usr/bin/env python
# This script will choose a random section from a list of given articles.
# For every article it prints the chosen section's title, its plain text and
# a line of asterisks as a separator.
from pattern.web import Wikipedia
import random
import sys

wikilist = ['Mining', 'Textile Industry', 'History of computing hardware', 'Marissa Mayer', 'Larry Page', 'List of stock characters']

# Sections that should never be picked. Titles are compared case-insensitively
# because Wikipedia normally spells the heading "Further reading", which the
# exact-match test against 'Further Reading' would let through.
section_filter = ['References', 'Further Reading', 'See also', 'Other uses']
excluded_titles = set(title.lower() for title in section_filter)

for wiki in wikilist:
    # Best-effort per article: one failing lookup must not stop the others,
    # but the reason is reported instead of being silently discarded.
    try:
        article = Wikipedia().search(wiki)
        if article is None:
            sys.stderr.write('skipping %r: no article found\n' % (wiki,))
            continue

        # Filter first, then pick. Picking first and re-rolling once could
        # still land on an excluded section.
        candidates = [
            section for section in article.sections
            if section.title.lower() not in excluded_titles
        ]
        if not candidates:
            sys.stderr.write('skipping %r: no usable sections\n' % (wiki,))
            continue

        chosen_section = random.choice(candidates)
        print(chosen_section.title)
        print(chosen_section.plaintext())
        print('*' * 40)
    except Exception as error:
        sys.stderr.write('skipping %r: %s\n' % (wiki, error))
Roels 35-Random-Paragraph-From-Plaintext-Gutenberg-book-To-CSV-O'matic
#!/usr/bin/env python
# A script to automatically get paragraphs from Gutenberg plaintext books
# and feed them into a CSV file (gutenberg.csv in the working directory).
import os
import random
import sys
import csv

# How many random paragraphs are sampled from every book.
PARAGRAPHS_PER_BOOK = 35

# author name, book title, year, source, localfilename
authorlist = [
    ("J. B. Bury", "The Idea Of Progress", "1920", "http://www.gutenberg.org/cache/epub/4557/pg4557.txt", 'theideaofprogress.txt'),
    ("Maud Churton Braby", "Modern Marriage and How To Bear It", "1908", "https://www.gutenberg.org/files/31529/31529-0.txt", 'modernmarriageandhowtobearit.txt'),
    ("Harriet Martineau", "How to Observe Morals and Manners", "1838", "http://www.gutenberg.org/cache/epub/33944/pg33944.txt", 'howtoobservemoralsandmanners.txt'),
    ("Irwin Edman", "Human Traits and their Social Significance", "1920", "http://www.gutenberg.org/cache/epub/22306/pg22306.txt", 'humantraitsandtheirsocialsignificance.txt'),
    ("James Hayden Tufts", "The Ethics of Cooperation", "1918", "http://www.gutenberg.org/cache/epub/29508/pg29508.txt", 'theethicsofcooperation.txt'),
    ("James Harvey Robinson", "The Mind in the Making: The Relation of Intelligence to Social Reform", "1921", "http://www.gutenberg.org/cache/epub/8077/pg8077.txt", 'themindinthemaking.txt'),
    ("Helen Kendrick Johnson", "Woman And The Republic", "1897", "https://www.gutenberg.org/cache/epub/7300/pg7300.txt", 'womanandtherepublic.txt'),
    ("Charles Darwin", "On the Origin of species", "1859", "http://www.gutenberg.org/cache/epub/1228/pg1228.txt", 'originofspecies.txt'),
    ("Emma Goldman", "Anarchism and other essays", "1910", "http://www.gutenberg.org/cache/epub/2162/pg2162.txt", 'anarchismandotheressays.txt'),
    ("John F. Hume", "The Abolitionists (Together With Personal Memories Of The Struggle For Human Rights)", "1830-1864", "http://www.gutenberg.org/cache/epub/13176/pg13176.txt", "theabolitionists.txt"),
]


def split_paragraphs(raw_text):
    """Return the paragraphs of a CRLF-formatted plaintext book.

    Paragraphs are the chunks separated by a blank line ('\\r\\n\\r\\n').
    The hard-wrapped lines inside a chunk are joined with a single space;
    deleting the line breaks outright would glue the last word of one line
    to the first word of the next. Chunks of one character or less after
    joining are dropped.
    """
    paragraphs = []
    for chunk in raw_text.split('\r\n\r\n'):
        joined = ' '.join(line for line in chunk.splitlines() if line)
        if len(joined) > 1:
            paragraphs.append(joined)
    return paragraphs


def read_book(path):
    """Read a book file and return its text with line endings untouched.

    The file is read in binary mode so the '\\r\\n' sequences that
    split_paragraphs() relies on are never translated by the platform.
    """
    with open(path, 'rb') as book:
        raw = book.read()
    if not isinstance(raw, str):
        # Python 3 only: bytes must be decoded before text processing.
        # Assumes the downloads are UTF-8 -- TODO confirm per file.
        raw = raw.decode('utf-8', 'replace')
    return raw


def open_csv_for_writing(path):
    """Open *path* the way the csv module expects on the running Python."""
    if sys.version_info[0] >= 3:
        return open(path, 'w', newline='')
    return open(path, 'wb')


def main():
    """Write PARAGRAPHS_PER_BOOK random paragraphs per book to gutenberg.csv.

    Paragraphs are drawn independently with random.choice, so the same
    paragraph can appear more than once for a book.
    """
    with open_csv_for_writing('gutenberg.csv') as f:
        writer = csv.writer(f)
        for entry in authorlist:
            author, title, year, source, filename = entry
            print("working on %s" % (entry,))
            paragraphs = split_paragraphs(read_book(filename))
            if not paragraphs:
                # random.choice() raises IndexError on an empty list.
                sys.stderr.write("no paragraphs found in %s, skipping\n" % filename)
                continue
            for a in range(PARAGRAPHS_PER_BOOK):
                print("paragraph %d of %s" % (a, filename))
                random_pick = random.choice(paragraphs)
                # id, source, subject, year, content
                writer.writerow(['', source, title, year, random_pick])
            print("\n")


if __name__ == '__main__':
    main()