Gutenberg project sources


  1. J. B. Bury, The Idea Of Progress, 1920, http://www.gutenberg.org/cache/epub/4557/pg4557.txt
  2. Maud Churton Braby, Modern Marriage and How To Bear It, 1908, https://www.gutenberg.org/files/31529/31529-0.txt
  3. Harriet Martineau, How to Observe Morals and Manners, 1838, http://www.gutenberg.org/cache/epub/33944/pg33944.txt
  4. Irwin Edman, Human Traits and their Social Significance, 1920, http://www.gutenberg.org/cache/epub/22306/pg22306.txt
  5. James Hayden Tufts, The Ethics of Cooperation, 1918, http://www.gutenberg.org/cache/epub/29508/pg29508.txt
  6. James Harvey Robinson, The Mind in the Making: The Relation of Intelligence to Social Reform, 1921, http://www.gutenberg.org/cache/epub/8077/pg8077.txt
  7. Helen Kendrick Johnson, Woman And The Republic, 1897, https://www.gutenberg.org/cache/epub/7300/pg7300.txt
  8. Charles Darwin, On the Origin of Species, 1859, http://www.gutenberg.org/cache/epub/1228/pg1228.txt
  9. Emma Goldman, Anarchism and Other Essays, 1910, http://www.gutenberg.org/cache/epub/2162/pg2162.txt
  10. John F. Hume, The Abolitionists (Together With Personal Memories Of The Struggle For Human Rights), 1830-1864, http://www.gutenberg.org/cache/epub/13176/pg13176.txt

Wikipedia sources


  1. Mining : https://en.wikipedia.org/wiki/Mining FS
  2. Textile Industry : https://en.wikipedia.org/wiki/Textile_industry FS
  3. History of computing hardware : https://en.wikipedia.org/wiki/History_of_computing_hardware MB
  4. Marissa Mayer : https://en.wikipedia.org/wiki/Marissa_Mayer MB
  5. Larry Page : https://en.wikipedia.org/wiki/Larry_Page CL
  6. Liberty : https://en.wikipedia.org/wiki/Liberty CL
  7. Choice : https://en.wikipedia.org/wiki/Choice CC
  8. Sabotage : http://en.wikipedia.org/wiki/Sabotage CC
  9. Social Darwinism : http://en.wikipedia.org/wiki/Social_Darwinism JBT
  10. Anarchism : https://en.wikipedia.org/wiki/Anarchism JBT

(Wikipedia - pattern import)


#!/usr/bin/env python
from pattern.web import Wikipedia

# Fetch the 'sociology' article and print the repr of each section's title
# and content, each prefixed with one space per section level.
article = Wikipedia().search('sociology')
for section in article.sections:
    print(repr(' '))
    print(repr(' ' * section.level + section.title))
    print(repr(' ' * section.level + section.content))
# [[wikipedia-sociology-scraped-content]]

Roels Random Wiki Section Script


#!/usr/bin/env python
import random
import sys

# This script will choose a random section from a list of given articles.

wikilist = ['Mining', 'Textile Industry', 'History of computing hardware', 'Marissa Mayer', 'Larry Page', 'List of stock characters']

# Section titles that are never picked (compared case-insensitively).
SECTION_FILTER = ('References', 'Further Reading', 'See also', 'Other uses')


def pick_section(sections, excluded=SECTION_FILTER):
    """Return a random section whose title is not excluded.

    sections -- objects exposing a ``title`` string attribute.
    excluded -- titles to leave out; matching ignores case.

    Returns None when nothing is left after filtering.
    """
    blocked = set(title.lower() for title in excluded)
    # Filter first, then choose: re-rolling after an unwanted pick could
    # land on another unwanted section.
    candidates = [s for s in sections if s.title.lower() not in blocked]
    if not candidates:
        return None
    return random.choice(candidates)


def main():
    """Print one random section for every article named in ``wikilist``."""
    # Imported here rather than at module level so pick_section() can be
    # used without the third-party Pattern package installed.
    from pattern.web import Wikipedia

    for wiki in wikilist:
        try:
            article = Wikipedia().search(wiki)
            if article is None:
                sys.stderr.write('no article found for %r\n' % wiki)
                continue
            chosen_section = pick_section(article.sections)
            if chosen_section is None:
                sys.stderr.write('no usable section in %r\n' % wiki)
                continue
            print(chosen_section.title)
            print(chosen_section.plaintext())
            print('*' * 40)
        except Exception as err:
            # Best effort: one failing article must not stop the others,
            # but say what went wrong instead of hiding it.
            sys.stderr.write('skipping %r: %r\n' % (wiki, err))


if __name__ == '__main__':
    main()
                 

Roels 35-Random-Paragraph-From-Plaintext-Gutenberg-book-To-CSV-O'matic

#!/bin/env python
# A script that picks random paragraphs from Gutenberg plaintext books and writes them to a CSV file.
import os, random, sys, csv 


# One entry per book, fields in this order:
# (author name, book title, year, source URL, local filename)
authorlist = [
    (
        "J. B. Bury",
        "The Idea Of Progress",
        "1920",
        "http://www.gutenberg.org/cache/epub/4557/pg4557.txt",
        "theideaofprogress.txt",
    ),
    (
        "Maud Churton Braby",
        "Modern Marriage and How To Bear It",
        "1908",
        "https://www.gutenberg.org/files/31529/31529-0.txt",
        "modernmarriageandhowtobearit.txt",
    ),
    (
        "Harriet Martineau",
        "How to Observe Morals and Manners",
        "1838",
        "http://www.gutenberg.org/cache/epub/33944/pg33944.txt",
        "howtoobservemoralsandmanners.txt",
    ),
    (
        "Irwin Edman",
        "Human Traits and their Social Significance",
        "1920",
        "http://www.gutenberg.org/cache/epub/22306/pg22306.txt",
        "humantraitsandtheirsocialsignificance.txt",
    ),
    (
        "James Hayden Tufts",
        "The Ethics of Cooperation",
        "1918",
        "http://www.gutenberg.org/cache/epub/29508/pg29508.txt",
        "theethicsofcooperation.txt",
    ),
    (
        "James Harvey Robinson",
        "The Mind in the Making: The Relation of Intelligence to Social Reform",
        "1921",
        "http://www.gutenberg.org/cache/epub/8077/pg8077.txt",
        "themindinthemaking.txt",
    ),
    (
        "Helen Kendrick Johnson",
        "Woman And The Republic",
        "1897",
        "https://www.gutenberg.org/cache/epub/7300/pg7300.txt",
        "womanandtherepublic.txt",
    ),
    (
        "Charles Darwin",
        "On the Origin of species",
        "1859",
        "http://www.gutenberg.org/cache/epub/1228/pg1228.txt",
        "originofspecies.txt",
    ),
    (
        "Emma Goldman",
        "Anarchism and other essays",
        "1910",
        "http://www.gutenberg.org/cache/epub/2162/pg2162.txt",
        "anarchismandotheressays.txt",
    ),
    (
        "John F. Hume",
        "The Abolitionists (Together With Personal Memories Of The Struggle For Human Rights)",
        "1830-1864",
        "http://www.gutenberg.org/cache/epub/13176/pg13176.txt",
        "theabolitionists.txt",
    ),
]

def split_paragraphs(raw_text):
    """Split a plaintext book into a list of single-line paragraphs.

    Paragraphs are the chunks separated by a blank line. CRLF, CR and LF
    line endings are all accepted. Line breaks inside a paragraph are
    replaced by a space, and chunks of one character or less are dropped.
    """
    normalized = raw_text.replace('\r\n', '\n').replace('\r', '\n')
    paragraphs = []
    for chunk in normalized.split('\n\n'):
        # Join the wrapped lines with a space so the last word of one line
        # does not run into the first word of the next.
        flat = chunk.replace('\n', ' ').strip()
        if len(flat) > 1:
            paragraphs.append(flat)
    return paragraphs


def main():
    """Write 35 randomly picked paragraphs per book to gutenberg.csv.

    Each entry of ``authorlist`` is read from its local file (last tuple
    field). Picks are made with random.choice, so a paragraph can appear
    more than once.
    """
    # 'wb' is the mode Python 2's csv module asks for on its output file.
    with open('gutenberg.csv', 'wb') as f:
        writer = csv.writer(f)
        for book in authorlist:
            print("working on %s" % (book,))
            _author, title, year, source, filename = book
            with open(filename) as book_file:
                paragraphs = split_paragraphs(book_file.read())
            if not paragraphs:
                # random.choice raises IndexError on an empty list.
                print("no usable paragraphs in %s, skipping" % filename)
                continue
            for a in range(35):
                print("paragraph %s of %s" % (a, filename))
                random_pick = random.choice(paragraphs)
                # id, source, subject, year, content
                writer.writerow(['', source, title, year, random_pick])
            print("\n")


if __name__ == '__main__':
    main()