Commit 1b29dd7d authored by Mattia Bondanza

First commit.

parents
import urlgrabber.grabber as urlgrab
import os
import OpenLibrary
# Text that precedes every ISBN in the saved catalogue page.
ISBN_MARKER = 'ISBN: '
# Number of characters taken after the marker; this is the length of a
# hyphenated ISBN-13 (13 digits plus 4 hyphens).
ISBN_LENGTH = 17
# Opening of the anchor tags whose href is inspected on each book page.
LINK_MARKER = '<a class="btn btn--primary btn--tiny" href="'
# Only hrefs starting with this path are treated as chapter PDFs.
CHAPTER_PREFIX = '/en/content/chapterpdf'
# Characters that never need quoting in a POSIX shell word.
_SHELL_SAFE_CHARS = frozenset(
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789@%+=:,./-_')


def _positions_after(text, marker):
    """Yield the index just past each occurrence of ``marker`` in ``text``."""
    hit = text.find(marker)
    while hit != -1:
        yield hit + len(marker)
        hit = text.find(marker, hit + 1)


def _sh_quote(text, force=False):
    """Return ``text`` as a single, safely quoted POSIX shell word.

    Text made only of shell-safe characters is returned unchanged unless
    ``force`` is true; anything else is wrapped in single quotes, with
    embedded single quotes escaped as ``'\\''``.
    """
    if text and not force and all(ch in _SHELL_SAFE_CHARS for ch in text):
        return text
    return "'" + text.replace("'", "'\\''") + "'"


def _chapter_links(page):
    """Return the chapter-PDF hrefs found in ``page``, in document order."""
    links = []
    for begin in _positions_after(page, LINK_MARKER):
        # The href runs up to the next double quote (or to the end of the
        # page when there is none).
        end = page.find('"', begin)
        link = page[begin:] if end == -1 else page[begin:end]
        if link.startswith(CHAPTER_PREFIX):
            links.append(link)
    return links


def _fetch_book_page(grabber, isbn):
    """Download the book page for ``isbn`` and return its content.

    The page is saved to a temporary ``index.html`` in the current
    directory, which is removed again even when reading it fails.
    """
    grabber.urlgrab('http://pubs.rsc.org/en/content/ebook/' + isbn,
                    'index.html')
    try:
        with open('index.html', 'r') as page:
            return page.read()
    finally:
        os.remove('index.html')


def _book_commands(stripped_isbn, links):
    """Return the shell commands that download and merge one book.

    The commands create a working directory named after the ISBN, fetch
    every chapter into ``<n>.pdf``, merge them with ``pdfunite`` into
    ``../<isbn>.pdf`` and finally delete the working directory.
    """
    # The ISBN comes from scraped text and ends up in ``rm -r``; quote it
    # whenever it is not a plain shell word.
    directory = _sh_quote(stripped_isbn)
    parts = ['mkdir ' + directory + '\ncd ' + directory + '\n']
    for number, link in enumerate(links):
        # Links come from a remote page: always quote, escaping any "'".
        url = _sh_quote('http://pubs.rsc.org' + link, force=True)
        parts.append('wget ' + url + ' -O ' + str(number) + '.pdf\n')
    # Pdf unite
    parts.append('pdfunite ')
    parts.extend(str(number) + '.pdf ' for number in range(len(links)))
    # The space after the newline is kept from the original output; it only
    # adds a harmless leading blank to the following "cd .." line.
    parts.append(_sh_quote('../' + stripped_isbn + '.pdf') + '\n ')
    parts.append('cd ..\n')
    parts.append('rm -r ' + directory + '\n')
    return ''.join(parts)


def _generate_script():
    """Write ``dl.sh`` with download commands for every listed book.

    Reads ``index_general.html``, takes the text following each
    ``ISBN: `` marker, fetches the matching book page and appends the
    commands for its chapter PDFs to ``dl.sh``.
    """
    # Read the input first so a missing catalogue does not truncate dl.sh.
    with open('index_general.html', 'r') as index:
        catalogue = index.read()

    grabber = urlgrab.URLGrabber()
    with open('dl.sh', 'w') as script:
        for begin in _positions_after(catalogue, ISBN_MARKER):
            isbn = catalogue[begin:begin + ISBN_LENGTH]
            print('Generating download script for  ' + isbn)
            stripped_isbn = isbn.replace('-', '')
            metadata = OpenLibrary.metadataFromISBN(stripped_isbn)
            print('Author ' + metadata['author'] +
                  ' Title ' + metadata['title'])

            links = _chapter_links(_fetch_book_page(grabber, isbn))
            if not links:
                # pdfunite cannot run without input files, so emit nothing
                # for a book that exposes no chapter PDFs.
                print('No chapter PDFs found for ' + isbn + ', skipping')
                continue
            script.write(_book_commands(stripped_isbn, links))


_generate_script()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment