This repository was archived by the owner on Dec 4, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
executable file
·147 lines (121 loc) · 5.76 KB
/
Copy pathmain.py
File metadata and controls
executable file
·147 lines (121 loc) · 5.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
import argparse
import os, sys
import libficconvert as lfc
# ## command line argument management:
def outputpath_arg(string):
    """Validate the ``output`` command-line value (argparse ``type=`` callable).

    Accepted values:

    * an existing directory, or
    * a file path whose parent directory exists. A bare file name with no
      directory component is resolved against the current working directory.

    Returns:
        A mutable 4-item list ``[kind, directory, file, overwrite_warning]``:
        ``kind`` is ``"DIR"`` or ``"FILE"``, ``directory`` is an absolute
        path, ``file`` is the file name (``''`` for ``"DIR"``) and
        ``overwrite_warning`` is True when the given file already exists.

    Raises:
        argparse.ArgumentTypeError: if the value is neither an existing
            directory nor a file inside an existing directory.
    """
    if os.path.isdir(string):
        return ["DIR", os.path.abspath(string), '', False]

    parent, name = os.path.split(string)
    # os.path.split("book.epub") yields an empty parent, and os.path.isdir('')
    # is False; an empty parent really means "the current directory".
    # Requiring a non-empty name keeps "" and "missing_dir/" invalid.
    if name and os.path.isdir(parent or os.curdir):
        path, file = os.path.split(os.path.abspath(string))
        ovrw_warning = os.path.isfile(string)
        return ["FILE", path, file, ovrw_warning]

    raise argparse.ArgumentTypeError("invalid output path. must be a existing dir or a proper file")
def ext_format(string):
    """Return the requested output format in lower case (argparse ``type=``).

    The comparison is case-insensitive. Any value other than 'epub', 'html',
    'mobi' or 'azw3' raises ``argparse.ArgumentTypeError``.
    """
    normalized = string.lower()
    if normalized not in ('epub', 'html', 'mobi', 'azw3'):
        raise argparse.ArgumentTypeError("invalid extension. must be 'epub', 'html', 'mobi' or 'azw3'")
    return normalized
def parse(argv=None):
    """Parse and post-process the command-line arguments.

    Args:
        argv: optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        The ``argparse.Namespace`` with ``input``, ``output``, ``overwrite``,
        ``format``, ``embed`` and ``adult``. ``output`` is the list built by
        ``outputpath_arg``: ``[kind, directory, file, overwrite_warning]``.

    Side effects:
        Prints the parsed namespace to stdout. Exits the process with status 1
        when the target file already exists and ``-W`` was not given.
    """
    parser = argparse.ArgumentParser()  # prog='myprogram'
    parser.add_argument('input', type=lfc.url_checker, help="the url of the original AO3 fic"
                        "(tip: recommended formats are `https://archiveofourown.org/works/12345678` and `12345678`)")
    parser.add_argument('output', type=outputpath_arg, help="output path. Can be either an existing directory or a full file path"
                        "(within an existing directory). The file name will be generated automatically if it is the former.")
    parser.add_argument('-W', '--overwrite', help="do allow overwriting output file", default=False, action='store_true')
    parser.add_argument('-f', '--format', type=ext_format, help="choose the output format (overridden by a possible extension in the output filepath)", default='epub')
    parser.add_argument('-e', '--embed', help="choose to embed images into the final document", default=False, action='store_true')
    parser.add_argument('-A', '--adult', help='do confirm you are an adult now. if you don\'t, you might be prompted to do so later.', default=False, action='store_true')

    args = parser.parse_args(argv)

    if args.output[0] == 'FILE':
        # Only a real ".ext" suffix counts as an extension; comparing the last
        # four characters alone would also match names such as "myepub".
        _, dot, ext = args.output[2].rpartition('.')
        if dot and ext in ('epub', 'html', 'mobi', 'azw3'):
            # An explicit extension in the file name overrides -f/--format.
            args.format = ext
        else:
            # No recognised extension: append the chosen format, then re-check
            # for an existing file because the final name has just changed.
            args.output[2] += '.' + args.format
            args.output[3] = os.path.isfile(os.path.join(args.output[1], args.output[2]))

    if (not args.overwrite) and args.output[3]:
        print("Error: file \"" + os.path.join(args.output[1], args.output[2]) + "\" already exists and overwriting is disabled. Use -W to enable.", file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is not
        # available when the site module is not loaded.
        sys.exit(1)

    print(args)
    return args
def main(args):
    """Download the work named by ``args.input`` and write it in ``args.format``.

    Steps, all delegated to ``libficconvert`` (``lfc``) helpers:

    1. Fetch the work page. If ``lfc.scanforagebarrier`` reports an age
       barrier, ask for confirmation (unless ``args.adult`` is set) and fetch
       the page again with ``?view_adult=true``.
    2. Get the download URL and the style from the page, then fetch the
       download.
    3. If no file name was given (directory output), build one from the work
       metadata and apply the same overwrite protection as ``parse``.
    4. Dump the download to disk. For ``html`` that is the final file; for any
       other format it is a temporary ``main.html`` that is post-processed and
       passed to ``lfc.localconvert``.

    Args:
        args: the namespace returned by ``parse``. ``args.output[2]`` may be
            filled in here, and ``args.output[3]`` updated.

    Returns:
        None. Returns early, without writing anything, when the user declines
        the adult-content prompt or interrupts it with Ctrl-C.
    """
    address, page = lfc.getwebpage(args.input)
    tempdir_obj = lfc.TDir()  # keep a reference for the whole run
    tempdir = tempdir_obj.name
    page = lfc.makeseekable(page)

    # if the work contains adult content, be sure of user consent
    if lfc.scanforagebarrier(page):
        page.close()
        if not args.adult:
            print('This work could have adult content. If you proceed you have agreed that you are willing to see such content.', file=sys.stderr)
            try:
                answer = input('proceed? (y/n) > ')
            except KeyboardInterrupt:
                return
            if answer.lower() not in ('y', 'yes', 'confirm', 'proceed'):
                return
        address, page = lfc.getwebpage(address, '?view_adult=true')
        page = lfc.makeseekable(page)

    down_url = lfc.getdownloadurl(page)
    _, down_page = lfc.getwebpage(down_url, base_site='download.archiveofourown.org')
    page.seek(0)
    style = lfc.getstylefrompage(page)
    style = lfc.style_parser(style)

    if not args.output[2]:  # if no filename was provided
        page.seek(0)
        temp_title, temp_author = lfc.getmetafrompage(page)
        # NOTE(review): args.input[7:] presumably strips a fixed prefix left by
        # lfc.url_checker so that only the work id remains - confirm.
        args.output[2] = '{}-{}_by_{}.{}'.format(args.input[7:], temp_title, temp_author, args.format)
        # parse() cannot check for an existing file in directory mode because
        # the name is only known now; honour -W here as well.
        target = os.path.join(args.output[1], args.output[2])
        args.output[3] = os.path.isfile(target)
        if args.output[3] and not args.overwrite:
            page.close()
            print("Error: file \"" + target + "\" already exists and overwriting is disabled. Use -W to enable.", file=sys.stderr)
            sys.exit(1)

    if args.format == 'html':
        mainfile_path = os.path.join(args.output[1], args.output[2])
    else:
        mainfile_path = os.path.join(tempdir, 'main.html')

    # Context managers make sure the handles are closed even if a helper raises.
    with open(mainfile_path, 'bw') as pagedump:
        lfc.filedump(down_page, pagedump)
    page.close()

    # use binary because io_insert has a problem with text files
    with open(mainfile_path, 'r+b') as mainfile:
        # todo: include images if asked
        if args.format == 'html':
            lfc.add_style_to_final_html(mainfile, style)
        else:
            lfc.add_chapter_boundaries(mainfile)
            if args.embed:
                mainfile.seek(0)
                lfc.parse_images(mainfile, tempdir)

    if args.format != 'html':
        # Convert only after the intermediate file has been closed.
        lfc.localconvert(mainfile_path, os.path.join(args.output[1], args.output[2]), style)
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the conversion.
    try:
        cli_args = parse()
        main(cli_args)
    except Exception as failure:
        # Print a short summary first and wait for Enter, then re-raise so the
        # interpreter prints the full traceback.
        for line in (
            repr(failure),
            "\noops, something went wrong. The error is right here ^^^. Press enter for more details",
        ):
            print(line, file=sys.stderr)
        input()  # show any error that could have happened
        raise
"""
try: # TODO: options?
#raise ValueError('fbpsr')
# argv: [pyfile, ...] in every case
print(sys.argv)
if len(sys.argv)==1:
print(__doc__)
else:
if argv[1][-4:] == 'html':
fix_html(argv[1])
elif argv[1][-4:] == 'epub':
fix_epub(argv[1])
else:
print(__doc__)
input('[Press enter to close]')
except Exception as err:
print(repr(err))
print("\noops, something went wrong. The error is right here ^^^. Press enter for more details")
input() # show any error that could have happened
raise
"""