' % type_name).encode(encoding))
if s1 != None and s1 != "":
- output_file.write('%d | ' % line1)
- output_file.write('')
- output_file.write(convert(s1, linesize=linesize, ponct=1))
+ output_file.write((' | %d | ' % line1).encode(encoding))
+ output_file.write(''.encode(encoding))
+ output_file.write(convert(s1, linesize=linesize, ponct=1).encode(encoding))
output_file.write(' | ')
else:
s1 = ""
output_file.write(' | ')
if s2 != None and s2 != "":
- output_file.write('%d | '%line2)
+ output_file.write(('%d | '%line2).encode(encoding))
output_file.write('')
- output_file.write(convert(s2, linesize=linesize, ponct=1))
+ output_file.write(convert(s2, linesize=linesize, ponct=1).encode(encoding))
output_file.write(' | ')
else:
s2 = ""
@@ -289,15 +383,16 @@ def empty_buffer(output_file):
buf = []
-def parse_input(input_file, output_file,
+def parse_input(input_file, output_file, input_file_name, output_file_name,
exclude_headers, show_hunk_infos):
global add_cpt, del_cpt
global line1, line2
global hunk_off1, hunk_size1, hunk_off2, hunk_size2
if not exclude_headers:
- output_file.write(html_hdr)
- output_file.write(table_hdr)
+ title_suffix = ' ' + input_file_name
+ output_file.write(html_hdr.format(title_suffix, encoding, desc, "", modified_date, lang).encode(encoding))
+ output_file.write(table_hdr.encode(encoding))
while True:
l = input_file.readline()
@@ -308,10 +403,12 @@ def parse_input(input_file, output_file,
if m:
empty_buffer(output_file)
file1 = m.groups()[0]
- l = input_file.readline()
- m = re.match('^\+\+\+ ([^\s]*)', l)
- if m:
- file2 = m.groups()[0]
+ while True:
+ l = input_file.readline()
+ m = re.match('^\+\+\+ ([^\s]*)', l)
+ if m:
+ file2 = m.groups()[0]
+ break
add_filename(file1, file2, output_file)
hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
continue
@@ -353,58 +450,66 @@ def parse_input(input_file, output_file,
add_comment(l, output_file)
empty_buffer(output_file)
- output_file.write(table_footer)
+ output_file.write(table_footer.encode(encoding))
if not exclude_headers:
- output_file.write(html_footer)
+ output_file.write(html_footer.format("", dtnow.strftime("%d.%m.%Y")).encode(encoding))
def usage():
print '''
-diff2html.py [-i file] [-o file] [-x]
+diff2html.py [-e encoding] [-i file] [-o file] [-x]
diff2html.py -h
Transform a unified diff from stdin to a colored side-by-side HTML
page on stdout.
+stdout may not work with UTF-8, instead use -o option.
-i file set input file, else use stdin
+ -e encoding set file encoding (default utf-8)
-o file set output file, else use stdout
-x exclude html header and footer
-t tabsize set tab size (default 8)
-l linesize set maximum line size is there is no word break (default 20)
-r show \\r characters
-k show hunk infos
+ -a algo line diff algorithm (0: linediff characters, 1: word, 2: simplediff characters) (default 0)
-h show help and exit
'''
def main():
global linesize, tabsize
global show_CR
+ global encoding
+ global algorithm
- input_file = sys.stdin
- output_file = sys.stdout
+ input_file_name = ''
+ output_file_name = ''
exclude_headers = False
show_hunk_infos = False
try:
- opts, args = getopt.getopt(sys.argv[1:], "hi:o:xt:l:rk",
- ["help", "input=", "output=",
+ opts, args = getopt.getopt(sys.argv[1:], "he:i:o:xt:l:rka:",
+ ["help", "encoding=", "input=", "output=",
"exclude-html-headers", "tabsize=",
- "linesize=", "show-cr", "show-hunk-infos"])
+ "linesize=", "show-cr", "show-hunk-infos", "algorithm="])
except getopt.GetoptError, err:
- print str(err) # will print something like "option -a not recognized"
+ print unicode(err) # will print something like "option -a not recognized"
usage()
sys.exit(2)
- output = None
verbose = False
for o, a in opts:
if o in ("-h", "--help"):
usage()
sys.exit()
+ elif o in ("-e", "--encoding"):
+ encoding = a
elif o in ("-i", "--input"):
- input_file = open(a, "r")
+ input_file = codecs.open(a, "r", encoding)
+ input_file_name = a
elif o in ("-o", "--output"):
- output_file = open(a, "w")
+ output_file = codecs.open(a, "w")
+ output_file_name = a
elif o in ("-x", "--exclude-html-headers"):
exclude_headers = True
elif o in ("-t", "--tabsize"):
@@ -415,11 +520,29 @@ def main():
show_CR = True
elif o in ("-k", "--show-hunk-infos"):
show_hunk_infos = True
+ elif o in ("-a", "--algorithm"):
+ algorithm = int(a)
else:
assert False, "unhandled option"
- parse_input(input_file, output_file,
+
+ # Use stdin if no input file is set
+ if not ('input_file' in locals()):
+ input_file = codecs.getreader(encoding)(sys.stdin)
+
+ # Use stdout if no output file is set
+ if not ('output_file' in locals()):
+ output_file = codecs.getwriter(encoding)(sys.stdout)
+
+ parse_input(input_file, output_file, input_file_name, output_file_name,
exclude_headers, show_hunk_infos)
+def parse_from_memory(txt, exclude_headers, show_hunk_infos):
+ " Parses diff from memory and returns a string with html "
+ input_stream = StringIO.StringIO(txt)
+ output_stream = StringIO.StringIO()
+ parse_input(input_stream, output_stream, '', '', exclude_headers, show_hunk_infos)
+ return output_stream.getvalue()
+
if __name__ == "__main__":
main()