X-Git-Url: http://git.droids-corp.org/?p=diff2html.git;a=blobdiff_plain;f=diff2html.py;h=71ac19a3343b450c0d07a1a2683531caa9167129;hp=6628a3622d47c82798b83b48585551a4aa1f21cb;hb=659c1a60bdf4fc73098bc97b48db70b68d82ad23;hpb=4f5410c869e253007baa8b5693c09cce4f7f99f5
diff --git a/diff2html.py b/diff2html.py
index 6628a36..71ac19a 100644
--- a/diff2html.py
+++ b/diff2html.py
@@ -1,4 +1,5 @@
#! /usr/bin/python
+# coding=utf-8
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -18,105 +19,128 @@
# Transform a unified diff from stdin to a colored
# side-by-side HTML page on stdout.
#
-# Authors: Olivier MATZ
+# Authors: Olivier Matz
# Alan De Smet
+# Sergey Satskiy
+# scito
#
# Inspired by diff2html.rb from Dave Burt
# (mainly for html theme)
#
# TODO:
# - The sane function currently mashes non-ASCII characters to "."
-# Instead be clever and convert to something like "xF0"
+# Instead be clever and convert to something like "xF0"
# (the hex value), and mark with a . Even more clever:
# Detect if the character is "printable" for whatever definition,
# and display those directly.
-
-import sys, re, htmlentitydefs, getopt
+import sys, re, htmlentitydefs, getopt, StringIO, codecs, datetime
+try:
+ from simplediff import diff, string_diff
+except ImportError:
+ sys.stderr.write("info: simplediff module not found, only linediff is available\n")
+ sys.stderr.write("info: it can be downloaded at https://github.com/paulgb/simplediff\n")
# minimum line size, we add a zero-sized breakable space every
# LINESIZE characters
linesize = 20
tabsize = 8
-inputfile = sys.stdin
-outputfile = sys.stdout
-exclude_headers = False
show_CR = False
-show_hunk_infos = False
-
-
-html_hdr="""
-
-
- HTML Diff
-
-
-
+encoding = "utf-8"
+lang = "en"
+algorithm = 0
+
+desc = "File comparison"
+dtnow = datetime.datetime.now()
+modified_date = "%s+01:00"%dtnow.isoformat()
+
+html_hdr = """
+
+
+
+
+
+ HTML Diff{0}
+
+
+
+
+
+
+
+
+
"""
-html_footer="""
+html_footer = """
+
"""
-table_hdr="""
+table_hdr = """
"""
-table_footer="""
+table_footer = """
"""
-DIFFON="\x01"
-DIFFOFF="\x02"
+DIFFON = "\x01"
+DIFFOFF = "\x02"
-buffer=[]
-add_cpt, del_cpt = 0,0
-line1, line2 = 0,0
-hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0,0,0,0
+buf = []
+add_cpt, del_cpt = 0, 0
+line1, line2 = 0, 0
+hunk_off1, hunk_size1, hunk_off2, hunk_size2 = 0, 0, 0, 0
# Characters we're willing to word wrap on
-WORDBREAK=" \t;.,/):"
+WORDBREAK = " \t;.,/):-"
def sane(x):
- r=""
+ r = ""
for i in x:
j = ord(i)
- if i not in ['\t', '\n'] and ((j < 32) or (j >= 127)):
- r=r+"."
+ if i not in ['\t', '\n'] and (j < 32):
+ r = r + "."
else:
- r=r+i
+ r = r + i
return r
def linediff(s, t):
+ '''
+ Original line diff algorithm of diff2html. It's character based.
+ '''
if len(s):
- s=str(reduce(lambda x,y:x+y, [ sane(c) for c in s ]))
+ s = unicode(reduce(lambda x, y:x+y, [ sane(c) for c in s ]))
if len(t):
- t=str(reduce(lambda x,y:x+y, [ sane(c) for c in t ]))
-
- m,n = len(s), len(t)
- d=[[(0,0) for i in range(n+1)] for i in range(m+1)]
- x=[[(0,0) for i in range(n+1)] for i in range(m+1)]
-
-
- d[0][0] = (0, (0,0))
+ t = unicode(reduce(lambda x, y:x+y, [ sane(c) for c in t ]))
+
+ m, n = len(s), len(t)
+ d = [[(0, 0) for i in range(n+1)] for i in range(m+1)]
+
+
+ d[0][0] = (0, (0, 0))
for i in range(m+1)[1:]:
- d[i][0] = (i,(i-1,0))
+ d[i][0] = (i,(i-1, 0))
for j in range(n+1)[1:]:
- d[0][j] = (j,(0,j-1))
+ d[0][j] = (j,(0, j-1))
for i in range(m+1)[1:]:
for j in range(n+1)[1:]:
@@ -124,34 +148,34 @@ def linediff(s, t):
cost = 0
else:
cost = 1
- d[i][j] = min((d[i-1][j][0] + 1, (i-1,j)),
- (d[i][j-1][0] + 1, (i,j-1)),
- (d[i-1][j-1][0] + cost, (i-1,j-1)))
-
- l=[]
- coord = (m,n)
- while coord != (0,0):
+ d[i][j] = min((d[i-1][j][0] + 1, (i-1, j)),
+ (d[i][j-1][0] + 1, (i, j-1)),
+ (d[i-1][j-1][0] + cost, (i-1, j-1)))
+
+ l = []
+ coord = (m, n)
+ while coord != (0, 0):
l.insert(0, coord)
- x,y = coord
+ x, y = coord
coord = d[x][y][1]
l1 = []
l2 = []
for coord in l:
- cx,cy = coord
+ cx, cy = coord
child_val = d[cx][cy][0]
-
+
father_coord = d[cx][cy][1]
- fx,fy = father_coord
+ fx, fy = father_coord
father_val = d[fx][fy][0]
diff = (cx-fx, cy-fy)
- if diff == (0,1):
+ if diff == (0, 1):
l1.append("")
l2.append(DIFFON + t[fy] + DIFFOFF)
- elif diff == (1,0):
+ elif diff == (1, 0):
l1.append(DIFFON + s[fx] + DIFFOFF)
l2.append("")
elif child_val-father_val == 1:
@@ -161,251 +185,331 @@ def linediff(s, t):
l1.append(s[fx])
l2.append(t[fy])
- r1,r2 = (reduce(lambda x,y:x+y, l1), reduce(lambda x,y:x+y, l2))
- return r1,r2
+ r1, r2 = (reduce(lambda x, y:x+y, l1), reduce(lambda x, y:x+y, l2))
+ return r1, r2
+
+
+def diff_changed(old, new):
+ '''
+ Returns the differences based on characters between two strings
+ wrapped with DIFFON and DIFFOFF using `diff`.
+ '''
+ con = {'=': (lambda x: x),
+ '+': (lambda x: DIFFON + x + DIFFOFF),
+ '-': (lambda x: '')}
+ return "".join([(con[a])("".join(b)) for a, b in diff(old, new)])
+
+
+def diff_changed_ts(old, new):
+ '''
+ Returns a tuple for a two sided comparison based on characters, see `diff_changed`.
+ '''
+ return (diff_changed(new, old), diff_changed(old, new))
+
+
+def word_diff(old, new):
+ '''
+ Returns the difference between the old and new strings based on words. Punctuation is not part of the word.
+
+ Params:
+ old the old string
+ new the new string
+
+ Returns:
+ the output of `diff` on the two strings after splitting them
+ on runs of non-word characters, keeping the separators (a list of
+ change instructions; see the docstring of `diff`)
+ '''
+ separator_pattern = '(\W+)';
+ return diff(re.split(separator_pattern, old, flags=re.UNICODE), re.split(separator_pattern, new, flags=re.UNICODE))
+
+
+def diff_changed_words(old, new):
+ '''
+ Returns the difference between two strings based on words (see `word_diff`)
+ wrapped with DIFFON and DIFFOFF.
+
+ Returns:
+ the diff as one string: inserted parts wrapped in DIFFON/DIFFOFF, deleted parts omitted.
+ '''
+ con = {'=': (lambda x: x),
+ '+': (lambda x: DIFFON + x + DIFFOFF),
+ '-': (lambda x: '')}
+ return "".join([(con[a])("".join(b)) for a, b in word_diff(old, new)])
+
+
+def diff_changed_words_ts(old, new):
+ '''
+ Returns a tuple for a two sided comparison based on words, see `diff_changed_words`.
+ '''
+ return (diff_changed_words(new, old), diff_changed_words(old, new))
def convert(s, linesize=0, ponct=0):
- i=0
- t=""
- l=[]
+ i = 0
+ t = u""
for c in s:
# used by diffs
- if c==DIFFON:
- t += ''
- elif c==DIFFOFF:
- t += ""
+ if c == DIFFON:
+ t += u''
+ elif c == DIFFOFF:
+ t += u""
# special html chars
elif htmlentitydefs.codepoint2name.has_key(ord(c)):
- t += "&%s;"%(htmlentitydefs.codepoint2name[ord(c)])
+ t += u"&%s;" % (htmlentitydefs.codepoint2name[ord(c)])
i += 1
# special highlighted chars
- elif c=="\t" and ponct==1:
+ elif c == "\t" and ponct == 1:
n = tabsize-(i%tabsize)
- if n==0:
- n=tabsize
- t += ('»'+' '*(n-1))
- elif c==" " and ponct==1:
- t += '·'
- elif c=="\n" and ponct==1:
+ if n == 0:
+ n = tabsize
+ t += (u'»'+' '*(n-1))
+ elif c == " " and ponct == 1:
+ t += u'·'
+ elif c == "\n" and ponct == 1:
if show_CR:
- t += '\'
+ t += u'\'
else:
t += c
i += 1
- if linesize and (WORDBREAK.count(c)==1):
- t += ''
- i=0
- if linesize and i>linesize:
- i=0
- t += ""
-
+ if linesize and (WORDBREAK.count(c) == 1):
+ t += u''
+ i = 0
+ if linesize and i > linesize:
+ i = 0
+ t += u""
+
return t
-def add_comment(s):
- outputfile.write('
\n')
+ output_file.write('\n')
- if s1!="":
+ if s1 != "":
line1 += 1
- if s2!="":
+ if s2 != "":
line2 += 1
-def empty_buffer():
- global buffer
+def empty_buffer(output_file):
+ global buf
global add_cpt
global del_cpt
if del_cpt == 0 or add_cpt == 0:
- for l in buffer:
- add_line(l[0], l[1])
+ for l in buf:
+ add_line(l[0], l[1], output_file)
elif del_cpt != 0 and add_cpt != 0:
l0, l1 = [], []
- for l in buffer:
+ for l in buf:
if l[0] != None:
l0.append(l[0])
if l[1] != None:
l1.append(l[1])
- max = (len(l0) > len(l1)) and len(l0) or len(l1)
- for i in range(max):
+ max_len = (len(l0) > len(l1)) and len(l0) or len(l1)
+ for i in range(max_len):
s0, s1 = "", ""
- if i