summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas ten Cate <ttencate@gmail.com>2011-05-13 08:31:18 -0700
committerThomas ten Cate <ttencate@gmail.com>2011-05-13 08:31:18 -0700
commit7b8a052d05e0210052dfcd69bb06dee5742d77a5 (patch)
treefceee39e102a8e46a220e055a8ca9293a3b623bb
parentad7f699e39fb8474eb6405e821e0ec2bedeab12f (diff)
parent441437721ff75cb0634c15b1fd6b482a6a1153e9 (diff)
Merge pull request #1 from albertz/master
multiple trackers. more safety. more output
-rw-r--r--.gitignore1
-rw-r--r--README43
-rw-r--r--README.md10
-rw-r--r--better_exchook.py211
-rwxr-xr-xissues.py131
5 files changed, 335 insertions, 61 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0d20b64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.pyc
diff --git a/README b/README
deleted file mode 100644
index 4211bca..0000000
--- a/README
+++ /dev/null
@@ -1,43 +0,0 @@
-sf2github README
-================
-
-`sf2github` is a Python program that reads an XML export from a SourceForge project and pushes this data to GitHub via its REST API.
-
-The script is currently very incomplete and barely tested. If it works for you, great; if not, fix it up and send me a pull request! Currently, only migration of tracker issues is partly implemented, and there's no error handling.
-
-Also note that the GitHub API is quite slow, taking about 5 seconds per request on my machine and internet connection. Migration of a large project will take a while.
-
-Issue migration
----------------
-
-What works (for me):
-
-* SF tracker issues become GitHub tracker issues.
-* Comments on SF become comments in GitHub.
-* Groups and categories on SF both become labels on GitHub.
-* Issues with a status that is exactly the text "Closed" or "Deleted" will be closed on GitHub.
-
-Limitations:
-
-* Only a single tracker is supported, though this could be easily fixed.
-* All issues and comments will be owned by the project's owner on GitHub, but mention the SF username of the original submitter.
-* There's some rubbish in the comment text sometimes (Logged In, user_id, Originator) but this is in the SF XML export.
-* There are encoding errors in the SF export of (at least) comments. Non-ASCII characters are encoded with UTF-8, then decoded (interpreted) as CP1252, and those code points gets encoded as XML entities. The script does not work around this. See also http://stackoverflow.com/questions/5291081/how-did-sourceforge-maim-this-unicode-character
-
-Code migration
---------------
-
-This script doesn't help you to migrate code from SF's Subversion to GitHub. However, I found the following page helpful in doing that: http://help.github.com/svn-importing/
-
-Usage
------
-
-Run the `issues.py` script and it will print instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`:
-
- ./issues.py foo.xml john/bar
-
-License
--------
-
-This software is in the public domain. I accept no responsibility for any damage resulting from it. Use at your own risk.
-
diff --git a/README.md b/README.md
index 28998a6..ec1173e 100644
--- a/README.md
+++ b/README.md
@@ -22,11 +22,19 @@ Limitations:
* Only a single tracker is supported, though this could be easily fixed.
* All issues and comments will be owned by the project's owner on GitHub, but mention the SF username of the original submitter.
* There's some rubbish in the comment text sometimes (Logged In, user_id, Originator) but this is in the SF XML export.
+* There are encoding errors in the SF export of (at least) comments. Non-ASCII characters are encoded with UTF-8, then decoded (interpreted) as CP1252, and those code points get encoded as XML entities. The script does not work around this. See also http://stackoverflow.com/questions/5291081/how-did-sourceforge-maim-this-unicode-character
+
+Code migration
+--------------
+
+This script doesn't help you to migrate code from SF's Subversion to GitHub. However, I found the following page helpful in doing that: http://help.github.com/svn-importing/
Usage
-----
-Run the `issues.py` script and it will print instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`:
+First, export your tracker data from SourceForge as XML; see the [XML export instructions](https://sourceforge.net/apps/trac/sourceforge/wiki/XML%20export) for how to do this.
+
+Run the `issues.py` script and it will print further instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`:
./issues.py foo.xml john/bar
diff --git a/better_exchook.py b/better_exchook.py
new file mode 100644
index 0000000..083cbc2
--- /dev/null
+++ b/better_exchook.py
@@ -0,0 +1,211 @@
+
+# by Albert Zeyer, www.az2000.de
+# code under GPLv3+
+# 2011-04-15
+
+# This is a simple replacement for the standard Python exception handler (sys.excepthook).
+# In addition to what the standard handler does, it also prints all referenced variables
+# (no matter if local, global or builtin) of the code line of each stack frame.
+# See below for some examples and some example output.
+
+import sys
+
+def parse_py_statement(line):
+    """Tokenize one line of Python source with a small state machine.
+
+    Yields ``(kind, text)`` tuples, where ``kind`` is one of:
+
+    * ``"op"``      - a single operator/punctuation character,
+    * ``"str"``     - the contents of a quoted string (backslash-n and
+                      backslash-t are turned into newline and tab, any other
+                      escaped character is taken literally),
+    * ``"id"``      - any other run of characters (names, numbers, keywords),
+    * ``"comment"`` - the text following ``#`` up to the end of the line.
+
+    Whitespace between tokens is dropped. A string that is still open at the
+    end of the line yields nothing.
+    """
+    state = 0
+    curtoken = ""
+    spaces = " \t\n"
+    # "!", "|", "~" and "@" are operator characters too; without them an
+    # expression such as "a!=b" or "x|y" is glued into one bogus identifier.
+    ops = ".,;:+-*/%&|!~@=(){}[]^<>"
+    i = 0
+    def _escape_char(c):
+        if c == "n": return "\n"
+        elif c == "t": return "\t"
+        else: return c
+    while i < len(line):
+        c = line[i]
+        i += 1
+        if state == 0:
+            if c in spaces: pass
+            elif c in ops: yield ("op", c)
+            elif c == "#": state = 6
+            elif c == "\"": state = 1
+            elif c == "'": state = 2
+            else:
+                curtoken = c
+                state = 3
+        elif state == 1: # string via "
+            if c == "\\": state = 4
+            elif c == "\"":
+                yield ("str", curtoken)
+                curtoken = ""
+                state = 0
+            else: curtoken += c
+        elif state == 2: # string via '
+            if c == "\\": state = 5
+            elif c == "'":
+                yield ("str", curtoken)
+                curtoken = ""
+                state = 0
+            else: curtoken += c
+        elif state == 3: # identifier
+            if c in spaces + ops + "#\"'":
+                yield ("id", curtoken)
+                curtoken = ""
+                state = 0
+                # push the terminating character back so state 0 handles it
+                i -= 1
+            else: curtoken += c
+        elif state == 4: # escape in "
+            curtoken += _escape_char(c)
+            state = 1
+        elif state == 5: # escape in '
+            curtoken += _escape_char(c)
+            state = 2
+        elif state == 6: # comment
+            curtoken += c
+    # flush whatever token was still being collected at the end of the line
+    if state == 3: yield ("id", curtoken)
+    elif state == 6: yield ("comment", curtoken)
+
+def grep_full_py_identifiers(tokens):
+    """Yield the dotted identifiers found in a token stream.
+
+    *tokens* is an iterable of ``(kind, text)`` tuples as produced by
+    ``parse_py_statement``. Consecutive ``id . id`` sequences are joined into
+    one dotted name (for example ``"sys.stdin.__class__"``). Python keywords,
+    empty tokens and tokens starting with a digit or a dot are skipped.
+    """
+    import keyword
+    tokens = list(tokens)
+    i = 0
+    # keyword.kwlist covers every reserved word of the running interpreter
+    # ("return", "not", "raise", ...); "print" is listed explicitly because
+    # it is a keyword on Python 2 only.
+    pykeywords = set(keyword.kwlist) | set(["print"])
+    while i < len(tokens):
+        tokentype, token = tokens[i]
+        i += 1
+        if tokentype != "id": continue
+        # absorb every following ".name" pair into the same dotted identifier
+        while i+1 < len(tokens) and tokens[i] == ("op", ".") and tokens[i+1][0] == "id":
+            token += "." + tokens[i+1][1]
+            i += 2
+        if token == "": continue
+        if token in pykeywords: continue
+        if token[0] in ".0123456789": continue
+        yield token
+
+
+def output_limit():
+    """Return the maximum number of characters printed per output line."""
+    max_chars = 300
+    return max_chars
+
+def output(s):
+    """Write *s* followed by a newline to stderr and flush.
+
+    Text longer than ``output_limit()`` characters is cut so that the result,
+    including a trailing "...", is exactly that long.
+    """
+    max_len = output_limit()
+    text = s if len(s) <= max_len else s[:max_len - 3] + "..."
+    stream = sys.stderr
+    stream.write(text)
+    stream.write("\n")
+    stream.flush()
+
+def debug_shell(user_ns, user_global_ns):
+ # Start an interactive IPython shell that runs on the given local
+ # (user_ns) and global (user_global_ns) namespaces.
+ # NOTE(review): IPython.Shell looks like the legacy (pre-0.11) IPython API - confirm it exists in the installed IPython.
+ # IPShellEmbed is imported but not used below.
+ from IPython.Shell import IPShellEmbed,IPShell
+ ipshell = IPShell(argv=[], user_ns=user_ns, user_global_ns=user_global_ns)
+ #ipshell()
+ ipshell.mainloop()
+
+
+def better_exchook(etype, value, tb):
+ # Replacement for sys.excepthook. Prints the traceback through output()
+ # and, for every frame, the values of the identifiers that appear on the
+ # failing source line (looked up as local, then global, then builtin).
+ # etype, value, tb: exception type, exception value and traceback, in the
+ # same order sys.exc_info() returns them.
+ # If the DEBUG environment variable is a non-zero integer, debug_shell()
+ # is started afterwards on the namespaces of the last frame visited.
+ output("EXCEPTION")
+ output('Traceback (most recent call last):')
+ topFrameLocals,topFrameGlobals = None,None
+ try:
+ import linecache
+ # sys.tracebacklimit, when set, caps the number of frames printed.
+ limit = None
+ if hasattr(sys, 'tracebacklimit'):
+ limit = sys.tracebacklimit
+ n = 0
+ _tb = tb
+ # id is a tuple of name parts, e.g. ("sys", "stdin"): the first part is
+ # looked up in namespace, the remaining parts are followed as attributes.
+ def _resolveIdentifier(namespace, id):
+ obj = namespace[id[0]]
+ for part in id[1:]:
+ obj = getattr(obj, part)
+ return obj
+ # Keep an already-resolved value; otherwise try func() and fall back to
+ # old if func() raises anything.
+ def _trySet(old, func):
+ if old is not None: return old
+ try: return func()
+ except: return old
+ while _tb is not None and (limit is None or n < limit):
+ f = _tb.tb_frame
+ # Overwritten on every iteration, so after the loop these hold the
+ # namespaces of the last frame that was visited.
+ topFrameLocals,topFrameGlobals = f.f_locals,f.f_globals
+ lineno = _tb.tb_lineno
+ co = f.f_code
+ filename = co.co_filename
+ name = co.co_name
+ output(' File "%s", line %d, in %s' % (filename,lineno,name))
+ linecache.checkcache(filename)
+ line = linecache.getline(filename, lineno, f.f_globals)
+ if line:
+ line = line.strip()
+ output(' line: ' + line)
+ output(' locals:')
+ alreadyPrintedLocals = set()
+ for tokenstr in grep_full_py_identifiers(parse_py_statement(line)):
+ splittedtoken = tuple(tokenstr.split("."))
+ # Walk every dotted prefix - ("a",), ("a", "b"), ... - so each
+ # intermediate object is printed, and each only once per frame.
+ for token in map(lambda i: splittedtoken[0:i], range(1, len(splittedtoken) + 1)):
+ if token in alreadyPrintedLocals: continue
+ tokenvalue = None
+ tokenvalue = _trySet(tokenvalue, lambda: "<local> " + repr(_resolveIdentifier(f.f_locals, token)))
+ tokenvalue = _trySet(tokenvalue, lambda: "<global> " + repr(_resolveIdentifier(f.f_globals, token)))
+ tokenvalue = _trySet(tokenvalue, lambda: "<builtin> " + repr(_resolveIdentifier(f.f_builtins, token)))
+ tokenvalue = tokenvalue or "<not found>"
+ output(' ' + ".".join(token) + " = " + tokenvalue)
+ alreadyPrintedLocals.add(token)
+ if len(alreadyPrintedLocals) == 0: output(" no locals")
+ _tb = _tb.tb_next
+ n += 1
+
+ # Python 2 "except X, e" syntax. If building the detailed report fails,
+ # say why and fall back to the standard traceback printer.
+ except Exception, e:
+ output("ERROR: cannot get more detailed exception info because:")
+ import traceback
+ for l in traceback.format_exc().split("\n"): output(" " + l)
+ output("simple traceback:")
+ traceback.print_tb(tb)
+
+ # types.InstanceType is used below; NOTE(review): that name exists on
+ # Python 2 only - confirm before running this on Python 3.
+ import types
+ # str(value) that never raises.
+ def _some_str(value):
+ try: return str(value)
+ except: return '<unprintable %s object>' % type(value).__name__
+ # Build the final "Type: message" line; the message part is dropped when
+ # value is None or its string form is empty.
+ def _format_final_exc_line(etype, value):
+ valuestr = _some_str(value)
+ if value is None or not valuestr:
+ line = "%s" % etype
+ else:
+ line = "%s: %s" % (etype, valuestr)
+ return line
+ # etype is printed as-is when it is an instance, None or a string;
+ # otherwise its __name__ is used.
+ if (isinstance(etype, BaseException) or
+ isinstance(etype, types.InstanceType) or
+ etype is None or type(etype) is str):
+ output(_format_final_exc_line(etype, value))
+ else:
+ output(_format_final_exc_line(etype.__name__, value))
+
+ # A missing or non-integer DEBUG variable leaves debug False, because any
+ # error raised while reading it is swallowed.
+ debug = False
+ try:
+ import os
+ debug = int(os.environ["DEBUG"]) != 0
+ except: pass
+ if debug:
+ output("---------- DEBUG SHELL -----------")
+ debug_shell(user_ns=topFrameLocals, user_global_ns=topFrameGlobals)
+
+def install():
+    """Make ``better_exchook`` the handler for uncaught exceptions."""
+    setattr(sys, "excepthook", better_exchook)
+
+if __name__ == "__main__":
+ # some examples
+ # this code produces this output: https://gist.github.com/922622
+
+ # Example 1: z is not defined anywhere, so the expression inside f()
+ # raises; the handler is called by hand to print its report.
+ try:
+ x = {1:2, "a":"b"}
+ def f():
+ y = "foo"
+ x, 42, sys.stdin.__class__, sys.exc_info, y, z
+ f()
+ except:
+ better_exchook(*sys.exc_info())
+
+ # Example 2: y was only ever assigned inside the f() above, so it is
+ # undefined at this level and evaluating the call arguments fails.
+ try:
+ f = lambda x: None
+ f(x, y)
+ except:
+ better_exchook(*sys.exc_info())
+
+ # use this to overwrite the global exception handler
+ sys.excepthook = better_exchook
+ # and fail
+ # (finalfail is not defined in this file, so this line raises and the
+ # installed hook handles the uncaught exception)
+ finalfail(sys)
diff --git a/issues.py b/issues.py
index 4b275f5..fdc2300 100755
--- a/issues.py
+++ b/issues.py
@@ -1,5 +1,8 @@
#!/usr/bin/env python
+import better_exchook
+better_exchook.install()
+
import sys
import optparse
@@ -20,10 +23,6 @@ print 'Parsing XML export...'
soup = BeautifulStoneSoup(open(xml_file_name, 'r'), convertEntities=BeautifulStoneSoup.ALL_ENTITIES)
trackers = soup.document.find('trackers', recursive=False).findAll('tracker', recursive=False)
-if len(trackers) > 1:
- print 'Multiple trackers not yet supported, sorry'
- sys.exit(1)
-tracker = trackers[0]
from urllib import urlencode
from urllib2 import Request, urlopen
@@ -32,9 +31,8 @@ from time import sleep
from getpass import getpass
import re
-github_password = getpass('%s\'s GitHub password: ' % github_user)
-
def rest_call(before, after, data_dict=None):
+ global github_user, github_password
url = 'https://github.com/api/v2/xml/%s/%s/%s' % (before, github_repo, after)
if data_dict is None:
data = None
@@ -52,29 +50,39 @@ def rest_call(before, after, data_dict=None):
def labelify(string):
    """Turn *string* into a label slug.

    The text is lowercased and every run of characters outside
    ``a-z``, ``0-9``, ``.``, ``_`` and ``-`` is replaced by a single ``-``.
    """
    lowered = string.lower()
    return re.sub(r'[^a-z0-9._-]+', '-', lowered)
-closed_status_ids = []
-for status in tracker.statuses('status', recursive=False):
- status_id = status.id.string
- status_name = status.nameTag.string
- if status_name in ['Closed', 'Deleted']:
- closed_status_ids.append(status_id)
+closed_status_ids = set()
+for tracker in trackers:
+ for status in tracker.statuses('status', recursive=False):
+ status_id = status.id.string
+ status_name = status.nameTag.string
+ if status_name in ['Closed', 'Deleted']:
+ closed_status_ids.add(status_id)
+print "closed_status_ids:", closed_status_ids
groups = {}
-for group in tracker.groups('group', recursive=False):
- groups[group.id.string] = group.group_name.string
+for tracker in trackers:
+ for group in tracker.groups('group', recursive=False):
+ groups[group.id.string] = group.group_name.string
+print "groups:", groups
categories = {}
-for category in tracker.categories('category', recursive=False):
- categories[category.id.string] = category.category_name.string
+# Walk every tracker: the bare `tracker` name left bound by the groups loop
+# above refers to the last tracker only, so iterating it alone would miss
+# the categories of all other trackers.
+for tracker in trackers:
+    for category in tracker.categories('category', recursive=False):
+        categories[category.id.string] = category.category_name.string
+print "categories:", categories
started = opts.start_id is None
-for item in tracker.tracker_items('tracker_item', recursive=False):
+def handle_tracker_item(item, issue_title_prefix):
+ global started
if not started:
if item.id.string == opts.start_id:
started = True
else:
- continue
- title = item.summary.string
+ return
+
+ if len(issue_title_prefix) > 0:
+ issue_title_prefix = issue_title_prefix.strip() + " "
+
+ title = issue_title_prefix + item.summary.string
body = '\n\n'.join([
'Converted from [SourceForge issue %s](%s), submitted by %s' % (item.id.string, item.url.string, item.submitter.string),
item.details.string,
@@ -111,3 +119,92 @@ for item in tracker.tracker_items('tracker_item', recursive=False):
print 'Closing...'
rest_call('issues/close', number)
+
+import signal
+def signal_handler(signal, frame):
+    """SIGINT handler: report that Ctrl+C was pressed and exit with status 0."""
+    import sys
+    print 'You pressed Ctrl+C!'
+    sys.exit(0)
+signal.signal(signal.SIGINT, signal_handler)
+
+import readline
+readline.parse_and_bind("tab: complete")
+readline.parse_and_bind("set show-all-if-ambiguous on")
+
+class Completer:
+    """Readline completer that offers the entries of a fixed word list."""
+
+    def __init__(self, words):
+        self.words = words
+        self.prefix = None
+
+    def complete(self, prefix, index):
+        """Return the *index*-th word starting with *prefix*, or None if there is none."""
+        # The candidate list is rebuilt only when the prefix differs from
+        # the one used on the previous call.
+        if prefix != self.prefix:
+            candidates = []
+            for word in self.words:
+                if word.startswith(prefix):
+                    candidates.append(word)
+            self.matching_words = candidates
+            self.prefix = prefix
+        try:
+            return self.matching_words[index]
+        except IndexError:
+            return None
+
+def userRawInput(prompt):
+    """Show *prompt*, read one free-form line with the completer unset, and return it."""
+    readline.set_completer(None)
+    return raw_input(prompt)
+
+def userInput(words, prompt=""):
+    """Ask until the user enters exactly one of *words*, then return that word.
+
+    A completer over *words* is installed before prompting. *prompt*, when
+    non-empty, is printed in front of the list of choices.
+    """
+    readline.set_completer(Completer(words).complete)
+    question = (prompt + " ").lstrip() + "Choice of [" + ", ".join(words) + "] ? "
+    while True:
+        answer = raw_input(question)
+        if answer in words:
+            return answer
+        print "Error: '" + answer + "' unknown, please try again"
+
+def userVerify(txt, abortOnFail=True):
+    """Ask a yes/no question and return True when the answer is "yes".
+
+    On any other answer the program prints "Aborted." and exits with
+    status 1 if *abortOnFail* is true; otherwise False is returned.
+    """
+    if userInput(["yes","no"], txt) == 'yes':
+        return True
+    if abortOnFail:
+        print "Aborted."
+        sys.exit(1)
+    return False
+
+def getIssueTitlePrefix(trackername):
+    """Return the issue-title prefix to use for items of tracker *trackername*.
+
+    Known tracker names map to a fixed prefix ("Bug" maps to the empty
+    string). For any other name the user is asked to accept the default
+    "[<trackername>]" or, failing that, to type a prefix and confirm it;
+    the question is repeated until a prefix is confirmed.
+    """
+    prefixes = {
+        "Bug": "",
+        "Feature Request": "[Feature]",
+        "Patch": "[Patch]",
+        "Tech Support": "[Support]"
+    }
+    if trackername in prefixes:
+        return prefixes[trackername]
+
+    prefix = "[" + trackername + "]"
+    if not userVerify("Tracker '" + trackername + "' is unknown, "
+                      + "I would use the prefix '" + prefix + "', ok?", False):
+
+        while True:
+            prefix = userRawInput("Please enter a prefix: ")
+            # abortOnFail must be False here: answering "no" has to ask for
+            # another prefix instead of terminating the whole program.
+            if userVerify("Is prefix '" + prefix + "' ok?", False):
+                break
+    return prefix
+
+# Collect (item, title prefix) pairs from all trackers first, so that every
+# interactive question is asked before anything is sent to GitHub.
+items = []
+for tracker in trackers:
+    trackeritems = tracker.tracker_items('tracker_item', recursive=False)
+    trackername = tracker.description.string
+    print "Found tracker:", trackername, ",", len(trackeritems), "items"
+    trackername = trackername.replace("Tracking System", "").strip()
+
+    # The prefix is requested once per tracker, and only when the tracker
+    # actually has items.
+    issue_title_prefix = None
+    if len(trackeritems) > 0:
+        issue_title_prefix = getIssueTitlePrefix(trackername)
+        for item in trackeritems:
+            items.append((item, issue_title_prefix))
+
+print "Found", len(items), "items in", len(trackers), "trackers."
+
+userVerify("Everything ok, should I really start?")
+github_password = getpass('%s\'s GitHub password: ' % github_user)
+for item, issue_title_prefix in items:
+    handle_tracker_item(item, issue_title_prefix)
+