From d290bf34a342a263126b60f7a2c228e9c9da91db Mon Sep 17 00:00:00 2001 From: Thomas ten Cate Date: Sun, 13 Mar 2011 18:31:27 +0100 Subject: Fix encoding crash. --- README | 38 ++++++++++++++++++++++++++++++++++++++ issues.py | 12 +++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 README diff --git a/README b/README new file mode 100644 index 0000000..7a2fb8f --- /dev/null +++ b/README @@ -0,0 +1,38 @@ +sf2github README +================ + +`sf2github` is a Python program that reads an XML export from a SourceForge project and pushes this data to GitHub via its REST API. + +The script is currently very incomplete and barely tested. If it works for you, great; if not, fix it up and send me a pull request! Currently, only migration of tracker issues is partly implemented, and there's no error handling. + +Also note that the GitHub API is quite slow, taking about 5 seconds per request on my machine and internet connection. Migration of a large project will take a while. + +Issue migration +--------------- + +What works (for me): + +* SF tracker issues become GitHub tracker issues. +* Comments on SF become comments in GitHub. +* Groups and categories on SF both become labels on GitHub. +* Issues with a status that is exactly the text "Closed" or "Deleted" will be closed on GitHub. + +Limitations: + +* Only a single tracker is supported, though this could be easily fixed. +* All issues and comments will be owned by the project's owner on GitHub, but mention the SF username of the original submitter. +* There's some rubbish in the comment text sometimes (Logged In, user_id, Originator) but this is in the SF XML export. +* There are encoding errors in the SF export of (at least) comments. Non-ASCII characters are encoded with UTF-8, then decoded (interpreted) as CP1252, and those code points get encoded as XML entities. The script does not work around this. 
See also http://stackoverflow.com/questions/5291081/how-did-sourceforge-maim-this-unicode-character + +Usage +----- + +Run the `issues.py` script and it will print instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`: + + ./issues.py foo.xml john/bar + +License +------- + +This software is in the public domain. I accept no responsibility for any damage resulting from it. Use at your own risk. + diff --git a/issues.py b/issues.py index 70b88c1..4b275f5 100755 --- a/issues.py +++ b/issues.py @@ -4,6 +4,7 @@ import sys import optparse parser = optparse.OptionParser(usage='Usage: %prog [options] sfexport.xml githubuser/repo') +parser.add_option('-s', '--start', dest='start_id', action='store', help='id of first issue to import; useful for aborted runs') opts, args = parser.parse_args() try: @@ -35,7 +36,10 @@ github_password = getpass('%s\'s GitHub password: ' % github_user) def rest_call(before, after, data_dict=None): url = 'https://github.com/api/v2/xml/%s/%s/%s' % (before, github_repo, after) - data = urlencode(data_dict or {}) + if data_dict is None: + data = None + else: + data = urlencode([(unicode(key).encode('utf-8'), unicode(value).encode('utf-8')) for key, value in data_dict.iteritems()]) headers = { 'Authorization': 'Basic %s' % b64encode('%s:%s' % (github_user, github_password)), } @@ -63,7 +67,13 @@ categories = {} for category in tracker.categories('category', recursive=False): categories[category.id.string] = category.category_name.string +started = opts.start_id is None for item in tracker.tracker_items('tracker_item', recursive=False): + if not started: + if item.id.string == opts.start_id: + started = True + else: + continue title = item.summary.string body = '\n\n'.join([ 'Converted from [SourceForge issue %s](%s), submitted by %s' % (item.id.string, item.url.string, item.submitter.string), -- cgit v1.2.1