summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas ten Cate <ttencate@gmail.com>2011-03-13 18:31:27 +0100
committerThomas ten Cate <ttencate@gmail.com>2011-03-13 18:52:22 +0100
commitd290bf34a342a263126b60f7a2c228e9c9da91db (patch)
treed8b62b5de0589d429741a5bd400bd4d47096814e
parent52567d3d5c844ab0c82e0362bc92c326ec669400 (diff)
Fix encoding crash.
-rw-r--r--README38
-rwxr-xr-xissues.py12
2 files changed, 49 insertions, 1 deletions
diff --git a/README b/README
new file mode 100644
index 0000000..7a2fb8f
--- /dev/null
+++ b/README
@@ -0,0 +1,38 @@
+sf2github README
+================
+
+`sf2github` is a Python program that reads an XML export from a SourceForge project and pushes this data to GitHub via its REST API.
+
+The script is currently very incomplete and barely tested. If it works for you, great; if not, fix it up and send me a pull request! Currently, only migration of tracker issues is partly implemented, and there's no error handling.
+
+Also note that the GitHub API is quite slow, taking about 5 seconds per request on my machine and internet connection. Migration of a large project will take a while.
+
+Issue migration
+---------------
+
+What works (for me):
+
+* SF tracker issues become GitHub tracker issues.
+* Comments on SF become comments in GitHub.
+* Groups and categories on SF both become labels on GitHub.
+* Issues with a status that is exactly the text "Closed" or "Deleted" will be closed on GitHub.
+
+Limitations:
+
+* Only a single tracker is supported, though this could be easily fixed.
+* All issues and comments will be owned by the project's owner on GitHub, but mention the SF username of the original submitter.
+* There's some rubbish in the comment text sometimes (Logged In, user_id, Originator), but this is already present in the SF XML export.
+* There are encoding errors in the SF export of (at least) comments. Non-ASCII characters are encoded with UTF-8, then decoded (interpreted) as CP1252, and those code points get encoded as XML entities. The script does not work around this. See also http://stackoverflow.com/questions/5291081/how-did-sourceforge-maim-this-unicode-character
+
+Usage
+-----
+
+Run the `issues.py` script and it will print instructions. Basically, if your SF XML export is in `foo.xml`, your GitHub username is `john` and your repository is `bar`:
+
+ ./issues.py foo.xml john/bar
+
+License
+-------
+
+This software is in the public domain. I accept no responsibility for any damage resulting from it. Use at your own risk.
+
diff --git a/issues.py b/issues.py
index 70b88c1..4b275f5 100755
--- a/issues.py
+++ b/issues.py
@@ -4,6 +4,7 @@ import sys
import optparse
parser = optparse.OptionParser(usage='Usage: %prog [options] sfexport.xml githubuser/repo')
+parser.add_option('-s', '--start', dest='start_id', action='store', help='id of first issue to import; useful for aborted runs')
opts, args = parser.parse_args()
try:
@@ -35,7 +36,10 @@ github_password = getpass('%s\'s GitHub password: ' % github_user)
def rest_call(before, after, data_dict=None):
url = 'https://github.com/api/v2/xml/%s/%s/%s' % (before, github_repo, after)
- data = urlencode(data_dict or {})
+ if data_dict is None:
+ data = None
+ else:
+ data = urlencode([(unicode(key).encode('utf-8'), unicode(value).encode('utf-8')) for key, value in data_dict.iteritems()])
headers = {
'Authorization': 'Basic %s' % b64encode('%s:%s' % (github_user, github_password)),
}
@@ -63,7 +67,13 @@ categories = {}
for category in tracker.categories('category', recursive=False):
categories[category.id.string] = category.category_name.string
+started = opts.start_id is None
for item in tracker.tracker_items('tracker_item', recursive=False):
+ if not started:
+ if item.id.string == opts.start_id:
+ started = True
+ else:
+ continue
title = item.summary.string
body = '\n\n'.join([
'Converted from [SourceForge issue %s](%s), submitted by %s' % (item.id.string, item.url.string, item.submitter.string),