Simple GitHub Issue Export using Python

We needed a way to export GitHub issues and comments to import into Jira.

This Python example exports these via the REST API, and is simple to customize to suit your needs.

Create a Personal Access Token and use it in place of the one on line 10 (after the word "token", of course).

You'll need to pick through the GitHub Api if you want more things like attachments.

I'm also a big fan of the Google Advanced Rest Client to review the data.

Interesting coincidence that M$ is in talks to buy GitHub...


import requests
import json
import csv

# Base URL of the GitHub Enterprise (v3) REST API instance to export from.
api_url = 'https://github.ibm.com/api/v3/'
# API path fragment of the repository whose issues/comments are exported.
repo = 'repos/CloudBI/reports'

# Sent with every request: pin the v3 media type and authenticate.
# NOTE(review): hard-coded personal access token -- substitute your own and
# avoid committing real credentials to source control.
headers = {
    'Accept': 'application/vnd.github.v3+json', 
    'Authorization': 'token c0ns1d3rg3tt1ingy0ur0wn'
}

# this will return the number pages of issues to return
def get_num_pages():
    r = requests.request("GET", api_url + repo, headers=headers)
    if r.status_code == 200:
        print 'Success'
    else:
        print 'Response:', r.content
    jdata = json.loads(r.text)
    num_issues = jdata["open_issues"]
    num_pages = num_issues/30
    return num_pages

# set up the comments header
# Create comments.txt and seed it with the CSV column header row.
with open('comments.txt', "wb") as out:
    header_writer = csv.writer(out, quoting=csv.QUOTE_ALL)
    header_writer.writerow(['ISSUE_NUM', 'COMMENT_ID', 'ASSIGNEE', 'COMMENT'])
    out.flush()

# Create issues.csv and seed it with the CSV column header row.
with open('issues.csv', 'wb') as out:
    columns = ['ISSUE_NUM', 'URL', 'REPORTER', 'TITLE', 'BODY', 'ASSIGNEE']
    csv.writer(out, quoting=csv.QUOTE_ALL).writerow(columns)
    out.flush()

# get the user information
def get_user_email(user_name):
    r = requests.request("GET", api_url + 'users/' + str(user_name), headers=headers)
    if r.status_code != 200:
        print 'Response:', r.content
        return False
    user = json.loads(r.text)
    return user['email']

# get comments for an issue
def get_comments(issue_num, num_comments):
    r = requests.request("GET", api_url + repo + '/issues/' + str(issue_num) + '/comments', headers=headers)
    if r.status_code != 200:
        print 'Response:', r.content
        return False
    comments = json.loads(r.text)
    for i in xrange(0, num_comments):
        #print 'Id : {0}. User : {1}, Comment : {2}'.format(comments[i]['id'], get_user_email(comments[i]['user']['login']), comments[i]['body'])
        with open('comments.txt', 'a') as myfile:
            wrtr = csv.writer(myfile, quoting=csv.QUOTE_ALL) 
            wrtr.writerow([
                issue_num, 
                comments[i]['id'], 
                get_user_email(comments[i]['user']['login']), 
                comments[i]['body'].encode('utf-8').replace('"','').replace('\n','').replace('\r','')
            ])
    return True

# used for filtering pages
q = '/issues?filter=all&state=open&page='
 
# first get the number of pages (30 per page) 
num_pages = get_num_pages()


# Get the page
for i in xrange(0, num_pages):
    # get the first page of tickets
    print('Getting page range ' + str(i))
    r = requests.request("GET", api_url + repo + q + str(i), headers=headers)
    if r.status_code != 200:
        print 'Response:', r.content
        exit()
    page = json.loads(r.text)
    for i in xrange(0, 29):
        print 'Getting comments for : {0}'.format(page[i]['number'])
        res = get_comments(str(page[i]['number']), page[i]['comments'])
        with open('issues.csv', 'a') as myfile:
            wrtr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
            # write the csv header
            try:
                assignee_email = get_user_email(page[i]['assignee']['login'])
            except:
                assignee_email = ''

            try:
                reporter_email = get_user_email(page[i]['user']['login'])
            except:
                reporter_email = ''

                
            wrtr.writerow([
                page[i]['number'], 
                page[i]['url'], 
                reporter_email, 
                page[i]['title'].encode('utf-8').replace('"','').replace('\n','').replace('\r',''), 
                page[i]['body'].encode('utf-8').replace('"','').replace('\n','').replace('\r',''), 
                assignee_email
            ])
            myfile.flush()

Comments

Anonymous said…
Hi John, quick question. I'm running the script, which is awesome btw, but after running for a while and hitting like page 11, I'm getting the "list index out of range" error.

Any thoughts on what might be causing that?

Popular posts from this blog

NPI Search Redundancy

freeFTPD

Using ImageMagick and Tesseract to sort TIFFs on Windows