[CentOS-devel] [PATCH] Update centos.git.repolist.py to use pagure api

Tue Apr 9 17:59:57 UTC 2019
Pat Riehecky <riehecky at fnal.gov>

---
 centos.git.repolist.py | 142 +++++++++++++++++++++++++++++++------------------
 1 file changed, 91 insertions(+), 51 deletions(-)

diff --git a/centos.git.repolist.py b/centos.git.repolist.py
index e135ca7..3485a38 100755
--- a/centos.git.repolist.py
+++ b/centos.git.repolist.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+#pylint: disable=line-too-long
 #
 #  License: GPLv3
 #
@@ -6,73 +7,112 @@
 #         Updates:
 #                  Pat Riehecky <riehecky at fnal.gov>
 #
-'''Get list of repos from gitblit RPC, to grab CentOS sources'''
+'''Get list of repos from pagure, to grab CentOS sources'''
 
-import optparse
-import requests
-import simplejson as json
+# for python3 compat
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from __future__ import print_function
+
+import logging
 import sys
+import json
+import textwrap
+import time
 
-RPCURL = "https://git.centos.org/rpc/?req=LIST_REPOSITORIES"
+sys.setrecursionlimit(500)
 
-def read_args():
-    '''
-        read in the command line args and set things up
-    '''
+try:
+    from argparse import ArgumentParser
+except ImportError:  # pragma: no cover
+    print("Please install argparse - rpm: python-argparse", file=sys.stderr)
+    raise
 
-    desc = '''Get list of git repositories from the GitBlit json RPC
-           '''
+try:
+    import requests
+except ImportError:  # pragma: no cover
+    print("Please install requests - rpm: python-requests", file=sys.stderr)
+    raise
 
-    usage = "usage: %prog [options] "
-    parser = optparse.OptionParser(usage=usage, description=desc)
-    parser.add_option('-p', '--project', metavar="<PROJECTS>",
-                      help='''project path (default 'rpms', could be 'all', 'core-sig'...)''',
-                      default='rpms')
 
-    parser.add_option('-b', '--branch', metavar="<branch name>",
-                      help='Only list repos with this branch (default master)',
-                      default = 'master')
+def setup_args():
+    '''
+        Set up the argparse object.
 
-    parser.add_option('-u', '--url', metavar="<URL>",
-                      help='URL to check (default %s)' % (RPCURL),
-                      default = RPCURL)
+        Make sure all fields have defaults so we could use this as an object
+    '''
+    parser = ArgumentParser(description=textwrap.dedent(__doc__))
+
+    parser.add_argument('--debug', action='store_true', default=False,
+                        help='Print debugging information')
+    parser.add_argument('--hostname', default='git.centos.org',
+                        type=str, help='What host should we query?')
+    parser.add_argument('--apiver', default='0',
+                        type=str, help='What api version is the host?')
+    parser.add_argument('--namespace', default='rpms',
+                        type=str, help='What project namespace?')
+    parser.add_argument('--show-forks', action='store_true', default=False,
+                        help='Should we also show project forks?')
+
+    return parser
+
+def run_query(hostname, api, namespace, forks):
+    '''
+        Query the given API version and return the sorted list of project URLs
+    '''
+    list_of_urls = []
+    if str(api) == '0':
+        query = 'https://{hostname}/api/0/projects?per_page=50&namespace={namespace}'.format(hostname=hostname, namespace=namespace)
+        if forks:
+            query = query + '&forks=1'
+        else:
+            query = query + '&forks=0'
 
-    (options, args) = parser.parse_args()
-    return options
+        fetch_prefix = 'https://{hostname}/'.format(hostname=hostname)
 
-def get_repo_list(url, branch, projectpath):
-    '''return a list of repo URLs'''
+        fetch_next_v0(query, fetch_prefix, list_of_urls)
+    else:
+        raise NotImplementedError("Unknown API version %s" % api)
+
+    list_of_urls.sort()
+    return list_of_urls
+
+def fetch_next_v0(page, fetch_prefix, list_of_urls):
+    '''
+        Fetch this page, then recursively follow pagination 'next' links until none remain
+    '''
+    logging.debug('Trying to fetch %s', page)
     try:
-        req = requests.get(url)
+        req = requests.get(page)
     except requests.exceptions.RequestException as err_msg:
-        print err_msg
-        sys.exit(1)
+        print(err_msg, file=sys.stderr)
+        raise
+
+    try:
+        message = json.loads(req.text)
+    except ValueError as err_msg:
+        print(page, file=sys.stderr)
+        print(req.text, file=sys.stderr)
+        print(err_msg, file=sys.stderr)
+        raise
 
-    if req.status_code != 200:
-        print "Unable to access gitblit api at " + url
-        sys.exit(1)
+    for project in message['projects']:
+        list_of_urls.append(fetch_prefix + project['fullname'])
 
+    if 'next' in message['pagination']:
+        if message['pagination']['next']:
+            time.sleep(0.25) # Add a smallish delay to help with load
+            fetch_next_v0(message['pagination']['next'], fetch_prefix, list_of_urls)
 
-    payload = req.text
-    repos = json.loads(payload)
-    branchname = 'refs/heads/' + branch
+if __name__ == '__main__':
 
-    for repo in repos.keys():
-        if projectpath != 'all':
-            if repos[repo]['projectPath'] != projectpath:
-                del repos[repo]
-                continue
-        if branchname not in repos[repo]['availableRefs']:
-            del repos[repo]
+    PARSER = setup_args()
+    ARGS = PARSER.parse_args()
 
-    return repos.keys()
+    if ARGS.debug:
+        logging.basicConfig(level=logging.DEBUG)
 
-def main():
-    '''Broken out so it can be inherited if someone wants'''
-    options = read_args()
-    repos = get_repo_list(url=options.url, branch=options.branch, projectpath=options.project)
-    if repos:
-        print '\n'.join(repos)
+    URLS = run_query(ARGS.hostname, ARGS.apiver, ARGS.namespace, ARGS.show_forks)
 
-if __name__ == "__main__":
-    main()
+    for URL in URLS:
+        print(URL)
-- 
1.8.3.1