[Python] GitHubのIssueをAPI経由でCSVファイル化する方法

2024-09-16 2024-09-17

import requests
import pandas as pd

def fetch_issues(owner, repo, token):
    """Fetch the issues of a GitHub repository into a DataFrame.

    Requests ``/repos/{owner}/{repo}/issues`` from the GitHub REST API and
    keeps following the ``next`` relation of each response's ``Link`` header
    until no further page is advertised.

    NOTE(review): GitHub's REST documentation states that this endpoint
    returns only open issues by default and also lists pull requests (those
    carry a ``pull_request`` key) -- filter the result if that matters.

    Args:
        owner: Account (user or organization) that owns the repository.
        repo: Repository name.
        token: API token, sent as ``Authorization: token <token>``.

    Returns:
        A ``pandas.DataFrame`` with one row per JSON object returned by the
        API; empty when the API returns no issues.

    Raises:
        requests.HTTPError: If any page answers with a 4xx/5xx status.
        requests.Timeout: If a request exceeds the timeout.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {"Authorization": f"token {token}"}
    params = {"per_page": 100}  # number of issues requested per page

    issues = []
    while url:
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Fail loudly on auth/permission/not-found errors: the error body is a
        # JSON object, and extending the list with a dict would silently add
        # its keys ("message", ...) as bogus rows.
        response.raise_for_status()
        issues.extend(response.json())
        url = response.links.get("next", {}).get("url")
        # The "next" URL already contains the full query string, so resending
        # params would only duplicate per_page.
        params = None

    return pd.DataFrame(issues)

# Usage example: replace the three placeholder values with real ones.
owner, repo, token = "your_username", "your_repository", "your_token"

# Fetch the issues as a DataFrame, then write it to issues.csv
# without the index column.
df = fetch_issues(owner=owner, repo=repo, token=token)
df.to_csv("issues.csv", index=False)

your_username: リポジトリの所有者（GitHubのユーザー名または組織名）を設定
your_repository: 取得したいGitHubのリポジトリ名を指定
your_token: GitHubアカウント上で発行したAPIトークン（Personal Access Token）を設定

上記が正常終了すれば issues.csv が出力され、df には以下のようなデータが取得されている。

# Print a summary of the DataFrame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 29 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   url                       1 non-null      object
 1   repository_url            1 non-null      object
 2   labels_url                1 non-null      object
 3   comments_url              1 non-null      object
 4   events_url                1 non-null      object
 5   html_url                  1 non-null      object
 6   id                        1 non-null      int64 
 7   node_id                   1 non-null      object
 8   number                    1 non-null      int64 
 9   title                     1 non-null      object
 10  user                      1 non-null      object
 11  labels                    1 non-null      object
 12  state                     1 non-null      object
 13  locked                    1 non-null      bool  
 14  assignee                  1 non-null      object
 15  assignees                 1 non-null      object
 16  milestone                 0 non-null      object
 17  comments                  1 non-null      int64 
 18  created_at                1 non-null      object
 19  updated_at                1 non-null      object
 20  closed_at                 0 non-null      object
 21  author_association        1 non-null      object
 22  active_lock_reason        0 non-null      object
 23  body                      1 non-null      object
 24  closed_by                 0 non-null      object
 25  reactions                 1 non-null      object
 26  timeline_url              1 non-null      object
 27  performed_via_github_app  0 non-null      object
 28  state_reason              0 non-null      object
dtypes: bool(1), int64(3), object(25)
memory usage: 353.0+ bytes