import requests
import pandas as pd


def fetch_issues(owner: str, repo: str, token: str) -> pd.DataFrame:
    """Download all issues of a GitHub repository into a DataFrame.

    Pages through ``GET /repos/{owner}/{repo}/issues`` (100 items per page)
    by following the ``Link: rel="next"`` response header until no further
    page is advertised.

    NOTE: per the GitHub REST API documentation this endpoint returns only
    open issues unless a ``state`` parameter is sent, and it lists pull
    requests alongside issues.

    Args:
        owner: Account (user or organization) that owns the repository.
        repo: Repository name.
        token: GitHub API token sent in the ``Authorization`` header.

    Returns:
        A DataFrame with one row per issue and one column per JSON field
        of the API response. Empty if the repository has no issues.

    Raises:
        requests.HTTPError: If any page request returns a 4xx/5xx status
            (bad token, unknown repository, rate limit, ...).
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {"Authorization": f"token {token}"}
    # Number of issues requested per page.
    params = {"per_page": 100}
    issues = []
    while url:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        # Fail loudly on error statuses: an error payload is a JSON object,
        # and list.extend() on a dict would silently append its keys
        # ("message", "documentation_url") as if they were issues.
        response.raise_for_status()
        issues.extend(response.json())
        url = response.links.get("next", {}).get("url")
        # The "next" URL already carries its own query string; sending
        # params again would only duplicate per_page.
        params = None
    return pd.DataFrame(issues)


if __name__ == "__main__":
    # Usage example: replace the placeholders before running.
    owner = "your_username"
    repo = "your_repository"
    token = "your_token"
    df = fetch_issues(owner, repo, token)
    df.to_csv("issues.csv", index=False)
your_username: リポジトリを所有するGitHubアカウント名を設定
your_repository: Issueを取得したいGitHubリポジトリ名を指定
your_token: GitHubアカウントで発行したAPIトークン（Personal Access Token）を設定
上記が正常終了すれば、以下のようなデータが取得される
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 29 columns):
 #   Column                    Non-Null Count  Dtype
---  ------                    --------------  -----
 0   url                       1 non-null      object
 1   repository_url            1 non-null      object
 2   labels_url                1 non-null      object
 3   comments_url              1 non-null      object
 4   events_url                1 non-null      object
 5   html_url                  1 non-null      object
 6   id                        1 non-null      int64
 7   node_id                   1 non-null      object
 8   number                    1 non-null      int64
 9   title                     1 non-null      object
 10  user                      1 non-null      object
 11  labels                    1 non-null      object
 12  state                     1 non-null      object
 13  locked                    1 non-null      bool
 14  assignee                  1 non-null      object
 15  assignees                 1 non-null      object
 16  milestone                 0 non-null      object
 17  comments                  1 non-null      int64
 18  created_at                1 non-null      object
 19  updated_at                1 non-null      object
 20  closed_at                 0 non-null      object
 21  author_association        1 non-null      object
 22  active_lock_reason        0 non-null      object
 23  body                      1 non-null      object
 24  closed_by                 0 non-null      object
 25  reactions                 1 non-null      object
 26  timeline_url              1 non-null      object
 27  performed_via_github_app  0 non-null      object
 28  state_reason              0 non-null      object
dtypes: bool(1), int64(3), object(25)
memory usage: 353.0+ bytes