# azure-pipelines-github-telemetry.yml
# Starter pipeline
# Start with a minimal pipeline that you can customize to build and deploy your code.
# Add steps that build, run tests, deploy, and more:
# https://aka.ms/yaml
trigger: none
pr: none
schedules:
- cron: "0 */2 * * *"
  displayName: Build every 2 hours
  branches:
    include:
    - main
  always: true
name: $(TeamProject)_$(Build.DefinitionName)_$(SourceBranchName)_$(Date:yyyyMMdd)$(Rev:.r)
stages:
- stage: Build
  pool:
    vmImage: 'ubuntu-latest'
  jobs:
  - job: Build
    timeoutInMinutes: 120
    steps:
    - task: UsePythonVersion@0
      inputs:
        versionSpec: '3.x'
        addToPath: true
        architecture: 'x64'
    - script: |
        pip install azure-storage-queue azure-storage-blob pytz python-dateutil
      displayName: Install build tools
    - task: PythonScript@0
      displayName: Publish SONiC telemetry
      env:
        AZURE_STORAGE_CONNECTION_STRING: '$(AZURE_STORAGE_CONNECTION_STRING)'
        GITHUB_TOKEN: '$(GITHUB_TOKEN)'
      inputs:
        scriptSource: 'inline'
        script: |
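          # Inline telemetry script: collects pull requests updated within a time window
          # from the GitHub GraphQL API and uploads them as JSON lines to Azure Blob Storage.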
          import datetime, base64, json, time, os, re, pytz, math
          from urllib import request
          from urllib.error import HTTPError
          from http.client import IncompleteRead
          from azure.core.exceptions import ResourceNotFoundError
          from dateutil import parser
          import http.client
          from azure.storage.blob import BlobServiceClient

          CONTAINER = 'build'
          INFO_PULLREQUESTS_FILE = "info/pullrequests.json"
          GITHUB_TOKEN = '$(GITHUB_TOKEN)'
          AZURE_STORAGE_CONNECTION_STRING = '$(AZURE_STORAGE_CONNECTION_STRING)'
          blob_service_client = BlobServiceClient.from_connection_string(AZURE_STORAGE_CONNECTION_STRING)
          url = "https://api.github.com/graphql"

          timestamp = datetime.datetime.utcnow()
          timeoffset = datetime.timedelta(minutes=5)
          until = (timestamp - timeoffset).replace(tzinfo=pytz.UTC)
          if 'END_TIMESTAMP' in os.environ and os.environ['END_TIMESTAMP']:
              until = parser.isoparse(os.environ['END_TIMESTAMP']).replace(tzinfo=pytz.UTC)
          delta = datetime.timedelta(minutes=60)
          if 'TIMEDELTA_IN_MINUTES' in os.environ and os.environ['TIMEDELTA_IN_MINUTES']:
              timedelta_in_minutes = max(int(os.environ['TIMEDELTA_IN_MINUTES']), 30)
              delta = datetime.timedelta(minutes=timedelta_in_minutes)
          max_timedelta_in_days = 35
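          # Note: the window above can be overridden at queue time via the START_TIMESTAMP,
          # END_TIMESTAMP and TIMEDELTA_IN_MINUTES variables, assuming the usual Azure Pipelines
          # mapping of (non-secret) pipeline variables to task environment variables.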
          # Upload a list of JSON lines to blob storage under the given prefix, using a timestamped file name.
          def upload_to_blob(lines, blob_prefix, file_prefix=""):
              now = datetime.datetime.now()
              if not lines:
                  print("no lines to upload, skipped")
                  return
              local_file_name = file_prefix + now.strftime("_%Y%m%d-%H%M%S-%f") + '.json'
              with open(local_file_name, "w") as file:
                  count = file.write('\n'.join(lines))
              blob_file_name = blob_prefix + now.strftime("/%Y/%m/%d/") + local_file_name
              blob_client = blob_service_client.get_blob_client(container=CONTAINER, blob=blob_file_name)
              with open(local_file_name, "rb") as data:
                  blob_client.upload_blob(data)
              os.remove(local_file_name)
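          # Work out where this run should start: an explicit START_TIMESTAMP wins, then the
          # checkpoint stored in info/pullrequests.json, and otherwise at most 35 days back.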
          def get_start_timestamp(force=False):
              if not force and 'START_TIMESTAMP' in os.environ and os.environ['START_TIMESTAMP']:
                  return parser.isoparse(os.environ['START_TIMESTAMP']).replace(tzinfo=pytz.UTC)
              blob_client = blob_service_client.get_blob_client(container=CONTAINER, blob=INFO_PULLREQUESTS_FILE)
              try:
                  download_stream = blob_client.download_blob()
                  info = json.loads(download_stream.readall())
                  return parser.isoparse(info['timestamp']).replace(tzinfo=pytz.UTC)
              except ResourceNotFoundError:
                  pass
              start_timestamp = datetime.datetime.utcnow() - datetime.timedelta(days=max_timedelta_in_days)
              return start_timestamp.replace(tzinfo=pytz.UTC)
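          # Persist the end of the processed window as the checkpoint for the next run.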
          def update_start_timestamp():
              if 'END_TIMESTAMP' in os.environ and os.environ['END_TIMESTAMP']:
                  last = get_start_timestamp(True)
                  if last > until:
                      print('skipped updating the start timestamp, until:{0} < last:{1}'.format(until.isoformat(), last.isoformat()))
                      return
              blob_client = blob_service_client.get_blob_client(container=CONTAINER, blob=INFO_PULLREQUESTS_FILE)
              info = {}
              info['timestamp'] = until.isoformat()
              data = json.dumps(info)
              blob_client.upload_blob(data, overwrite=True)
          # The GitHub GraphQL search API returns at most 100 items per page and at most 10 pages
          # (1000 items) per query. To work around that limit, split the query into "delta"-sized
          # time ranges, each of which must contain fewer than 1000 updated pull requests.
          def get_pullrequests():
              results = []
              start_timestamp = get_start_timestamp()
              print('start: {0}, until: {1}'.format(start_timestamp.isoformat(), until.isoformat()), flush=True)
              query_pattern = '''
              {
                search(query: "org:azure org:sonic-net is:pr updated:%s..%s sort:updated", %s type: ISSUE, first: 100) {
                  issueCount
                  pageInfo {
                    hasNextPage
                    endCursor
                  }
                  edges {
                    cursor
                    node {
                      ... on PullRequest {
                        url
                        number
                        assignees (first: 10) {
                          nodes {
                            login
                          }
                        }
                        title
                        createdAt
                        closedAt
                        merged
                        mergedAt
                        updatedAt
                        mergedBy {login}
                        author {login}
                        baseRefName
                        baseRepository {name, url, owner{login}}
                        repository {name, url, owner{login}}
                        mergeCommit {id, oid, committedDate}
                        commits (first: 3) {nodes{commit{oid, message}}}
                        state
                      }
                    }
                  }
                }
              }
              '''
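              # Walk the window in delta-sized slices; inside each slice, follow the GraphQL
              # cursor ("after") until hasNextPage is false or the response carries no data.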
              start = start_timestamp
              count = math.ceil((until - start) / delta)
              for index in range(count):
                  end = min(start + delta, until)
                  condition = ""
                  while True:  # pagination: at most 1000 items in total, 100 per page
                      print("Query: index:%s, count:%s, start:%s, end:%s, page:%s" % (index, count, start.isoformat(), end.isoformat(), condition), flush=True)
                      query = query_pattern % (start.isoformat(), end.isoformat(), condition)
                      req = request.Request(url, method="POST")
                      req.add_header('Content-Type', 'application/json')
                      req.add_header('Authorization', "Bearer {0}".format(GITHUB_TOKEN))
                      body = {}
                      body['query'] = query
                      data = bytes(json.dumps(body), encoding="utf-8")
                      content = {}
                      # Retry transient HTTP failures up to 10 times before giving up on this page.
                      for i in range(10):
                          try:
                              r = request.urlopen(req, data=data)
                              content = json.loads(r.read())
                              break
                          except HTTPError as e:
                              print('Try count: {0}, error code: {1}, reason: {2}'.format(i, e.code, e.reason))
                              time.sleep(3)
                          except IncompleteRead as e:
                              print("IncompleteRead", e)
                              time.sleep(3)
                      if 'data' not in content:
                          print(content)
                          break
                      edges = content['data']['search']['edges']
                      for edge in edges:
                          node = edge['node']
                          node['dumpedAt'] = timestamp.isoformat()
                          results.append(json.dumps(node))
                      print("Read edge count: {0}, total count: {1}".format(len(results), content['data']['search']['issueCount']), flush=True)
                      hasNextPage = content['data']['search']['pageInfo']['hasNextPage']
                      print(content['data']['search']['pageInfo'])
                      if not hasNextPage:
                          break
                      condition = 'after: "{0}",'.format(edges[-1]['cursor'])
                      print(condition)
                  start = end
              return results
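          # Main flow: collect the pull requests for the window, upload them under the
          # 'pullrequests' blob prefix, then advance the checkpoint to 'until'.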
          results = get_pullrequests()
          upload_to_blob(results, 'pullrequests')
          update_start_timestamp()