@@ -18,23 +18,185 @@ name: $(TeamProject)_$(Build.DefinitionName)_$(SourceBranchName)_$(Date:yyyyMMdd
 stages:
 - stage: Build
-  pool: sonicbld-1es
+  pool:
+    vmImage: 'ubuntu-latest'

   jobs:
   - job: Build
     timeoutInMinutes: 120
     steps:
+    - task: UsePythonVersion@0
+      inputs:
+        versionSpec: '3.x'
+        addToPath: true
+        architecture: 'x64'
     - script: |
-        set -ex
-        # Install Azure cli
-        curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash
-        az login --service-principal --use-cert-sn-issuer -u 08fd13c1-63ab-4b08-9007-f4ff86b61248 -p $CERTPATH --tenant 72f988bf-86f1-41af-91ab-2d7cd011db47
-        pip3 install azure-storage-queue azure-storage-blob pytz python-dateutil azure.core azure.kusto.data azure.kusto.ingest
-      env:
-        CERTPATH: $(CERTPATH)
+        pip install azure-storage-queue azure-storage-blob pytz python-dateutil
       displayName: Install build tools
-    - script: |
-        python3 azure-pipelines/scripts/publish-github-prs.py $GITHUB_TOKEN $AZURE_STORAGE_CONNECTION_STRING
+    - task: PythonScript@0
+      displayName: Publish SONiC telemetry
       env:
         AZURE_STORAGE_CONNECTION_STRING: '$(AZURE_STORAGE_CONNECTION_STRING)'
         GITHUB_TOKEN: '$(GITHUB_TOKEN)'
-      displayName: Upload PR info to kusto
+      inputs:
+        scriptSource: 'inline'
+        script: |
+          import datetime, base64, json, time, os, re, pytz, math
+          from urllib import request
+          from urllib.error import HTTPError
+          from http.client import IncompleteRead
+          from azure.core.exceptions import ResourceNotFoundError
+          from dateutil import parser
+          import http.client
+          from azure.storage.blob import BlobServiceClient
+
+          CONTAINER = 'build'
+          INFO_PULLREQUESTS_FILE = "info/pullrequests.json"
+          GITHUB_TOKEN = '$(GITHUB_TOKEN)'
+          AZURE_STORAGE_CONNECTION_STRING = '$(AZURE_STORAGE_CONNECTION_STRING)'
+          blob_service_client = BlobServiceClient.from_connection_string(AZURE_STORAGE_CONNECTION_STRING)
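+          # Note: '$(GITHUB_TOKEN)' and '$(AZURE_STORAGE_CONNECTION_STRING)' above are Azure Pipelines
+          # macros expanded into the inline script before it runs; the same values are also exported
+          # through the task's env block, so either mechanism could be used to read them.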
+
+          url = "https://api.github.com/graphql"
+          timestamp = datetime.datetime.utcnow()
+          timeoffset = datetime.timedelta(minutes=5)
+          until = (timestamp - timeoffset).replace(tzinfo=pytz.UTC)
+          if 'END_TIMESTAMP' in os.environ and os.environ['END_TIMESTAMP']:
+              until = parser.isoparse(os.environ['END_TIMESTAMP']).replace(tzinfo=pytz.UTC)
+          delta = datetime.timedelta(minutes=60)
+          if 'TIMEDELTA_IN_MINUTES' in os.environ and os.environ['TIMEDELTA_IN_MINUTES']:
+              timedelta_in_minutes = max(int(os.environ['TIMEDELTA_IN_MINUTES']), 30)
+              delta = datetime.timedelta(minutes=timedelta_in_minutes)
+          max_timedelta_in_days = 35
+
+          # Upload a list of lines to blob
+          def upload_to_blob(lines, blob_prefix, file_prefix=""):
+              now = datetime.datetime.now()
+              if not lines:
+                  print("no lines to upload, skipped")
+                  return
+              local_file_name = file_prefix + now.strftime("_%Y%m%d-%H%M%S-%f") + '.json'
+              with open(local_file_name, "w") as file:
+                  count = file.write('\n'.join(lines))
+              blob_file_name = blob_prefix + now.strftime("/%Y/%m/%d/") + local_file_name
+              blob_client = blob_service_client.get_blob_client(container=CONTAINER, blob=blob_file_name)
+              with open(local_file_name, "rb") as data:
+                  blob_client.upload_blob(data)
+              os.remove(local_file_name)
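+          # Illustrative example: a batch flushed at local time 2024-01-15 12:00:00.123456 with
+          # blob_prefix 'pullrequests' is uploaded to the 'build' container as
+          # 'pullrequests/2024/01/15/_20240115-120000-123456.json' (timestamp shown is hypothetical).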
+
+          def get_start_timestamp(force=False):
+              if not force and 'START_TIMESTAMP' in os.environ and os.environ['START_TIMESTAMP']:
+                  return parser.isoparse(os.environ['START_TIMESTAMP']).replace(tzinfo=pytz.UTC)
+              blob_client = blob_service_client.get_blob_client(container=CONTAINER, blob=INFO_PULLREQUESTS_FILE)
+              try:
+                  download_stream = blob_client.download_blob()
+                  info = json.loads(download_stream.readall())
+                  return parser.isoparse(info['timestamp']).replace(tzinfo=pytz.UTC)
+              except ResourceNotFoundError:
+                  pass
+              start_timestamp = datetime.datetime.utcnow() - datetime.timedelta(days=max_timedelta_in_days)
+              return start_timestamp.replace(tzinfo=pytz.UTC)
+
+          def update_start_timestamp():
+              if 'END_TIMESTAMP' in os.environ and os.environ['END_TIMESTAMP']:
+                  last = get_start_timestamp(True)
+                  if last > until:
+                      print('skipped updating the start timestamp, until:{0} < last:{1}'.format(until.isoformat(), last.isoformat()))
+                      return
+              blob_file_name = "info/pullrequests.json"
+              blob_client = blob_service_client.get_blob_client(container=CONTAINER, blob=INFO_PULLREQUESTS_FILE)
+              info = {}
+              info['timestamp'] = until.isoformat()
+              data = json.dumps(info)
+              blob_client.upload_blob(data, overwrite=True)
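+          # The state document written above is a single JSON object, e.g.
+          # {"timestamp": "2024-01-15T11:55:00+00:00"} (illustrative value); the next run
+          # resumes from this point via get_start_timestamp().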
+
+          # The GitHub GraphQL search API returns at most 100 items per page and at most 10 pages
+          # (1,000 items) per query. To work around this, the query is split into time ranges of
+          # "delta" minutes, each of which must cover fewer than 1,000 updated pull requests.
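+          # For example, with the default delta of 60 minutes and the full 35-day backfill window,
+          # the loop below issues up to 35 * 24 = 840 windowed searches.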
+          def get_pullrequests():
+              results = []
+              start_timestamp = get_start_timestamp()
+              print('start: {0}, until: {1}'.format(start_timestamp.isoformat(), until.isoformat()), flush=True)
+              query_pattern = '''
+              {
+                search(query: "org:azure org:sonic-net is:pr updated:%s..%s sort:updated", %s type: ISSUE, first: 100) {
+                  issueCount
+                  pageInfo {
+                    hasNextPage
+                    endCursor
+                  }
+                  edges {
+                    cursor
+                    node {
+                      ... on PullRequest {
+                        url
+                        number
+                        assignees (first: 10) {
+                          nodes {
+                            login
+                          }
+                        }
+                        title
+                        createdAt
+                        closedAt
+                        merged
+                        mergedAt
+                        updatedAt
+                        mergedBy {login}
+                        author {login}
+                        baseRefName
+                        baseRepository {name, url, owner{login}}
+                        repository {name, url, owner{login}}
+                        mergeCommit {id, oid, committedDate}
+                        commits (first: 3) {nodes{commit{oid, message}}}
+                        state
+                      }
+                    }
+                  }
+                }
+              }
+              '''
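+              # The parsing below assumes the standard GraphQL search response shape, roughly:
+              #   {"data": {"search": {"issueCount": N,
+              #                        "pageInfo": {"hasNextPage": true, "endCursor": "..."},
+              #                        "edges": [{"cursor": "...", "node": {...PullRequest fields...}}]}}}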
+              start = start_timestamp
+              count = math.ceil((until - start) / delta)
+              for index in range(count):
+                  end = min(start + delta, until)
+                  condition = ""
+                  while True:  # pagination: at most 100 items per page, 1,000 items per windowed query
+                      print("Query: index:%s, count:%s, start:%s, end:%s, page:%s" % (index, count, start.isoformat(), end.isoformat(), condition), flush=True)
+                      query = query_pattern % (start.isoformat(), end.isoformat(), condition)
+                      req = request.Request(url, method="POST")
+                      req.add_header('Content-Type', 'application/json')
+                      req.add_header('Authorization', "Bearer {0}".format(GITHUB_TOKEN))
+                      body = {}
+                      body['query'] = query
+                      data = bytes(json.dumps(body), encoding="utf-8")
+                      content = {}
+                      for i in range(10):
+                          try:
+                              r = request.urlopen(req, data=data)
+                              content = json.loads(r.read())
+                              break
+                          except HTTPError as e:
+                              print('Try count: {0}, error code: {1}, reason: {2}'.format(i, e.code, e.reason))
+                              time.sleep(3)
+                          except IncompleteRead as e:
+                              print("IncompleteRead", e)
+                              time.sleep(3)
+                      if 'data' not in content:
+                          print(content)
+                          break
+                      edges = content['data']['search']['edges']
+                      for edge in edges:
+                          node = edge['node']
+                          node['dumpedAt'] = timestamp.isoformat()
+                          results.append(json.dumps(node))
+                      print("Read edge count: {0}, total count: {1}".format(len(results), content['data']['search']['issueCount']), flush=True)
+                      hasNextPage = content['data']['search']['pageInfo']['hasNextPage']
+                      print(content['data']['search']['pageInfo'])
+                      if not hasNextPage:
+                          break
+                      condition = 'after: "{0}",'.format(edges[-1]['cursor'])
+                      print(condition)
+                  start = end
+              return results
+
+          results = get_pullrequests()
+          upload_to_blob(results, 'pullrequests')
+          update_start_timestamp()