Skip to content

Commit

Permalink
Improved "GetPublishedAtUtcAsync" method efficiency (#436)
Browse files Browse the repository at this point in the history
* Updated the GetPublishedTimeStamp API to fetch only the published timestamp from the
cached JSON document.

* fix tests.

* Addressed PR comments.
Increased cache size.
Added CacheInvalidation and CacheExpiration.

* Addressed PR comments.

---------

Co-authored-by: Mounika Rendedla <[email protected]>
  • Loading branch information
morended and Mounika Rendedla authored Jul 6, 2023
1 parent d2c21b9 commit 8da1fcc
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 15 deletions.
20 changes: 14 additions & 6 deletions src/Shared/PackageManagers/BaseProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ namespace Microsoft.CST.OpenSource.PackageManagers
using Utilities;
using Version = SemanticVersioning.Version;
using PackageUrl;
using System.IO;

public abstract class BaseProjectManager : IBaseProjectManager
{
Expand Down Expand Up @@ -230,13 +231,14 @@ public static async Task<JsonDocument> GetJsonCache(HttpClient client, string ur
Logger.Trace("Loading Uri...");
HttpResponseMessage result = await client.GetAsync(uri);
result.EnsureSuccessStatusCode(); // Don't cache error codes.
long contentLength = result.Content.Headers.ContentLength ?? 8192;
long contentLength = 0;
JsonDocument doc;

switch (jsonParsingOption)
{
case JsonParsingOption.NotInArrayNotCsv:
string data = await result.Content.ReadAsStringAsync();
contentLength = data.Length;
data = Regex.Replace(data, @"\r\n?|\n", ",");
data = $"[{data}]";

Expand All @@ -245,16 +247,22 @@ public static async Task<JsonDocument> GetJsonCache(HttpClient client, string ur
AllowTrailingCommas = true,
});
break;
default:
doc = await JsonDocument.ParseAsync(await result.Content.ReadAsStreamAsync());
default:
Stream responseStream = await result.Content.ReadAsStreamAsync();
contentLength = responseStream.Length;
doc = await JsonDocument.ParseAsync(responseStream);
break;
}

if (useCache)
{
lock (DataCache)
{
MemoryCacheEntryOptions? mce = new() { Size = contentLength };
MemoryCacheEntryOptions? mce = new()
{
Size = contentLength,
AbsoluteExpirationRelativeToNow = TimeSpan.FromMinutes(30),
};
DataCache.Set<JsonDocument>(uri, doc, mce);
}
}
Expand Down Expand Up @@ -436,7 +444,7 @@ public async Task<Dictionary<PackageURL, double>> IdentifySourceRepositoryAsync(
}

/// <inheritdoc />
public async Task<DateTime?> GetPublishedAtUtcAsync(PackageURL purl, bool useCache = true)
public virtual async Task<DateTime?> GetPublishedAtUtcAsync(PackageURL purl, bool useCache = true)
{
Check.NotNull(nameof(purl.Version), purl.Version);
DateTime? uploadTime = (await GetPackageMetadataAsync(purl, useCache))?.UploadTime?.ToUniversalTime();
Expand All @@ -449,7 +457,7 @@ public async Task<Dictionary<PackageURL, double>> IdentifySourceRepositoryAsync(
protected static readonly MemoryCache DataCache = new(
new MemoryCacheOptions
{
SizeLimit = 1024 * 1024 * 8
SizeLimit = 1024 * 1024 * 100
}
);

Expand Down
33 changes: 24 additions & 9 deletions src/Shared/PackageManagers/NPMProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -314,15 +314,7 @@ public override Uri GetPackageAbsoluteUri(PackageURL purl)
{
Version versionToGet = new(metadata.PackageVersion);
JsonElement? versionElement = GetVersionElement(contentJSON, versionToGet);

if (root.TryGetProperty("time", out JsonElement time))
{
string? uploadTime = OssUtilities.GetJSONPropertyStringIfExists(time, metadata.PackageVersion);
if (uploadTime != null)
{
metadata.UploadTime = DateTime.Parse(uploadTime);
}
}
metadata.UploadTime = ParseUploadTime(contentJSON, metadata.PackageVersion);

if (versionElement != null)
{
Expand Down Expand Up @@ -482,6 +474,29 @@ is JsonElement.ArrayEnumerator enumeratorElement &&
return metadata;
}

/// <summary>
/// Looks up the publish time of a specific npm package version by reading only the
/// "time" section of the package's registry document, rather than building full metadata.
/// </summary>
/// <param name="purl">The package URL; its version must not be null.</param>
/// <param name="useCache">Forwarded to <c>GetJsonCache</c> for the registry document request.</param>
/// <returns>The value produced by <c>ParseUploadTime</c> for the requested version, or null when it finds none.</returns>
public override async Task<DateTime?> GetPublishedAtUtcAsync(PackageURL purl, bool useCache = true)
{
    Check.NotNull(nameof(purl.Version), purl.Version);
    HttpClient httpClient = CreateHttpClient();

    // Namespaced packages are addressed as "<namespace>/<name>"; others by bare name.
    string? registryName = purl.Name;
    if (purl.HasNamespace())
    {
        registryName = $"{purl.GetNamespaceFormatted()}/{purl.Name}";
    }

    string registryUrl = $"{ENV_NPM_API_ENDPOINT}/{registryName}";
    JsonDocument registryDoc = await GetJsonCache(httpClient, registryUrl, useCache);

    return ParseUploadTime(registryDoc, purl.Version);
}

/// <summary>
/// Reads the upload time recorded for <paramref name="versionKey"/> under the root-level
/// "time" property of <paramref name="jsonDoc"/>.
/// </summary>
/// <param name="jsonDoc">The JSON document whose root is inspected for a "time" property.</param>
/// <param name="versionKey">The property name looked up inside the "time" element.</param>
/// <returns>
/// The parsed timestamp after <see cref="DateTime.ToUniversalTime"/>, or null when the root is not
/// a JSON object, has no "time" property, or no string is found for <paramref name="versionKey"/>.
/// </returns>
/// <exception cref="FormatException">The stored value is not a parsable date/time string.</exception>
private DateTime? ParseUploadTime(JsonDocument jsonDoc, string versionKey)
{
    JsonElement root = jsonDoc.RootElement;

    // JsonElement.TryGetProperty throws on anything that is not a JSON object, so check the kind first.
    if (root.ValueKind != JsonValueKind.Object || !root.TryGetProperty("time", out JsonElement time))
    {
        return null;
    }

    string? uploadTime = OssUtilities.GetJSONPropertyStringIfExists(time, versionKey);
    if (uploadTime is null)
    {
        return null;
    }

    // The timestamp is machine-readable data: parse with the invariant culture so the result
    // does not depend on the calendar or date format of the current thread culture.
    return DateTime.Parse(uploadTime, System.Globalization.CultureInfo.InvariantCulture).ToUniversalTime();
}

public override JsonElement? GetVersionElement(JsonDocument? contentJSON, Version version)
{
if (contentJSON is null) { return null; }
Expand Down
31 changes: 31 additions & 0 deletions src/Shared/PackageManagers/PyPIProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,37 @@ public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageUR
return metadata;
}

/// <summary>
/// Gets the publish time of a specific PyPI package version, taken as the earliest
/// "upload_time" among the entries of the "urls" array in the version's JSON document.
/// </summary>
/// <param name="purl">The package URL; its version must not be null.</param>
/// <param name="useCache">Forwarded to <c>GetJsonCache</c> for the JSON document request.</param>
/// <returns>
/// The earliest upload time after <see cref="DateTime.ToUniversalTime"/>, or null when the document
/// has no usable "urls" array or none of its entries carries an "upload_time" string.
/// </returns>
/// <exception cref="FormatException">An "upload_time" value is not a parsable date/time string.</exception>
public override async Task<DateTime?> GetPublishedAtUtcAsync(PackageURL purl, bool useCache = true)
{
    Check.NotNull(nameof(purl.Version), purl.Version);
    HttpClient client = CreateHttpClient();

    // Forward useCache so that callers asking for an uncached lookup actually get one.
    JsonDocument contentJSON = await GetJsonCache(client, $"{ENV_PYPI_ENDPOINT}/pypi/{purl.Name}/{purl.Version}/json", useCache);
    JsonElement root = contentJSON.RootElement;

    // A document without a "urls" property has no upload times to report; return null instead of throwing.
    if (root.ValueKind != JsonValueKind.Object || !root.TryGetProperty("urls", out JsonElement urls))
    {
        return null;
    }

    JsonElement.ArrayEnumerator? urlsArray = OssUtilities.GetJSONEnumerator(urls);
    if (urlsArray is null)
    {
        return null;
    }

    DateTime? uploadTime = null;
    foreach (JsonElement url in urlsArray.Value)
    {
        string? uploadTimeStr = OssUtilities.GetJSONPropertyStringIfExists(url, "upload_time");
        if (uploadTimeStr is null)
        {
            continue;
        }

        // Machine-readable timestamp: parse with the invariant culture so the result does not
        // depend on the current thread culture.
        // NOTE(review): if "upload_time" carries no UTC offset, ToUniversalTime() treats it as
        // local time; confirm whether the offset-qualified field should be read instead.
        DateTime newUploadTime = DateTime.Parse(uploadTimeStr, System.Globalization.CultureInfo.InvariantCulture).ToUniversalTime();

        // The publish time of the version is the minimum upload time across all of its files.
        if (uploadTime is null || uploadTime > newUploadTime)
        {
            uploadTime = newUploadTime;
        }
    }

    return uploadTime;
}

public override List<Version> GetVersions(JsonDocument? contentJSON)
{
List<Version> allVersions = new();
Expand Down
19 changes: 19 additions & 0 deletions src/oss-tests/ProjectManagerTests/PyPIProjectManagerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,25 @@ public async Task PackageVersionExistsAsyncSucceeds(string purlString)
Assert.IsTrue(await _projectManager.PackageVersionExistsAsync(purl, useCache: false));
}

[DataTestMethod]
[DataRow("pkg:pypi/[email protected]", "2022-04-02T10:32:27")]
[DataRow("pkg:pypi/[email protected]", "2022-04-05T16:26:03")]
[DataRow("pkg:pypi/[email protected]", "2022-01-05T15:40:49")]
// Verifies that GetPublishedAtUtcAsync returns the expected publish time for a package URL,
// or null when no expected time is supplied for the data row.
public async Task GetPublishedAtUtcSucceeds(string purlString, string? expectedTime = null)
{
    // Arrange
    PackageURL packageUrl = new(purlString);

    // Act
    DateTime? publishedAt = await _projectManager.GetPublishedAtUtcAsync(packageUrl, useCache: false);

    // Assert
    if (expectedTime != null)
    {
        DateTime expectedUtc = DateTime.Parse(expectedTime).ToUniversalTime();
        Assert.AreEqual(expectedUtc, publishedAt);
        return;
    }

    Assert.IsNull(publishedAt);
}

[DataTestMethod]
[DataRow("pkg:pypi/pandas", true)]
[DataRow("pkg:pypi/[email protected]", true)]
Expand Down

0 comments on commit 8da1fcc

Please sign in to comment.