Skip to content

Commit

Permalink
Improve PyPI GetArtifactDownloadUris. (#361)
Browse files Browse the repository at this point in the history
* Add optional property UploadTime to ArtifactUri<T>.
* Implement PyPI GetArtifactDownloadUrisAsync; it now returns all artifacts associated with the package version.
* PyPI uploadTime is now populated from the earliest upload_time among all files of the package version, so that it reflects the publish time of that version.
* Implement tests for these changes.
* Deprecate TypedManager.GetArtifactDownloadUris in favor of TypedManager.GetArtifactDownloadUrisAsync.
  • Loading branch information
jpinz authored Oct 27, 2022
1 parent 6f8e826 commit 7cf3a26
Show file tree
Hide file tree
Showing 12 changed files with 387 additions and 63 deletions.
18 changes: 18 additions & 0 deletions src/Shared/Extensions/PackageUrlExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,24 @@ public static PackageURL CreateWithNewNames(this PackageURL packageUrl, string n
{
return new PackageURL(packageUrl.Type, namespaceStr, name, null, null, null);
}

/// <summary>
/// Builds a copy of <paramref name="packageUrl"/> that carries the supplied <paramref name="version"/>.
/// </summary>
/// <param name="packageUrl">The <see cref="PackageURL"/> whose type, namespace, name, qualifiers and subpath are reused.</param>
/// <param name="version">The version to place on the new instance.</param>
/// <returns>A newly constructed <see cref="PackageURL"/> with <paramref name="version"/> as its version.</returns>
public static PackageURL WithVersion(this PackageURL packageUrl, string version) =>
    new PackageURL(
        type: packageUrl.Type,
        @namespace: packageUrl.Namespace,
        name: packageUrl.Name,
        version: version,
        qualifiers: packageUrl.Qualifiers,
        subpath: packageUrl.Subpath);

/// <summary>
/// Gets the <paramref name="packageUrl"/> as a valid file name.
Expand Down
11 changes: 9 additions & 2 deletions src/Shared/Model/ArtifactUri.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,20 @@ public record ArtifactUri<T> where T : Enum
/// </summary>
/// <param name="type">The type of artifact for this <see cref="ArtifactUri{T}"/>.</param>
/// <param name="uri">The <see cref="Uri"/> this artifact can be found at.</param>
public ArtifactUri(T type, Uri uri)
/// <param name="uploadTime">The <see cref="DateTime"/> for when this artifact was uploaded to the repository.</param>
public ArtifactUri(T type, Uri uri, DateTime? uploadTime = null)
{
    // Populate the three get-only properties in one step; uploadTime stays null when the caller omits it.
    (Type, Uri, UploadTime) = (type, uri, uploadTime);
}

/// <summary>
/// Initializes a new instance of <see cref="ArtifactUri{T}"/>.
/// </summary>
/// <param name="type">The type of artifact for this <see cref="ArtifactUri{T}"/>.</param>
/// <param name="uri">The string of the uri this artifact can be found at.</param>
public ArtifactUri(T type, string uri) : this(type, new Uri(uri)) { }
/// <param name="uploadTime">The <see cref="DateTime"/> for when this artifact was uploaded to the repository, forwarded unchanged to the <see cref="Uri"/>-based constructor.</param>
public ArtifactUri(T type, string uri, DateTime? uploadTime = null) : this(type, new Uri(uri), uploadTime) { }

/// <summary>
/// The enum representing the artifact type for the project manager associated with this artifact.
Expand All @@ -39,6 +41,11 @@ public ArtifactUri(T type, Uri uri)
/// The <see cref="Uri"/> for where this artifact can be found online.
/// </summary>
public Uri Uri { get; }

/// <summary>
/// The <see cref="DateTime"/> for when this artifact was uploaded to the repository.
/// </summary>
public DateTime? UploadTime { get; }

/// <summary>
/// The file extension for this artifact file. Including the '.' at the beginning.
Expand Down
8 changes: 8 additions & 0 deletions src/Shared/PackageManagers/NPMProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,16 @@ public NPMProjectManager(
}

/// <inheritdoc />
[Obsolete("Deprecated in favor of GetArtifactDownloadUrisAsync.")]
public override IEnumerable<ArtifactUri<NPMArtifactType>> GetArtifactDownloadUris(PackageURL purl)
{
    // Synchronous shim over the async implementation, kept only for backward compatibility.
    // NOTE(review): assumes ToListAsync is the System.Linq.Async extension returning ValueTask<List<T>> - confirm.
    // A ValueTask must not be blocked on directly before it has completed, so convert it to a Task first;
    // GetAwaiter().GetResult() then blocks and rethrows the original exception rather than an AggregateException.
    return GetArtifactDownloadUrisAsync(purl).ToListAsync().AsTask().GetAwaiter().GetResult();
}

/// <inheritdoc />
public override async IAsyncEnumerable<ArtifactUri<NPMArtifactType>> GetArtifactDownloadUrisAsync(PackageURL purl, bool useCache = true)
{
Check.NotNull(nameof(purl.Version), purl.Version);
string feedUrl = (purl.Qualifiers?["repository_url"] ?? ENV_NPM_API_ENDPOINT).EnsureTrailingSlash();

string artifactUri = purl.HasNamespace() ?
Expand Down
14 changes: 10 additions & 4 deletions src/Shared/PackageManagers/NuGetProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,16 @@ public NuGetProjectManager(
}

/// <inheritdoc />
[Obsolete("Deprecated in favor of GetArtifactDownloadUrisAsync.")]
public override IEnumerable<ArtifactUri<NuGetArtifactType>> GetArtifactDownloadUris(PackageURL purl)
{
    // Synchronous shim over the async implementation, kept only for backward compatibility.
    // NOTE(review): assumes ToListAsync is the System.Linq.Async extension returning ValueTask<List<T>> - confirm.
    // A ValueTask must not be blocked on directly before it has completed, so convert it to a Task first;
    // GetAwaiter().GetResult() then blocks and rethrows the original exception rather than an AggregateException.
    return GetArtifactDownloadUrisAsync(purl).ToListAsync().AsTask().GetAwaiter().GetResult();
}

/// <inheritdoc />
public override async IAsyncEnumerable<ArtifactUri<NuGetArtifactType>> GetArtifactDownloadUrisAsync(PackageURL purl, bool useCache = true)
{
Check.NotNull(nameof(purl.Version), purl.Version);
if (purl.Qualifiers?.TryGetValue("repository_url", out var repositoryUrlQualifier) == true && repositoryUrlQualifier != NUGET_DEFAULT_INDEX)
{
// Throw an exception until we implement proper support for service indices other than nuget.org
Expand All @@ -64,13 +72,11 @@ public override IEnumerable<ArtifactUri<NuGetArtifactType>> GetArtifactDownloadU
var nameLowercase = purl.Name.ToLowerInvariant();
var versionLowercase = purl.Version.ToLowerInvariant();

var nupkgArtifactUri = new ArtifactUri<NuGetArtifactType>(NuGetArtifactType.Nupkg,
yield return new ArtifactUri<NuGetArtifactType>(NuGetArtifactType.Nupkg,
$"{basePath}{nameLowercase}/{versionLowercase}/{nameLowercase}.{versionLowercase}.nupkg");

var nuspecArtifactUri = new ArtifactUri<NuGetArtifactType>(NuGetArtifactType.Nuspec,
yield return new ArtifactUri<NuGetArtifactType>(NuGetArtifactType.Nuspec,
$"{basePath}{nameLowercase}/{versionLowercase}/{nameLowercase}.nuspec");

return ImmutableList.Create(nupkgArtifactUri, nuspecArtifactUri);
}

/// <summary>
Expand Down
136 changes: 90 additions & 46 deletions src/Shared/PackageManagers/PyPIProjectManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Microsoft.CST.OpenSource.PackageManagers
{
using Contracts;
using Extensions;
using Helpers;
using Model;
using NLog.LayoutRenderers.Wrappers;
Expand Down Expand Up @@ -40,15 +41,45 @@ public PyPIProjectManager(
}

/// <inheritdoc />
[Obsolete("Deprecated in favor of GetArtifactDownloadUrisAsync.")]
public override IEnumerable<ArtifactUri<PyPIArtifactType>> GetArtifactDownloadUris(PackageURL purl)
{
    // Synchronous shim over the async implementation, kept only for backward compatibility.
    // The former feed-URL local is gone: the URIs now come from the package metadata, so it was unused here.
    // NOTE(review): assumes ToListAsync is the System.Linq.Async extension returning ValueTask<List<T>> - confirm.
    // A ValueTask must not be blocked on directly before it has completed, so convert it to a Task first;
    // GetAwaiter().GetResult() then blocks and rethrows the original exception rather than an AggregateException.
    return GetArtifactDownloadUrisAsync(purl).ToListAsync().AsTask().GetAwaiter().GetResult();
}

/// <inheritdoc />
/// <remarks>
/// Reads the "urls" array of the metadata returned by <c>GetMetadataAsync</c> and yields one
/// <see cref="ArtifactUri{T}"/> per entry whose "packagetype" is "sdist" (tarball) or "bdist_wheel" (wheel).
/// Entries of any other package type, or without a "url" value, are skipped. The upload time of an entry is
/// left <c>null</c> when "upload_time" is missing or cannot be parsed.
/// </remarks>
/// <exception cref="InvalidOperationException">Thrown when no metadata content could be retrieved for <paramref name="purl"/>.</exception>
public override async IAsyncEnumerable<ArtifactUri<PyPIArtifactType>> GetArtifactDownloadUrisAsync(PackageURL purl, bool useCache = true)
{
    Check.NotNull(nameof(purl.Version), purl.Version);
    string? content = await GetMetadataAsync(purl, useCache);
    if (string.IsNullOrEmpty(content))
    {
        throw new InvalidOperationException($"Unable to retrieve PyPI metadata for {purl}.");
    }

    // JsonDocument is disposable; the using declaration releases it once enumeration finishes or the
    // enumerator is disposed. Only strings copied out of the document are handed to the caller.
    using JsonDocument contentJSON = JsonDocument.Parse(content);
    JsonElement root = contentJSON.RootElement;

    JsonElement.ArrayEnumerator? urlsArray = OssUtilities.GetJSONEnumerator(root.GetProperty("urls"));
    if (urlsArray is null)
    {
        yield break;
    }

    foreach (JsonElement url in urlsArray.Value)
    {
        // Look the package type up once and map it to the artifact type; anything else is not an artifact we report.
        string? packageType = OssUtilities.GetJSONPropertyStringIfExists(url, "packagetype");
        PyPIArtifactType artifactType;
        if (packageType == "sdist")
        {
            artifactType = PyPIArtifactType.Tarball;
        }
        else if (packageType == "bdist_wheel")
        {
            artifactType = PyPIArtifactType.Wheel;
        }
        else
        {
            continue;
        }

        // Without a download location there is nothing to yield (and constructing a Uri from null would throw).
        string? urlStr = OssUtilities.GetJSONPropertyStringIfExists(url, "url");
        if (string.IsNullOrEmpty(urlStr))
        {
            continue;
        }

        // upload_time is machine-generated, so parse it culture-independently; TryParse tolerates null/garbage.
        string? uploadTimeStr = OssUtilities.GetJSONPropertyStringIfExists(url, "upload_time");
        DateTime? uploadTime = null;
        if (DateTime.TryParse(
                uploadTimeStr,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out DateTime parsedUploadTime))
        {
            uploadTime = parsedUploadTime;
        }

        yield return new ArtifactUri<PyPIArtifactType>(artifactType, urlStr, uploadTime);
    }
}

/// <summary>
Expand Down Expand Up @@ -212,6 +243,11 @@ public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageUR
{
HttpClient httpClient = CreateHttpClient();

if (purl.Version.IsNotBlank())
{
return await GetHttpStringCache(httpClient, $"{ENV_PYPI_ENDPOINT}/pypi/{purl.Name}/{purl.Version}/json", useCache);
}

return await GetHttpStringCache(httpClient, $"{ENV_PYPI_ENDPOINT}/pypi/{purl.Name}/json", useCache);
}
catch (Exception ex)
Expand All @@ -232,12 +268,21 @@ public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageUR
JsonElement root = contentJSON.RootElement;

JsonElement infoElement = root.GetProperty("info");

metadata.LatestPackageVersion = OssUtilities.GetJSONPropertyStringIfExists(infoElement, "version"); // Ran in the root, always points to latest version.

if (purl.Version.IsBlank() && metadata.LatestPackageVersion.IsNotBlank())
{
content = await GetMetadataAsync(purl.WithVersion(metadata.LatestPackageVersion), useCache);
contentJSON = JsonDocument.Parse(content);
root = contentJSON.RootElement;

infoElement = root.GetProperty("info");
}

metadata.Name = OssUtilities.GetJSONPropertyStringIfExists(infoElement, "name");
metadata.Description = OssUtilities.GetJSONPropertyStringIfExists(infoElement, "summary"); // Summary is the short description. Description is usually the readme.

metadata.LatestPackageVersion = OssUtilities.GetJSONPropertyStringIfExists(infoElement, "version"); // Ran in the root, always points to latest version.

metadata.PackageManagerUri = ENV_PYPI_ENDPOINT;
metadata.PackageUri = OssUtilities.GetJSONPropertyStringIfExists(infoElement, "package_url");
metadata.Keywords = OssUtilities.ConvertJSONToList(OssUtilities.GetJSONPropertyIfExists(infoElement, "keywords"));
Expand Down Expand Up @@ -292,54 +337,53 @@ public override async Task<IEnumerable<string>> EnumerateVersionsAsync(PackageUR
// if we found any version at all, get the information.
if (metadata.PackageVersion is not null)
{
Version versionToGet = new(metadata.PackageVersion);
JsonElement? versionElement = GetVersionElement(contentJSON, versionToGet);
if (versionElement is not null)
JsonElement.ArrayEnumerator? urlsArray = OssUtilities.GetJSONEnumerator(root.GetProperty("urls"));
if (urlsArray is not null)
{
// fill the version specific entries

if (versionElement.Value.ValueKind == JsonValueKind.Array) // I think this should always be true.
foreach (JsonElement url in urlsArray.Value)
{
foreach (JsonElement releaseFile in versionElement.Value.EnumerateArray())
// digests
if (OssUtilities.GetJSONPropertyIfExists(url, "digests")?.EnumerateObject()
is JsonElement.ObjectEnumerator digests)
{
// digests
if (OssUtilities.GetJSONPropertyIfExists(releaseFile, "digests")?.EnumerateObject()
is JsonElement.ObjectEnumerator digests)
metadata.Signature ??= new List<Digest>();
foreach (JsonProperty digest in digests)
{
metadata.Signature ??= new List<Digest>();
foreach (JsonProperty digest in digests)
metadata.Signature.Add(new Digest()
{
metadata.Signature.Add(new Digest()
{
Algorithm = digest.Name,
Signature = digest.Value.ToString()
});
}
Algorithm = digest.Name,
Signature = digest.Value.ToString()
});
}
}

// TODO: Want to figure out how to store info for .whl files as well.
if (OssUtilities.GetJSONPropertyStringIfExists(releaseFile, "packagetype") == "sdist")
// TODO: Want to figure out how to store info for .whl files as well.
if (OssUtilities.GetJSONPropertyStringIfExists(url, "packagetype") == "sdist")
{
// downloads
if (OssUtilities.GetJSONPropertyIfExists(url, "downloads")?.GetInt64() is long downloads
&& downloads != -1)
{
// downloads
if (OssUtilities.GetJSONPropertyIfExists(releaseFile, "downloads")?.GetInt64() is long downloads
&& downloads != -1)
metadata.Downloads ??= new Downloads()
{
metadata.Downloads ??= new Downloads()
{
Overall = downloads
};
}

metadata.Size = OssUtilities.GetJSONPropertyIfExists(releaseFile, "size")?.GetInt64();
metadata.Active = !OssUtilities.GetJSONPropertyIfExists(releaseFile, "yanked")?.GetBoolean();
metadata.VersionUri = $"{ENV_PYPI_ENDPOINT}/project/{purl.Name}/{purl.Version}";
metadata.VersionDownloadUri = OssUtilities.GetJSONPropertyStringIfExists(releaseFile, "url");
Overall = downloads
};
}

string? uploadTime = OssUtilities.GetJSONPropertyStringIfExists(releaseFile, "upload_time");
if (uploadTime != null)
{
metadata.UploadTime = DateTime.Parse(uploadTime);
}
metadata.Size = OssUtilities.GetJSONPropertyIfExists(url, "size")?.GetInt64();
metadata.Active = !OssUtilities.GetJSONPropertyIfExists(url, "yanked")?.GetBoolean();
metadata.VersionUri = $"{ENV_PYPI_ENDPOINT}/project/{purl.Name}/{purl.Version}";
metadata.VersionDownloadUri = OssUtilities.GetJSONPropertyStringIfExists(url, "url");
}

string? uploadTime = OssUtilities.GetJSONPropertyStringIfExists(url, "upload_time");
if (uploadTime != null)
{
DateTime newUploadTime = DateTime.Parse(uploadTime);
// Used to set the minimum upload time for all associated files for this version to get the publish time.
if (metadata.UploadTime == null || metadata.UploadTime > newUploadTime)
{
metadata.UploadTime = newUploadTime;
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions src/Shared/PackageManagers/TypedManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,18 @@ public override async Task<IEnumerable<string>> EnumerateVersionsAsync(
/// <param name="purl">The <see cref="PackageURL"/> to get the URI(s) for.</param>
/// <returns>A list of the relevant <see cref="ArtifactUri{TArtifactUriType}"/>.</returns>
/// <remarks>Returns the expected URIs for resources. Does not validate that the URIs resolve at the moment of enumeration.</remarks>
[Obsolete(message: $"Deprecated in favor of {nameof(GetArtifactDownloadUrisAsync)}.")]
public abstract IEnumerable<ArtifactUri<TArtifactUriType>> GetArtifactDownloadUris(PackageURL purl);

/// <summary>
/// Gets the relevant URI(s) to download the files related to a <see cref="PackageURL"/>.
/// </summary>
/// <param name="purl">The <see cref="PackageURL"/> to get the URI(s) for.</param>
/// <param name="useCache">If the data should be retrieved from the cache. Defaults to <c>true</c>.</param>
/// <returns>An asynchronous sequence of the relevant <see cref="ArtifactUri{TArtifactUriType}"/>.</returns>
/// <remarks>Returns the expected URIs for resources. Does not validate that the URIs resolve at the moment of enumeration.</remarks>
public abstract IAsyncEnumerable<ArtifactUri<TArtifactUriType>> GetArtifactDownloadUrisAsync(PackageURL purl, bool useCache = true);


/// <summary>
/// Check to see if the <see cref="Uri"/> exists.
Expand Down
Loading

0 comments on commit 7cf3a26

Please sign in to comment.