Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

go: analyze package metadata #12429

Merged
merged 14 commits into from
Jul 28, 2021
43 changes: 43 additions & 0 deletions src/python/pants/backend/go/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pants.backend.go.distribution import GoLangDistribution
from pants.backend.go.target_types import GoModuleSources
from pants.base.specs import AddressSpecs, AscendantAddresses, MaybeEmptySiblingAddresses
from pants.build_graph.address import Address
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
Expand Down Expand Up @@ -79,6 +80,9 @@ def basic_parse_go_mod(raw_text: bytes) -> Tuple[Optional[str], Optional[str]]:

# Parse the output of `go mod download` into a list of module descriptors.
def parse_module_descriptors(raw_json: bytes) -> List[ModuleDescriptor]:
if len(raw_json) == 0:
return []

module_descriptors = []
for raw_module_descriptor in ijson.items(raw_json, "", multiple_values=True):
module_descriptor = ModuleDescriptor(
Expand Down Expand Up @@ -117,6 +121,7 @@ async def resolve_go_module(

# Note: The `go` tool requires GOPATH to be an absolute path which can only be resolved from within the
# execution sandbox. Thus, this code uses a bash script to be able to resolve that path.
# TODO: Merge all duplicate versions of this script into a single script and invoke rule.
analyze_script_digest = await Get(
Digest,
CreateDigest(
Expand Down Expand Up @@ -174,6 +179,44 @@ async def resolve_go_module(
)


@dataclass(frozen=True)
class FindOwningGoModuleRequest:
    """Request to locate the nearest `go_module` target for the target at `address`.

    The lookup is based on the `spec_path` of `address`.
    """

    # Address of the target whose owning `go_module` should be found.
    address: Address


@dataclass(frozen=True)
class ResolvedOwningGoModule:
    """Result of searching for the `go_module` target that owns a given address."""

    # Address of the nearest `go_module` target, or `None` when no `go_module`
    # target was found.
    module_address: Optional[Address]


@rule
async def find_nearest_go_module(request: FindOwningGoModuleRequest) -> ResolvedOwningGoModule:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was refactored out of `target_type_rules` to allow reuse. It could probably be a separate PR.

# Obtain unexpanded targets and ensure file targets are filtered out. Unlike Python, file targets do not
# make sense semantically for Go source since Go builds entire packages at a time. The filtering is
Comment on lines +205 to +206
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: this is somewhat true of Java as well: the entire JVM package is implicitly available without an import statement, but separate compilation is still supported.

I expect that @patricklaw will have the inference implementation for the JVM infer a dependency on all files in the package (which will cause it to automatically be coarsened to a single CoarsenedTarget) rather than taking the approach you're taking here. Part of the reason for that is that the JVM allows import cycles between files, and so he already needs to be able to support cycles and thus coarsening.

The equivalent case for Go would be multiple packages with cyclic imports of one another, but it looks like that isn't possible: https://jogendra.dev/import-cycles-in-golang-and-how-to-deal-with-them

So yeah: this seems like the way to go for Go.

# accomplished by requesting `UnexpandedTargets` and also filtering on `is_file_target`.
spec_path = request.address.spec_path
candidate_targets = await Get(
UnexpandedTargets,
AddressSpecs([AscendantAddresses(spec_path), MaybeEmptySiblingAddresses(spec_path)]),
)
go_module_targets = [
tgt
for tgt in candidate_targets
if tgt.has_field(GoModuleSources) and not tgt.address.is_file_target
]

# Sort by address.spec_path in descending order so the nearest go_module target is sorted first.
sorted_go_module_targets = sorted(
go_module_targets, key=lambda tgt: tgt.address.spec_path, reverse=True
)
if sorted_go_module_targets:
nearest_go_module_target = sorted_go_module_targets[0]
return ResolvedOwningGoModule(module_address=nearest_go_module_target.address)
else:
# TODO: Consider eventually requiring all go_package's to associate with a go_module.
return ResolvedOwningGoModule(module_address=None)


# TODO: Add integration tests for the `go-resolve` goal once we figure out its final form. For now, it is a debug
# tool to help update go.sum while developing the Go plugin and will probably change.
class GoResolveSubsystem(GoalSubsystem):
Expand Down
163 changes: 163 additions & 0 deletions src/python/pants/backend/go/pkg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import json
import textwrap
import typing
from dataclasses import dataclass
from typing import Optional, Tuple

from pants.backend.go.distribution import GoLangDistribution
from pants.backend.go.module import (
FindOwningGoModuleRequest,
ResolvedGoModule,
ResolvedOwningGoModule,
ResolveGoModuleRequest,
)
from pants.backend.go.target_types import GoImportPath, GoModuleSources
from pants.build_graph.address import Address
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
from pants.engine.addresses import Addresses
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests, RemovePrefix, Snapshot
from pants.engine.internals.selectors import Get
from pants.engine.platform import Platform
from pants.engine.process import BashBinary, Process, ProcessResult
from pants.engine.rules import collect_rules, rule
from pants.engine.target import UnexpandedTargets
from pants.util.logging import LogLevel


@dataclass(frozen=True)
class ResolvedGoPackage:
    """Metadata resolved for a single `go_package` target."""

    # Address of the `go_package` target that was analyzed.
    address: Address
    # Import path of the package: either the explicit `import_path` set on the target
    # or one inferred from the owning `go_module`.
    import_path: str
    # Address of the owning `go_module` target, or `None` if none was found.
    module_address: Optional[Address]
    # Package name as reported in the `Name` field of the `go list -json` output.
    package_name: str
    # Import paths from the `Imports` field of the `go list -json` output.
    # Variable-length, hence `Tuple[str, ...]` rather than the 1-tuple `Tuple[str]`.
    imported_import_paths: Tuple[str, ...]
    # Import paths from the `Deps` field of the `go list -json` output.
    # NOTE(review): presumably the imports plus transitive dependencies - confirm.
    dependency_import_paths: Tuple[str, ...]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field contains the imported paths plus what appear to be the transitive dependencies.



@dataclass(frozen=True)
class ResolveGoPackageRequest:
    """Request to resolve package metadata for the `go_package` target at `address`."""

    # Address of the `go_package` target to analyze.
    address: Address


@rule
async def resolve_go_package(
    request: ResolveGoPackageRequest,
    goroot: GoLangDistribution,
    platform: Platform,
    bash: BashBinary,
) -> ResolvedGoPackage:
    """Resolve metadata for the `go_package` target at `request.address`.

    The import path is taken from the target's explicit `import_path` field when set,
    and is otherwise inferred from the owning `go_module`. The package name, imports and
    dependencies are read from the JSON emitted by `go list -json .`, which is run in a
    sandbox containing the package's sources and the downloaded Go distribution.

    Raises:
        AssertionError: if the address resolves to zero targets or to more than one.
        ValueError: if no import path is set explicitly and none can be inferred, either
            because there is no owning `go_module` or because it has no import path.
    """
    # TODO: Use MultiGet where applicable.

    downloaded_goroot = await Get(
        DownloadedExternalTool,
        ExternalToolRequest,
        goroot.get_request(platform),
    )

    targets = await Get(UnexpandedTargets, Addresses([request.address]))
    if not targets:
        raise AssertionError(f"Address `{request.address}` did not resolve to any targets.")
    elif len(targets) > 1:
        raise AssertionError(f"Address `{request.address}` resolved to multiple targets.")
    target = targets[0]

    owning_go_module_result = await Get(
        ResolvedOwningGoModule, FindOwningGoModuleRequest(request.address)
    )

    # Compute the import_path for this go_package.
    import_path_field = target.get(GoImportPath)
    if import_path_field and import_path_field.value:
        # Use any explicit import path set on the `go_package` target.
        import_path = import_path_field.value
    elif owning_go_module_result.module_address:
        # Otherwise infer the import path from the owning `go_module` target. The inferred import path will be the
        # module's import path plus any subdirectories in the spec_path between the go_module and go_package target.
        resolved_go_module = await Get(
            ResolvedGoModule, ResolveGoModuleRequest(owning_go_module_result.module_address)
        )
        if not resolved_go_module.import_path:
            raise ValueError(
                f"Unable to infer import path for the `go_package` at address {request.address} "
                f"because the owning go_module at address {resolved_go_module.address} "
                "does not have an import path defined."
            )
        # Internal invariant: the owning module was found among the ascendants/siblings of this spec_path.
        assert request.address.spec_path.startswith(resolved_go_module.address.spec_path)
        spec_path_difference = request.address.spec_path[
            len(resolved_go_module.address.spec_path) :
        ]
        import_path = f"{resolved_go_module.import_path}{spec_path_difference}"
    else:
        raise ValueError(
            f"Unable to infer import path for the `go_package` at address {request.address} "
            "because no owning go_module was found (which would define an import path for the module) "
            "and no explicit `import_path` was set on the go_package"
        )

    # NOTE(review): this requests `GoModuleSources` from a `go_package` target - confirm that this is the
    # intended sources field for packages.
    sources = await Get(SourceFiles, SourceFilesRequest([target.get(GoModuleSources)]))
    # Strip the package directory so the sources sit at the sandbox root, where `go list .` runs.
    flattened_sources_snapshot = await Get(
        Snapshot, RemovePrefix(sources.snapshot.digest, request.address.spec_path)
    )

    # Note: The `go` tool requires GOPATH to be an absolute path which can only be resolved from within the
    # execution sandbox. Thus, this code uses a bash script to be able to resolve that path.
    # TODO: Merge all duplicate versions of this script into a single script and invoke rule.
    analyze_script_digest = await Get(
        Digest,
        CreateDigest(
            [
                FileContent(
                    "analyze.sh",
                    textwrap.dedent(
                        """\
                        export GOROOT="./go"
                        export GOPATH="$(/bin/pwd)/gopath"
                        export GOCACHE="$(/bin/pwd)/cache"
                        mkdir -p "$GOPATH" "$GOCACHE"
                        exec ./go/bin/go list -json .
                        """
                    ).encode("utf-8"),
                )
            ]
        ),
    )

    input_root_digest = await Get(
        Digest,
        MergeDigests(
            [flattened_sources_snapshot.digest, downloaded_goroot.digest, analyze_script_digest]
        ),
    )

    # NOTE(review): `output_files` looks copied from the module-resolution rule; the captured output digest
    # is not used below - confirm whether it is needed.
    process = Process(
        argv=[bash.path, "./analyze.sh"],
        input_digest=input_root_digest,
        description="Resolve go_package metadata.",
        output_files=["go.mod", "go.sum"],
        level=LogLevel.DEBUG,
    )

    result = await Get(ProcessResult, Process, process)

    metadata = json.loads(result.stdout)
    package_name: str = metadata["Name"]
    # `go list -json` omits list-valued fields that are empty, so a package with no imports has no
    # `Imports`/`Deps` keys at all. Default to an empty tuple rather than raising `KeyError`.
    imported_import_paths = typing.cast(Tuple[str, ...], tuple(metadata.get("Imports", ())))
    dependency_import_paths = typing.cast(Tuple[str, ...], tuple(metadata.get("Deps", ())))

    return ResolvedGoPackage(
        address=request.address,
        import_path=import_path,
        module_address=owning_go_module_result.module_address,
        package_name=package_name,
        imported_import_paths=imported_import_paths,
        dependency_import_paths=dependency_import_paths,
    )


def rules():
    """Return the rules collected from this module via `collect_rules()`."""
    return collect_rules()
67 changes: 67 additions & 0 deletions src/python/pants/backend/go/pkg_integration_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import textwrap

import pytest

from pants.backend.go import module, pkg
from pants.backend.go.pkg import ResolvedGoPackage, ResolveGoPackageRequest
from pants.backend.go.target_types import GoExternalModule, GoModule, GoPackage
from pants.build_graph.address import Address
from pants.core.util_rules import external_tool, source_files
from pants.engine.rules import QueryRule
from pants.testutil.rule_runner import RuleRunner


@pytest.fixture
def rule_runner() -> RuleRunner:
    """Build a `RuleRunner` that can answer `ResolveGoPackageRequest` queries.

    Registers the external-tool, source-file, Go module and Go package rules, the Go
    target types, and enables the experimental Go backend via options.
    """
    registered_rules = [
        *external_tool.rules(),
        *source_files.rules(),
        *module.rules(),
        *pkg.rules(),
        QueryRule(ResolvedGoPackage, [ResolveGoPackageRequest]),
    ]
    registered_target_types = [GoPackage, GoModule, GoExternalModule]

    runner = RuleRunner(rules=registered_rules, target_types=registered_target_types)
    runner.set_options(["--backend-packages=pants.backend.experimental.go"])
    return runner


def test_resolve_go_module(rule_runner: RuleRunner) -> None:
    """Check that a `go_package` without an explicit import path infers it from its `go_module`.

    Lays out a module `go.example.com/foo` with two packages (`pkg` and `cmd`) and asserts
    that the package at `foo/cmd` resolves to the import path `go.example.com/foo/cmd`.
    """
    rule_runner.write_files(
        {
            "foo/BUILD": "go_module()\n",
            "foo/go.mod": textwrap.dedent(
                """\
                module go.example.com/foo
                go 1.16"""
            ),
            "foo/go.sum": "",
            "foo/pkg/BUILD": "go_package()\n",
            "foo/pkg/foo.go": textwrap.dedent(
                """\
                package pkg
                func Grok() string {
                    return "Hello World"
                }"""
            ),
            "foo/cmd/BUILD": "go_package()\n",
            # The backslash is escaped so the Go source contains the two characters `\n` inside its
            # string literal. An unescaped `\n` here would become a real newline in the middle of the
            # Go string literal, which is not valid Go.
            "foo/cmd/main.go": textwrap.dedent(
                """\
                package main
                import (
                    "fmt"
                    "go.example.com/foo/pkg"
                )
                func main() {
                    fmt.Printf("%s\\n", pkg.Grok())
                }"""
            ),
        }
    )
    resolved_go_package = rule_runner.request(
        ResolvedGoPackage, [ResolveGoPackageRequest(Address("foo/cmd"))]
    )
    assert resolved_go_package.import_path == "go.example.com/foo/cmd"
31 changes: 7 additions & 24 deletions src/python/pants/backend/go/target_type_rules.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
from pants.backend.go.target_types import GoModuleSources, GoPackageDependencies
from pants.base.specs import AddressSpecs, AscendantAddresses, SiblingAddresses
from pants.backend.go.module import FindOwningGoModuleRequest, ResolvedOwningGoModule
from pants.backend.go.target_types import GoPackageDependencies
from pants.engine.internals.selectors import Get
from pants.engine.rules import collect_rules, rule
from pants.engine.target import InjectDependenciesRequest, InjectedDependencies, UnexpandedTargets
from pants.engine.target import InjectDependenciesRequest, InjectedDependencies
from pants.engine.unions import UnionRule


Expand All @@ -14,29 +14,12 @@ class InjectGoModuleDependency(InjectDependenciesRequest):

@rule
async def inject_go_module_dependency(request: InjectGoModuleDependency) -> InjectedDependencies:
# Obtain unexpanded targets and ensure file targets are filtered out. Unlike Python, file targets do not
# make sense semantically for Go source since Go builds entire packages at a time. The filtering is
# accomplished by requesting `UnexpandedTargets` and also filtering on `is_file_target`.
spec_path = request.dependencies_field.address.spec_path
candidate_targets = await Get(
UnexpandedTargets,
AddressSpecs([AscendantAddresses(spec_path), SiblingAddresses(spec_path)]),
owning_go_module_result = await Get(
ResolvedOwningGoModule, FindOwningGoModuleRequest(request.dependencies_field.address)
)
go_module_targets = [
tgt
for tgt in candidate_targets
if tgt.has_field(GoModuleSources) and not tgt.address.is_file_target
]

# Sort by address.spec_path in descending order so the nearest go_module target is sorted first.
sorted_go_module_targets = sorted(
go_module_targets, key=lambda tgt: tgt.address.spec_path, reverse=True
)
if sorted_go_module_targets:
nearest_go_module_target = sorted_go_module_targets[0]
return InjectedDependencies([nearest_go_module_target.address])
if owning_go_module_result.module_address:
return InjectedDependencies([owning_go_module_result.module_address])
else:
# TODO: Consider eventually requiring all go_package's to associate with a go_module.
return InjectedDependencies()


Expand Down