Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

go: analyze package metadata #12429

Merged
merged 14 commits into from
Jul 28, 2021
11 changes: 10 additions & 1 deletion src/python/pants/backend/experimental/go/register.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pants.backend.go import build, distribution, import_analysis, module, tailor, target_type_rules
from pants.backend.go import (
build,
distribution,
import_analysis,
module,
pkg,
tailor,
target_type_rules,
)
from pants.backend.go import target_types as go_target_types
from pants.backend.go.target_types import GoBinary, GoExternalModule, GoModule, GoPackage

Expand All @@ -17,6 +25,7 @@ def rules():
*go_target_types.rules(),
*import_analysis.rules(),
*module.rules(),
*pkg.rules(),
*tailor.rules(),
*target_type_rules.rules(),
]
59 changes: 58 additions & 1 deletion src/python/pants/backend/go/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from pants.backend.go.distribution import GoLangDistribution
from pants.backend.go.target_types import GoModuleSources
from pants.base.specs import AddressSpecs, AscendantAddresses, MaybeEmptySiblingAddresses
from pants.build_graph.address import Address
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
Expand All @@ -28,7 +29,7 @@
from pants.engine.platform import Platform
from pants.engine.process import BashBinary, Process, ProcessResult
from pants.engine.rules import collect_rules, goal_rule, rule
from pants.engine.target import UnexpandedTargets
from pants.engine.target import Target, UnexpandedTargets
from pants.util.logging import LogLevel
from pants.util.ordered_set import FrozenOrderedSet

Expand All @@ -44,10 +45,22 @@ class ModuleDescriptor:

@dataclass(frozen=True)
class ResolvedGoModule:
    """The outcome of resolving a single `go_module` target."""

    # Where the resolved `go_module` target lives.
    address: Address

    # The `go_module` target itself.
    target: Target

    # The module's import path, inferred from the import path found in the go.mod file.
    import_path: str

    # Minimum Go version taken from the `go` statement in go.mod.
    minimum_go_version: Optional[str]

    # Descriptors (metadata) for the modules this module references.
    modules: FrozenOrderedSet[ModuleDescriptor]

    # Digest holding go.mod together with the updated go.sum.
    digest: Digest


Expand Down Expand Up @@ -79,6 +92,10 @@ def basic_parse_go_mod(raw_text: bytes) -> Tuple[Optional[str], Optional[str]]:

# Parse the output of `go mod download` into a list of module descriptors.
def parse_module_descriptors(raw_json: bytes) -> List[ModuleDescriptor]:
# `ijson` cannot handle empty input so short-circuit if there is no data.
if len(raw_json) == 0:
return []

module_descriptors = []
for raw_module_descriptor in ijson.items(raw_json, "", multiple_values=True):
module_descriptor = ModuleDescriptor(
Expand Down Expand Up @@ -117,6 +134,7 @@ async def resolve_go_module(

# Note: The `go` tool requires GOPATH to be an absolute path which can only be resolved from within the
# execution sandbox. Thus, this code uses a bash script to be able to resolve that path.
# TODO: Merge all duplicate versions of this script into a single script and add a rule to invoke it.
analyze_script_digest = await Get(
Digest,
CreateDigest(
Expand Down Expand Up @@ -167,13 +185,52 @@ async def resolve_go_module(

return ResolvedGoModule(
address=request.address,
target=target,
import_path=module_path,
minimum_go_version=minimum_go_version,
modules=FrozenOrderedSet(parse_module_descriptors(result.stdout)),
digest=result.output_digest,
)


@dataclass(frozen=True)
class FindNearestGoModuleRequest:
    """Request to locate the nearest `go_module` target for a given spec path."""

    # Spec path the search starts from; its ascendant and sibling addresses are the candidates.
    spec_path: str


@dataclass(frozen=True)
class ResolvedOwningGoModule:
    """Result of searching for the `go_module` target that owns a spec path."""

    # Address of the nearest `go_module` target, or `None` when no such target was found.
    module_address: Optional[Address]


@rule
async def find_nearest_go_module(request: FindNearestGoModuleRequest) -> ResolvedOwningGoModule:
# Obtain unexpanded targets and ensure file targets are filtered out. Unlike Python, file targets do not
# make sense semantically for Go source since Go builds entire packages at a time. The filtering is
Comment on lines +205 to +206
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a note: this is somewhat true of Java as well: the entire JVM package is implicitly available without an import statement, but separate compilation is still supported.

I expect that @patricklaw will have the inference implementation for the JVM infer a dependency on all files in the package (which will cause it to automatically be coarsened to a single CoarsenedTarget) rather than taking the approach you're taking here. Part of the reason for that is that the JVM allows import cycles between files, and so he already needs to be able to support cycles and thus coarsening.

The equivalent case for go would be if multiple packages had cyclic imports for one another: but it looks like that isn't possible: https://jogendra.dev/import-cycles-in-golang-and-how-to-deal-with-them

So yea: this seems like the way to go for go.

# accomplished by requesting `UnexpandedTargets` and also filtering on `is_file_target`.
spec_path = request.spec_path
candidate_targets = await Get(
UnexpandedTargets,
AddressSpecs([AscendantAddresses(spec_path), MaybeEmptySiblingAddresses(spec_path)]),
)
go_module_targets = [
tgt
for tgt in candidate_targets
if tgt.has_field(GoModuleSources) and not tgt.address.is_file_target
]

# Sort by address.spec_path in descending order so the nearest go_module target is sorted first.
sorted_go_module_targets = sorted(
go_module_targets, key=lambda tgt: tgt.address.spec_path, reverse=True
)
if sorted_go_module_targets:
nearest_go_module_target = sorted_go_module_targets[0]
return ResolvedOwningGoModule(module_address=nearest_go_module_target.address)
else:
# TODO: Consider eventually requiring every `go_package` target to be associated with a `go_module`.
return ResolvedOwningGoModule(module_address=None)


# TODO: Add integration tests for the `go-resolve` goal once we figure out its final form. For now, it is a debug
# tool to help update go.sum while developing the Go plugin and will probably change.
class GoResolveSubsystem(GoalSubsystem):
Expand Down
256 changes: 256 additions & 0 deletions src/python/pants/backend/go/pkg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).
import json
import logging
import textwrap
import typing
from dataclasses import dataclass
from typing import Optional, Tuple

from pants.backend.go.distribution import GoLangDistribution
from pants.backend.go.module import (
FindNearestGoModuleRequest,
ResolvedGoModule,
ResolvedOwningGoModule,
ResolveGoModuleRequest,
)
from pants.backend.go.target_types import GoImportPath, GoModuleSources, GoPackageSources
from pants.build_graph.address import Address
from pants.core.util_rules.external_tool import DownloadedExternalTool, ExternalToolRequest
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
from pants.engine.addresses import Addresses
from pants.engine.fs import CreateDigest, Digest, FileContent, MergeDigests
from pants.engine.internals.selectors import Get
from pants.engine.platform import Platform
from pants.engine.process import BashBinary, Process, ProcessResult
from pants.engine.rules import collect_rules, rule
from pants.engine.target import UnexpandedTargets
from pants.util.logging import LogLevel

_logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class ResolvedGoPackage:
    """A fully-resolved Go package.

    The metadata fields are populated from the output of running `go list` on the package.
    """

    # Address of the `go_package` target (copied from the `ResolveGoPackageRequest` for convenience).
    address: Address

    # This package's import path; inferred from an owning `go_module` when one is present.
    import_path: str

    # Address of the owning `go_module`, if any: the nearest `go_module` at the same or a higher
    # level of the source tree.
    module_address: Optional[Address]

    # Package name as declared by the `package` directives in the sources (`Name` metadata key).
    package_name: str

    # Import paths this package uses (`Imports` metadata key).
    imports: Tuple[str, ...]

    # Import paths used by the test files (`TestImports` metadata key).
    test_imports: Tuple[str, ...]

    # Explicit and transitive import paths needed to build the code (`Deps` metadata key).
    dependency_import_paths: Tuple[str, ...]

    # .go sources, excluding CgoFiles, TestGoFiles and XTestGoFiles (`GoFiles` metadata key).
    go_files: Tuple[str, ...]

    # .go sources that import "C" (`CgoFiles` metadata key).
    cgo_files: Tuple[str, ...]

    # .go sources ignored because of build constraints (`IgnoredGoFiles` metadata key).
    ignored_go_files: Tuple[str, ...]

    # Non-.go sources ignored because of build constraints (`IgnoredOtherFiles` metadata key).
    ignored_other_files: Tuple[str, ...]

    # _test.go files inside the package (`TestGoFiles` metadata key).
    test_go_files: Tuple[str, ...]

    # _test.go files outside the package (`XTestGoFiles` metadata key).
    xtest_go_files: Tuple[str, ...]


@dataclass(frozen=True)
class ResolveGoPackageRequest:
    """Request to resolve the metadata of one `go_package` target."""

    # Address of the `go_package` target to resolve.
    address: Address


def error_to_string(d: dict) -> str:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be dict[str, str], or is this trickier than that?

pos = d.get("Pos", "")
if pos:
pos = f"{pos}: "

import_stack_items = d.get("ImportStack", [])
import_stack = f" (import stack: {', '.join(import_stack_items)})" if import_stack_items else ""
return f"{pos}{d['Err']}{import_stack}"


def _package_subpath(module_dir: str, package_dir: str) -> str:
    # Return `package_dir` relative to `module_dir` with no leading slash ("" when they are equal).
    # The check is path-component aware so that `a` is not treated as an ancestor of `ab/c`, and it
    # raises explicitly (rather than via `assert`) so it is not stripped when running with `-O`.
    if module_dir and package_dir != module_dir and not package_dir.startswith(f"{module_dir}/"):
        raise AssertionError(
            f"The go_package directory `{package_dir}` is not located under the owning go_module "
            f"directory `{module_dir}`."
        )
    return package_dir[len(module_dir) :].lstrip("/")


def _string_tuple(metadata: dict, key: str) -> Tuple[str, ...]:
    # Read a list-valued key from the `go list` metadata as a tuple; a missing key yields ().
    return typing.cast(Tuple[str, ...], tuple(metadata.get(key, [])))


@rule
async def resolve_go_package(
    request: ResolveGoPackageRequest,
    goroot: GoLangDistribution,
    platform: Platform,
    bash: BashBinary,
) -> ResolvedGoPackage:
    """Resolve the metadata of a `go_package` target by running `go list -json` on it.

    The rule looks up the target and its owning `go_module`, computes the package's import path
    (an explicit `GoImportPath` value wins, otherwise it is derived from the module's import path
    plus the package's directory relative to the module), runs `go list -json` inside a sandbox
    containing the package and module sources, and converts the JSON output into a
    `ResolvedGoPackage`.

    Raises:
        AssertionError: if the address resolves to zero or multiple targets, or the package is not
            located under its owning module's directory.
        ValueError: if there is no owning `go_module`, if no import path can be determined, or if
            the package contains source files under an unsupported metadata key.
    """
    # Local import so that this block does not depend on a file-level import being added.
    import shlex

    # TODO: Use MultiGet where applicable.

    downloaded_goroot = await Get(
        DownloadedExternalTool,
        ExternalToolRequest,
        goroot.get_request(platform),
    )

    targets = await Get(UnexpandedTargets, Addresses([request.address]))
    if not targets:
        raise AssertionError(f"Address `{request.address}` did not resolve to any targets.")
    elif len(targets) > 1:
        raise AssertionError(f"Address `{request.address}` resolved to multiple targets.")
    target = targets[0]

    owning_go_module_result = await Get(
        ResolvedOwningGoModule, FindNearestGoModuleRequest(request.address.spec_path)
    )

    if not owning_go_module_result.module_address:
        raise ValueError(f"The go_package at address {request.address} has no owning go_module.")
    resolved_go_module = await Get(
        ResolvedGoModule, ResolveGoModuleRequest(owning_go_module_result.module_address)
    )

    # Directory of the package relative to the module, e.g. "" or "sub/dir" (never a leading "/").
    module_dir = resolved_go_module.address.spec_path
    spec_subpath = _package_subpath(module_dir, request.address.spec_path)

    # Compute the import_path for this go_package.
    import_path_field = target.get(GoImportPath)
    if import_path_field and import_path_field.value:
        # Use any explicit import path set on the `go_package` target.
        import_path = import_path_field.value
    else:
        # Otherwise infer the import path from the owning `go_module` target. The inferred import path will be the
        # module's import path plus any subdirectories in the spec_path between the go_module and go_package target.
        if not resolved_go_module.import_path:
            raise ValueError(
                f"Unable to infer import path for the `go_package` at address {request.address} "
                f"because the owning go_module at address {resolved_go_module.address} "
                "does not have an import path defined nor could one be inferred."
            )
        # Join with an explicit "/" so a module at the repository root (empty spec_path) still
        # yields `<module>/<subdir>` instead of gluing the two together.
        import_path = (
            f"{resolved_go_module.import_path}/{spec_subpath}"
            if spec_subpath
            else resolved_go_module.import_path
        )

    sources = await Get(
        SourceFiles,
        SourceFilesRequest(
            [
                target.get(GoPackageSources),
                resolved_go_module.target.get(GoModuleSources),
            ]
        ),
    )

    # A bare `cd` (empty spec_path for a root-level module) would change to $HOME, so fall back to
    # "."; quote both paths so directories with spaces or shell metacharacters are passed intact.
    quoted_module_dir = shlex.quote(module_dir or ".")
    quoted_package_arg = shlex.quote(f"./{spec_subpath}" if spec_subpath else ".")

    # Note: The `go` tool requires GOPATH to be an absolute path which can only be resolved from within the
    # execution sandbox. Thus, this code uses a bash script to be able to resolve that path.
    # TODO: Merge all duplicate versions of this script into a single script and add an invoke rule that will
    # insert the desired `go` command into the boilerplate portions.
    analyze_script_digest = await Get(
        Digest,
        CreateDigest(
            [
                FileContent(
                    "analyze.sh",
                    textwrap.dedent(
                        f"""\
                        export GOROOT="$(/bin/pwd)/go"
                        export GOPATH="$(/bin/pwd)/gopath"
                        export GOCACHE="$(/bin/pwd)/cache"
                        /bin/mkdir -p "$GOPATH" "$GOCACHE"
                        cd {quoted_module_dir}
                        exec "${{GOROOT}}/bin/go" list -json {quoted_package_arg}
                        """
                    ).encode("utf-8"),
                )
            ]
        ),
    )

    input_root_digest = await Get(
        Digest,
        MergeDigests([sources.snapshot.digest, downloaded_goroot.digest, analyze_script_digest]),
    )

    process = Process(
        argv=[bash.path, "./analyze.sh"],
        input_digest=input_root_digest,
        description="Resolve go_package metadata.",
        level=LogLevel.DEBUG,
    )

    result = await Get(ProcessResult, Process, process)

    metadata = json.loads(result.stdout)

    # TODO: Raise an exception on errors. They are only emitted as warnings for now because the `go` tool is
    # flagging missing first-party code as a dependency error. But we want dependency inference and won't know
    # what the dependency actually is unless we first resolve the package with that dependency. So circular
    # reasoning. We may need to hydrate the sources for all go_package targets that share a `go_module`.
    if metadata.get("Incomplete"):
        error_dict = metadata.get("Error", {})
        if error_dict:
            error_str = error_to_string(error_dict)
            _logger.warning(
                f"Error while resolving Go package at address {request.address}: {error_str}"
            )
        # TODO: Check DepsErrors key as well.

    # Raise an exception if any unsupported source file keys are present in the metadata.
    for key in (
        "CompiledGoFiles",
        "CFiles",
        "CXXFiles",
        "MFiles",
        "HFiles",
        "FFiles",
        "SFiles",
        "SwigFiles",
        "SwigCXXFiles",
        "SysoFiles",
    ):
        files = metadata.get(key, [])
        if files:
            raise ValueError(
                f"The go_package at address {request.address} contains the following unsupported source files "
                f"that were detected under the key '{key}': {', '.join(files)}."
            )

    package_name: str = metadata["Name"]

    return ResolvedGoPackage(
        address=request.address,
        import_path=import_path,
        module_address=owning_go_module_result.module_address,
        package_name=package_name,
        imports=_string_tuple(metadata, "Imports"),
        test_imports=_string_tuple(metadata, "TestImports"),
        dependency_import_paths=_string_tuple(metadata, "Deps"),
        go_files=_string_tuple(metadata, "GoFiles"),
        cgo_files=_string_tuple(metadata, "CgoFiles"),
        ignored_go_files=_string_tuple(metadata, "IgnoredGoFiles"),
        ignored_other_files=_string_tuple(metadata, "IgnoredOtherFiles"),
        test_go_files=_string_tuple(metadata, "TestGoFiles"),
        xtest_go_files=_string_tuple(metadata, "XTestGoFiles"),
    )


def rules():
    """Return whatever `collect_rules()` gathers when called from this module."""
    collected = collect_rules()
    return collected
Loading