Skip to content

Commit

Permalink
Auto merge of #119912 - notriddle:notriddle/reexport-dedup, r=<try>
Browse files Browse the repository at this point in the history
rustdoc-search: single result for items with multiple paths

Part of #15723

Preview: https://notriddle.com/rustdoc-html-demo-9/reexport-dup/std/index.html?search=hashmap

This change uses the same "exact" paths as trait implementors and type alias inlining to track items with multiple reachable paths. This way, if you search for `vec`, you get only the `std` export of it, and not the one from `alloc`.

It still includes all the items in the search index so that you can search for them by all available paths. For example, try `core::option` and `std::option`, and notice that the results page doesn't show duplicates, but still shows all the items in their respective crates.
  • Loading branch information
bors committed Jan 13, 2024
2 parents 23148b1 + f6f69e8 commit 8d4d4b4
Show file tree
Hide file tree
Showing 13 changed files with 315 additions and 26 deletions.
12 changes: 12 additions & 0 deletions src/librustdoc/formats/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,16 +348,28 @@ impl<'a, 'tcx> DocFolder for CacheBuilder<'a, 'tcx> {
{
let desc =
short_markdown_summary(&item.doc_value(), &item.link_names(self.cache));
// For searching purposes, a re-export is a duplicate if:
//
// - It's either an inline, or a true re-export
// - It's got the same name
// - Both of them have the same exact path
let defid = (match &*item.kind {
&clean::ItemKind::ImportItem(ref import) => import.source.did,
_ => None,
})
.or_else(|| item.item_id.as_def_id());
// In case this is a field from a tuple struct, we don't add it into
// the search index because its name is something like "0", which is
// not useful for rustdoc search.
self.cache.search_index.push(IndexItem {
ty,
defid,
name: s,
path: join_with_double_colon(path),
desc,
parent,
parent_idx: None,
exact_path: None,
impl_id: if let Some(ParentStackItem::Impl { item_id, .. }) =
self.cache.parent_stack.last()
{
Expand Down
2 changes: 2 additions & 0 deletions src/librustdoc/html/render/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,13 @@ pub(crate) enum RenderMode {
#[derive(Debug)]
pub(crate) struct IndexItem {
pub(crate) ty: ItemType,
pub(crate) defid: Option<DefId>,
pub(crate) name: Symbol,
pub(crate) path: String,
pub(crate) desc: String,
pub(crate) parent: Option<DefId>,
pub(crate) parent_idx: Option<isize>,
pub(crate) exact_path: Option<String>,
pub(crate) impl_id: Option<DefId>,
pub(crate) search_type: Option<IndexItemFunctionType>,
pub(crate) aliases: Box<[Symbol]>,
Expand Down
151 changes: 135 additions & 16 deletions src/librustdoc/html/render/search_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::collections::{BTreeMap, VecDeque};
use rustc_data_structures::fx::{FxHashMap, FxIndexMap};
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::DefId;
use rustc_span::sym;
use rustc_span::symbol::Symbol;
use serde::ser::{Serialize, SerializeSeq, SerializeStruct, Serializer};
use thin_vec::ThinVec;
Expand All @@ -22,10 +23,13 @@ pub(crate) fn build_index<'tcx>(
cache: &mut Cache,
tcx: TyCtxt<'tcx>,
) -> String {
// Maps from ID to position in the `crate_paths` array.
let mut itemid_to_pathid = FxHashMap::default();
let mut primitives = FxHashMap::default();
let mut associated_types = FxHashMap::default();
let mut crate_paths = vec![];

// item type, display path, re-exported internal path
let mut crate_paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)> = vec![];

// Attach all orphan items to the type's definition if the type
// has since been learned.
Expand All @@ -35,11 +39,13 @@ pub(crate) fn build_index<'tcx>(
let desc = short_markdown_summary(&item.doc_value(), &item.link_names(cache));
cache.search_index.push(IndexItem {
ty: item.type_(),
defid: item.item_id.as_def_id(),
name: item.name.unwrap(),
path: join_with_double_colon(&fqp[..fqp.len() - 1]),
desc,
parent: Some(parent),
parent_idx: None,
exact_path: None,
impl_id,
search_type: get_function_type_for_search(
item,
Expand Down Expand Up @@ -88,17 +94,22 @@ pub(crate) fn build_index<'tcx>(
map: &mut FxHashMap<F, isize>,
itemid: F,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
item_type: ItemType,
path: &[Symbol],
exact_path: Option<&[Symbol]>,
) -> RenderTypeId {
match map.entry(itemid) {
Entry::Occupied(entry) => RenderTypeId::Index(*entry.get()),
Entry::Vacant(entry) => {
let pathid = *lastpathid;
entry.insert(pathid);
*lastpathid += 1;
crate_paths.push((item_type, path.to_vec()));
crate_paths.push((
item_type,
path.to_vec(),
exact_path.map(|path| path.to_vec()),
));
RenderTypeId::Index(pathid)
}
}
Expand All @@ -111,21 +122,30 @@ pub(crate) fn build_index<'tcx>(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) -> Option<RenderTypeId> {
let Cache { ref paths, ref external_paths, .. } = *cache;
let Cache { ref paths, ref external_paths, ref exact_paths, .. } = *cache;
match id {
RenderTypeId::DefId(defid) => {
if let Some(&(ref fqp, item_type)) =
paths.get(&defid).or_else(|| external_paths.get(&defid))
{
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
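// Re-exports only count if the name is exactly the same.
// This is a size optimization, as well as a "do what I mean" (DWIM)
// heuristic: if the names are not the same, the intent probably
// isn't, either.
// NOTE(review): the closure parameter below shadows the outer `fqp`, so
// `fqp.last() == fqp.last()` is always true and this filter never rejects
// anything; the parameter likely needs a distinct name so it is compared
// against the outer `fqp`.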
.filter(|fqp| fqp.last() == fqp.last());
Some(insert_into_map(
itemid_to_pathid,
ItemId::DefId(defid),
lastpathid,
crate_paths,
item_type,
fqp,
exact_fqp.map(|x| &x[..]).filter(|exact_fqp| exact_fqp != fqp),
))
} else {
None
Expand All @@ -140,6 +160,7 @@ pub(crate) fn build_index<'tcx>(
crate_paths,
ItemType::Primitive,
&[sym],
None,
))
}
RenderTypeId::Index(_) => Some(id),
Expand All @@ -150,6 +171,7 @@ pub(crate) fn build_index<'tcx>(
crate_paths,
ItemType::AssocType,
&[sym],
None,
)),
}
}
Expand All @@ -161,7 +183,7 @@ pub(crate) fn build_index<'tcx>(
primitives: &mut FxHashMap<Symbol, isize>,
associated_types: &mut FxHashMap<Symbol, isize>,
lastpathid: &mut isize,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>)>,
crate_paths: &mut Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
) {
if let Some(generics) = &mut ty.generics {
for item in generics {
Expand Down Expand Up @@ -258,7 +280,7 @@ pub(crate) fn build_index<'tcx>(
}
}

let Cache { ref paths, .. } = *cache;
let Cache { ref paths, ref exact_paths, ref external_paths, .. } = *cache;

// Then, on parent modules
let crate_items: Vec<&IndexItem> = search_index
Expand All @@ -273,14 +295,54 @@ pub(crate) fn build_index<'tcx>(
lastpathid += 1;

if let Some(&(ref fqp, short)) = paths.get(&defid) {
crate_paths.push((short, fqp.clone()));
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp))
.filter(|exact_fqp| {
exact_fqp.last() == Some(&item.name) && *exact_fqp != fqp
});
crate_paths.push((short, fqp.clone(), exact_fqp.cloned()));
Some(pathid)
} else {
None
}
}
});

if let Some(defid) = item.defid
&& item.parent_idx.is_none()
{
// If this is a re-export, retain the original path.
// Associated items don't use this.
// Their parent carries the exact fqp instead.
let exact_fqp = exact_paths
.get(&defid)
.or_else(|| external_paths.get(&defid).map(|&(ref fqp, _)| fqp));
item.exact_path = exact_fqp.and_then(|fqp| {
// Re-exports only count if the name is exactly the same.
// This is a size optimization, as well as a "do what I mean" (DWIM)
// heuristic: if the names are not the same, the intent probably
// isn't, either.
if fqp.last() != Some(&item.name) {
return None;
}
let path =
if item.ty == ItemType::Macro && tcx.has_attr(defid, sym::macro_export) {
// `#[macro_export]` always exports to the crate root.
tcx.crate_name(defid.krate).to_string()
} else {
if fqp.len() < 2 {
return None;
}
join_with_double_colon(&fqp[..fqp.len() - 1])
};
if path == item.path {
return None;
}
Some(path)
});
}

// Omit the parent path if it is the same as that of the prior item.
if lastpath == &item.path {
item.path.clear();
Expand Down Expand Up @@ -319,7 +381,7 @@ pub(crate) fn build_index<'tcx>(
struct CrateData<'a> {
doc: String,
items: Vec<&'a IndexItem>,
paths: Vec<(ItemType, Vec<Symbol>)>,
paths: Vec<(ItemType, Vec<Symbol>, Option<Vec<Symbol>>)>,
// The `String` is the alias name and the `Vec` is the list of elements with this alias.
//
// To be noted: the `usize` elements are indexes to `items`.
Expand All @@ -332,6 +394,7 @@ pub(crate) fn build_index<'tcx>(
ty: ItemType,
name: Symbol,
path: Option<usize>,
exact_path: Option<usize>,
}

impl Serialize for Paths {
Expand All @@ -345,6 +408,10 @@ pub(crate) fn build_index<'tcx>(
if let Some(ref path) = self.path {
seq.serialize_element(path)?;
}
if let Some(ref path) = self.exact_path {
assert!(self.path.is_some());
seq.serialize_element(path)?;
}
seq.end()
}
}
Expand All @@ -367,43 +434,94 @@ pub(crate) fn build_index<'tcx>(
mod_paths.insert(&item.path, index);
}
let mut paths = Vec::with_capacity(self.paths.len());
for (ty, path) in &self.paths {
for (ty, path, exact) in &self.paths {
if path.len() < 2 {
paths.push(Paths { ty: *ty, name: path[0], path: None });
paths.push(Paths { ty: *ty, name: path[0], path: None, exact_path: None });
continue;
}
let full_path = join_with_double_colon(&path[..path.len() - 1]);
let full_exact_path = exact
.as_ref()
.filter(|exact| exact.last() == path.last() && exact.len() >= 2)
.map(|exact| join_with_double_colon(&exact[..exact.len() - 1]));
let exact_path = extra_paths.len() + self.items.len();
let exact_path = full_exact_path.as_ref().map(|full_exact_path| match extra_paths
.entry(full_exact_path.clone())
{
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
if let Some(index) = mod_paths.get(&full_exact_path) {
return *index;
}
entry.insert(exact_path);
if !revert_extra_paths.contains_key(&exact_path) {
revert_extra_paths.insert(exact_path, full_exact_path.clone());
}
exact_path
}
});
if let Some(index) = mod_paths.get(&full_path) {
paths.push(Paths { ty: *ty, name: *path.last().unwrap(), path: Some(*index) });
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*index),
exact_path,
});
continue;
}
// It means it comes from an external crate so the item and its path will be
// stored into another array.
//
// `index` is put after the last `mod_paths`
let index = extra_paths.len() + self.items.len();
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path.clone());
}
match extra_paths.entry(full_path) {
match extra_paths.entry(full_path.clone()) {
Entry::Occupied(entry) => {
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(*entry.get()),
exact_path,
});
}
Entry::Vacant(entry) => {
entry.insert(index);
if !revert_extra_paths.contains_key(&index) {
revert_extra_paths.insert(index, full_path);
}
paths.push(Paths {
ty: *ty,
name: *path.last().unwrap(),
path: Some(index),
exact_path,
});
}
}
}

// Direct exports use adjacent arrays for the current crate's items,
// but re-exported exact paths don't.
let mut re_exports = Vec::new();
for (item_index, item) in self.items.iter().enumerate() {
if let Some(exact_path) = item.exact_path.as_ref() {
if let Some(path_index) = mod_paths.get(&exact_path) {
re_exports.push((item_index, *path_index));
} else {
let path_index = extra_paths.len() + self.items.len();
let path_index = match extra_paths.entry(exact_path.clone()) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
entry.insert(path_index);
if !revert_extra_paths.contains_key(&path_index) {
revert_extra_paths.insert(path_index, exact_path.clone());
}
path_index
}
};
re_exports.push((item_index, path_index));
}
}
}

let mut names = Vec::with_capacity(self.items.len());
let mut types = String::with_capacity(self.items.len());
let mut full_paths = Vec::with_capacity(self.items.len());
Expand Down Expand Up @@ -463,6 +581,7 @@ pub(crate) fn build_index<'tcx>(
crate_data.serialize_field("f", &functions)?;
crate_data.serialize_field("c", &deprecated)?;
crate_data.serialize_field("p", &paths)?;
crate_data.serialize_field("r", &re_exports)?;
crate_data.serialize_field("b", &self.associated_item_disambiguators)?;
if has_aliases {
crate_data.serialize_field("a", &self.aliases)?;
Expand Down
Loading

0 comments on commit 8d4d4b4

Please sign in to comment.