Update #44

Merged: 11 commits, Jan 29, 2020
12 changes: 12 additions & 0 deletions conf/solr/7.3.1/schema.xml
@@ -171,6 +171,12 @@
<field name="variableName" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="variableLabel" type="text_en" stored="true" indexed="true" multiValued="true"/>

<field name="literalQuestion" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="interviewInstructions" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="postQuestion" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="variableUniverse" type="text_en" stored="true" indexed="true" multiValued="true"/>
<field name="variableNotes" type="text_en" stored="true" indexed="true" multiValued="true"/>

<field name="fileDescription" type="text_en" stored="true" indexed="true" multiValued="false"/>

<field name="fileTypeGroupFacet" type="string" stored="true" indexed="true" multiValued="false"/>
@@ -229,6 +235,12 @@
<!-- Added for Dataverse 4.0 Beta: make variable names and labels searchable in basic search https://redmine.hmdc.harvard.edu/issues/3945 -->
<copyField source="variableName" dest="_text_" maxChars="3000"/>
<copyField source="variableLabel" dest="_text_" maxChars="3000"/>
<!-- Added variable level metadata that can be updated from DCT -->
<copyField source="literalQuestion" dest="_text_" maxChars="3000"/>
<copyField source="interviewInstructions" dest="_text_" maxChars="3000"/>
<copyField source="postQuestion" dest="_text_" maxChars="3000"/>
<copyField source="variableUniverse" dest="_text_" maxChars="3000"/>
<copyField source="variableNotes" dest="_text_" maxChars="3000"/>
<!-- Make dataverse subject and affiliation searchable from basic search: https://github.com/IQSS/dataverse/issues/1431 -->
<copyField source="dvSubject" dest="_text_" maxChars="3000"/>
<copyField source="dvAlias" dest="_text_" maxChars="3000"/>
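Because each new field is also copied into the catch-all _text_ field (see the copyField entries above), text stored in them should be discoverable from basic search once a file is reindexed. A minimal way to check this through the Search API is sketched below; the server URL and query string are placeholders, not part of this change:

export SERVER_URL=https://demo.dataverse.org

curl "$SERVER_URL/api/search?q=employment+status&type=file"

Matching datafiles should include ones whose variable-level metadata (for example a literal question) contains the query terms.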
2 changes: 2 additions & 0 deletions doc/release-notes/6545-solr-var-meta.md
@@ -0,0 +1,2 @@
The Solr search schema (schema.xml) was changed. New fields were added: literalQuestion, interviewInstructions, postQuestion, variableUniverse, and variableNotes.
A full reindex is needed only if you want variable-level metadata updated before this change to be searchable. Otherwise there is no need to reindex; new updates made through the Data Curation Tool (DCT) will be indexed automatically.
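For reference, a full reindex on a typical installation looks roughly like the sketch below, assuming the standard admin API is reachable on localhost; see the Admin Guide's Solr index documentation for the procedure recommended for your installation:

curl http://localhost:8080/api/admin/index/clear   # optional: empty the index first
curl http://localhost:8080/api/admin/index         # reindex all objects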
4 changes: 2 additions & 2 deletions doc/sphinx-guides/source/api/native-api.rst
@@ -330,13 +330,13 @@ Next you need to figure out the alias or database id of the "parent" dataverse i
export PARENT=root
export SERVER_URL=https://demo.dataverse.org

curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/dataverses/$PARENT/datasets --upload-file dataset-finch1.json
curl -H X-Dataverse-key:$API_TOKEN -X POST "$SERVER_URL/api/dataverses/$PARENT/datasets" --upload-file dataset-finch1.json

The fully expanded example above (without the environment variables) looks like this:

.. code-block:: bash

curl -H X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx -X POST https://demo.dataverse.org/api/dataverses/root/datasets --upload-file dataset-finch1.json
curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/datasets" --upload-file "dataset-finch1.json"

You should expect an HTTP 200 ("OK") response and JSON indicating the database ID and Persistent ID (PID such as DOI or Handle) that has been assigned to your newly created dataset.
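If it helps to check that result from the command line, a hedged sketch follows; the .data.id and .data.persistentId paths reflect the response shape described above but are an assumption, and jq is used only for readability:

curl -s -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$PARENT/datasets" --upload-file dataset-finch1.json | jq '.status, .data.id, .data.persistentId'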

src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java
@@ -125,7 +125,7 @@ public DatasetVersionDifference(DatasetVersion newVersion, DatasetVersion origin
changedFileMetadata.add(fmdo);
changedFileMetadata.add(fmdn);
}
if (!compareVariableMetadata(fmdo,fmdn) || !compareVarGroup(fmdo, fmdn)) {
if (!variableMetadataUtil.compareVariableMetadata(fmdo,fmdn) || !compareVarGroup(fmdo, fmdn)) {
changedVariableMetadata.add(fmdo);
changedVariableMetadata.add(fmdn);
}
@@ -524,33 +524,6 @@ private void addToNoteSummary(DatasetField dsfo, int added, int deleted, int cha
summaryDataForNote.add(noteArray);
}

private boolean compareVariableMetadata(FileMetadata fmdo, FileMetadata fmdn) {
Collection<VariableMetadata> vmlo = fmdo.getVariableMetadatas();
Collection<VariableMetadata> vmln = fmdn.getVariableMetadatas();

int count = 0;
if (vmlo.size() != vmln.size()) {
return false;
} else {
for (VariableMetadata vmo : vmlo) {
for (VariableMetadata vmn : vmln) {
if (vmo.getDataVariable().getId().equals(vmn.getDataVariable().getId())) {
count++;
if (!variableMetadataUtil.compareVarMetadata(vmo, vmn)) {
return false;
}
}
}
}
}
if (count == vmlo.size()) {
return true;
} else {
return false;
}

}

private boolean compareVarGroup(FileMetadata fmdo, FileMetadata fmdn) {
List<VarGroup> vglo = fmdo.getVarGroups();
List<VarGroup> vgln = fmdn.getVarGroups();
19 changes: 16 additions & 3 deletions src/main/java/edu/harvard/iq/dataverse/FilePage.java
@@ -78,6 +78,7 @@ public class FilePage implements java.io.Serializable {
private List<ExternalTool> configureTools;
private List<ExternalTool> exploreTools;
private List<ExternalTool> toolsWithPreviews;
private Long datasetVersionId;

@EJB
DataFileServiceBean datafileService;
@@ -170,10 +171,14 @@ public String init() {

return permissionsWrapper.notFound();
}

RetrieveDatasetVersionResponse retrieveDatasetVersionResponse;
retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(file.getOwner().getVersions(), version);
Long getDatasetVersionID = retrieveDatasetVersionResponse.getDatasetVersion().getId();
Long getDatasetVersionID = null;
if (datasetVersionId == null) {
retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(file.getOwner().getVersions(), version);
getDatasetVersionID = retrieveDatasetVersionResponse.getDatasetVersion().getId();
} else {
getDatasetVersionID = datasetVersionId;
}
fileMetadata = datafileService.findFileMetadataByDatasetVersionIdAndDataFileId(getDatasetVersionID, fileId);

if (fileMetadata == null) {
@@ -236,6 +241,14 @@ private boolean canViewUnpublishedDataset() {
public FileMetadata getFileMetadata() {
return fileMetadata;
}

public Long getDatasetVersionId() {
return datasetVersionId;
}

public void setDatasetVersionId(Long datasetVersionId) {
this.datasetVersionId = datasetVersionId;
}

private List<ExternalTool> addMapLayerAndSortExternalTools(){
List<ExternalTool> retList = externalToolService.findFileToolsByTypeContentTypeAndAvailablePreview(ExternalTool.Type.EXPLORE, file.getContentType());
49 changes: 41 additions & 8 deletions src/main/java/edu/harvard/iq/dataverse/api/EditDDI.java
@@ -1,7 +1,9 @@
package edu.harvard.iq.dataverse.api;

import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.batch.util.LoggingUtil;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.UpdateDatasetVersionCommand;
@@ -24,6 +26,8 @@
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VariableCategory;
import edu.harvard.iq.dataverse.datavariable.VariableMetadataDDIParser;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import org.apache.solr.client.solrj.SolrServerException;

import javax.ejb.EJB;
import javax.ejb.EJBException;
@@ -41,8 +45,10 @@
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
import java.io.InputStream;

import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.List;
@@ -78,6 +84,9 @@ public class EditDDI extends AbstractApiBean {
@Inject
DataverseRequestServiceBean dvRequestService;

@EJB
IndexServiceBean indexService;

@Inject
DataverseSession session;

@@ -141,7 +150,7 @@ public Response edit (InputStream body, @PathParam("fileId") String fileId) {
boolean varUpdate = varUpdates(mapVarToVarMet, fml, neededToUpdateVM, false);
if (varUpdate || groupUpdate) {

if (!updateDraftVersion(neededToUpdateVM, varGroupMap, dataset, latestVersion, groupUpdate, fml)) {
if (!updateDraftVersion(neededToUpdateVM, varGroupMap, dataset, apiTokenUser, groupUpdate, fml)) {
return error(Response.Status.INTERNAL_SERVER_ERROR, "Failed to update draft version" );
}
} else {
@@ -226,7 +235,6 @@ private boolean createNewDraftVersion(ArrayList<VariableMetadata> neededToUpdate
}
}


//add New groups
for (VarGroup varGroup : varGroupMap.values()) {
varGroup.setFileMetadata(fml);
@@ -239,6 +247,12 @@
}
}

boolean doNormalSolrDocCleanUp = true;
try {
Future<String> indexDatasetFuture = indexService.indexDataset(dataset, doNormalSolrDocCleanUp);
} catch (IOException | SolrServerException ex) {
logger.log(Level.SEVERE, "Couldn''t index dataset: " + ex.getMessage());
}

return true;
}
@@ -273,13 +287,8 @@ private void updateCategoryMetadata(VariableMetadata vmNew, VariableMetadata vmO

}

private boolean updateDraftVersion(ArrayList<VariableMetadata> neededToUpdateVM, Map<Long,VarGroup> varGroupMap, Dataset dataset, DatasetVersion newDatasetVersion, boolean groupUpdate, FileMetadata fml ) {

private boolean updateDraftVersion(ArrayList<VariableMetadata> neededToUpdateVM, Map<Long,VarGroup> varGroupMap, Dataset dataset, User apiTokenUser, boolean groupUpdate, FileMetadata fml ) {

Timestamp updateTime = new Timestamp(new Date().getTime());

newDatasetVersion.setLastUpdateTime(updateTime);
dataset.setModificationTime(updateTime);

for (int i = 0; i < neededToUpdateVM.size(); i++) {
VariableMetadata vm = neededToUpdateVM.get(i);
@@ -324,6 +333,30 @@ private boolean updateDraftVersion(ArrayList<VariableMetadata> neededToUpdateVM,
em.merge(varGroup);
}
}
Command<Dataset> cmd;
try {
DataverseRequest dr = new DataverseRequest(apiTokenUser, httpRequest);
cmd = new UpdateDatasetVersionCommand(dataset, dr);
((UpdateDatasetVersionCommand) cmd).setValidateLenient(true);
commandEngine.submit(cmd);

} catch (EJBException ex) {
StringBuilder error = new StringBuilder();
error.append(ex).append(" ");
error.append(ex.getMessage()).append(" ");
Throwable cause = ex;
while (cause.getCause() != null) {
cause = cause.getCause();
error.append(cause).append(" ");
error.append(cause.getMessage()).append(" ");
}
logger.log(Level.SEVERE, "Couldn''t save dataset: {0}", error.toString());

return false;
} catch (CommandException ex) { ;
logger.log(Level.SEVERE, "Couldn''t save dataset: {0}", ex.getMessage());
return false;
}

return true;
}
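For context, this resource is what the Data Curation Tool calls to submit edited variable metadata. A sketch of invoking it directly is below; the PUT /api/edit/{fileId} path is inferred from the class and parameter names and is an assumption, as are the token, file id, and payload file name. The request body is a DDI fragment containing the updated variable-level metadata.

export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
export SERVER_URL=https://demo.dataverse.org
export FILE_ID=24

curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/edit/$FILE_ID" --upload-file updated-variable-metadata.xml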
src/main/java/edu/harvard/iq/dataverse/datavariable/VariableMetadataUtil.java
@@ -1,7 +1,38 @@
package edu.harvard.iq.dataverse.datavariable;

import edu.harvard.iq.dataverse.FileMetadata;

import java.util.Collection;

public class VariableMetadataUtil {

public static boolean compareVariableMetadata(FileMetadata fmdo, FileMetadata fmdn) {
Collection<VariableMetadata> vmlo = fmdo.getVariableMetadatas();
Collection<VariableMetadata> vmln = fmdn.getVariableMetadatas();

int count = 0;
if (vmlo.size() != vmln.size()) {
return false;
} else {
for (VariableMetadata vmo : vmlo) {
for (VariableMetadata vmn : vmln) {
if (vmo.getDataVariable().getId().equals(vmn.getDataVariable().getId())) {
count++;
if (!compareVarMetadata(vmo, vmn)) {
return false;
}
}
}
}
}
if (count == vmlo.size()) {
return true;
} else {
return false;
}

}

public static boolean compareVarMetadata(VariableMetadata vmOld, VariableMetadata vmNew) {
boolean thesame = true;

src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -23,6 +23,9 @@
import edu.harvard.iq.dataverse.dataaccess.DataAccessRequest;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.datavariable.DataVariable;
import edu.harvard.iq.dataverse.datavariable.VariableMetadata;
import edu.harvard.iq.dataverse.datavariable.VariableMetadataUtil;
import edu.harvard.iq.dataverse.datavariable.VariableServiceBean;
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.FileUtil;
@@ -111,6 +114,9 @@ public class IndexServiceBean {
@EJB
SolrClientService solrClientService;

@EJB
VariableServiceBean variableService;

public static final String solrDocIdentifierDataverse = "dataverse_";
public static final String solrDocIdentifierFile = "datafile_";
public static final String solrDocIdentifierDataset = "dataset_";
@@ -131,6 +137,8 @@ public class IndexServiceBean {
private Dataverse rootDataverseCached;
private SolrClient solrServer;

private VariableMetadataUtil variableMetadataUtil;

@PostConstruct
public void init() {
String urlString = "http://" + systemConfig.getSolrHostColonPort() + "/solr/collection1";
@@ -918,7 +926,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
* whether full text indexing is on now.
*/
if ((fileMetadata.getDataFile().isRestricted() == releasedFileMetadata.getDataFile().isRestricted())) {
if (fileMetadata.contentEquals(releasedFileMetadata)) {
if (fileMetadata.contentEquals(releasedFileMetadata) && variableMetadataUtil.compareVariableMetadata(releasedFileMetadata,fileMetadata)) {
indexThisMetadata = false;
logger.fine("This file metadata hasn't changed since the released version; skipping indexing.");
} else {
@@ -1145,12 +1153,46 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
// is something social science-specific...
// anyway -- needs to be reviewed. -- L.A. 4.0alpha1

//Variable Name
if (var.getName() != null && !var.getName().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName());
}
if (var.getLabel() != null && !var.getLabel().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());


List<VariableMetadata> vmList = variableService.findByDataVarIdAndFileMetaId(var.getId(), fileMetadata.getId());
VariableMetadata vm = null;
if (vmList != null && vmList.size() >0) {
vm = vmList.get(0);
}

if (vmList.size() == 0 ) {
//Variable Label
if (var.getLabel() != null && !var.getLabel().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel());
}

} else if (vm != null) {
if (vm.getLabel() != null && !vm.getLabel().equals("") ) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, vm.getLabel());
}
if (vm.getLiteralquestion() != null && !vm.getLiteralquestion().equals("")) {
datafileSolrInputDocument.addField(SearchFields.LITERAL_QUESTION, vm.getLiteralquestion());
}
if (vm.getInterviewinstruction() != null && !vm.getInterviewinstruction().equals("")) {
datafileSolrInputDocument.addField(SearchFields.INTERVIEW_INSTRUCTIONS, vm.getInterviewinstruction());
}
if (vm.getPostquestion() != null && !vm.getPostquestion().equals("")) {
datafileSolrInputDocument.addField(SearchFields.POST_QUESTION, vm.getPostquestion());
}
if (vm.getUniverse() != null && !vm.getUniverse().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_UNIVERSE, vm.getUniverse());
}
if (vm.getNotes() != null && !vm.getNotes().equals("")) {
datafileSolrInputDocument.addField(SearchFields.VARIABLE_NOTES, vm.getNotes());
}

}

}
// TABULAR DATA TAGS:
// (not to be confused with the file categories, indexed above!)
src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java
@@ -255,6 +255,13 @@ more targeted results for just datasets. The format is YYYY (i.e.

public static final String VARIABLE_NAME = "variableName";
public static final String VARIABLE_LABEL = "variableLabel";
public static final String LITERAL_QUESTION = "literalQuestion";
public static final String INTERVIEW_INSTRUCTIONS = "interviewInstructions";
public static final String POST_QUESTION = "postQuestion";
public static final String VARIABLE_UNIVERSE = "variableUniverse";
public static final String VARIABLE_NOTES = "variableNotes";


public static final String FULL_TEXT = "_text_";

}