Skip to content

Commit 1ec2260

Browse files
authored
Merge pull request #6686 from IQSS/4964-harvesting-issues
4964 harvesting issues
2 parents c94e11a + 641c73f commit 1ec2260

File tree

6 files changed

+128
-17
lines changed

6 files changed

+128
-17
lines changed

src/main/java/edu/harvard/iq/dataverse/Dataset.java

+26
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import javax.persistence.Temporal;
3333
import javax.persistence.TemporalType;
3434
import edu.harvard.iq.dataverse.util.BundleUtil;
35+
import edu.harvard.iq.dataverse.util.StringUtil;
3536

3637
/**
3738
*
@@ -753,7 +754,32 @@ public String getRemoteArchiveURL() {
753754
}
754755
}
755756
return this.getHarvestedFrom().getArchiveUrl();
757+
} else if (HarvestingClient.HARVEST_STYLE_DEFAULT.equals(this.getHarvestedFrom().getHarvestStyle())) {
758+
// This is a generic OAI archive.
759+
// The metadata we harvested for this dataset is most likely a
760+
// simple DC record that does not contain a URL pointing back at
761+
// the specific location on the source archive. But, it probably
762+
// has a global identifier, a DOI or a Handle - so we should be
763+
// able to redirect to the proper global resolver.
764+
// But since this is a harvested dataset, we will assume that
765+
// there is a possibility that this object does NOT have all the
766+
// valid persistent identifier components.
767+
768+
if (StringUtil.nonEmpty(this.getProtocol())
769+
&& StringUtil.nonEmpty(this.getAuthority())
770+
&& StringUtil.nonEmpty(this.getIdentifier())) {
771+
return this.getPersistentURL();
772+
}
773+
774+
// All we can do is redirect them to the top-level URL we have
775+
// on file for this remote archive:
776+
return this.getHarvestedFrom().getArchiveUrl();
756777
} else {
778+
// This is really not supposed to happen - this is a harvested
779+
// dataset for which we don't have ANY information on the nature
780+
// of the archive we got it from. So all we can do is redirect
781+
// the user to the top-level URL we have on file for this remote
782+
// archive:
757783
return this.getHarvestedFrom().getArchiveUrl();
758784
}
759785
}

src/main/java/edu/harvard/iq/dataverse/HarvestingClientsPage.java

+59-5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import edu.harvard.iq.dataverse.util.BundleUtil;
2121
import edu.harvard.iq.dataverse.util.JsfHelper;
2222
import static edu.harvard.iq.dataverse.util.JsfHelper.JH;
23+
import edu.harvard.iq.dataverse.util.StringUtil;
2324
import java.util.ArrayList;
2425
import java.util.Arrays;
2526
import java.util.List;
@@ -77,6 +78,7 @@ public class HarvestingClientsPage implements java.io.Serializable {
7778
private Dataverse dataverse;
7879
private Long dataverseId = null;
7980
private HarvestingClient selectedClient;
81+
private boolean setListTruncated = false;
8082

8183
//private static final String solrDocIdentifierDataset = "dataset_";
8284

@@ -206,6 +208,9 @@ public boolean isCreateStepFour() {
206208
return CreateStep.FOUR == this.createStep;
207209
}
208210

211+
public boolean isSetListTruncated() {
212+
return setListTruncated;
213+
}
209214

210215
public void runHarvest(HarvestingClient harvestingClient) {
211216
try {
@@ -246,7 +251,7 @@ public void editClient(HarvestingClient harvestingClient) {
246251
// and if not, what do we do?
247252
// alternatively, should we make these 2 fields not editable at all?
248253

249-
this.newOaiSet = !StringUtils.isEmpty(harvestingClient.getHarvestingSet()) ? harvestingClient.getHarvestingSet() : "none";
254+
this.newOaiSet = !StringUtils.isEmpty(harvestingClient.getHarvestingSet()) ? harvestingClient.getHarvestingSet() : "";
250255
this.newMetadataFormat = harvestingClient.getMetadataPrefix();
251256
this.newHarvestingStyle = harvestingClient.getHarvestStyle();
252257

@@ -492,6 +497,23 @@ public void validateMetadataFormat(FacesContext context, UIComponent toValidate,
492497
}
493498
}
494499

500+
public void validateRemoteArchiveStyle(FacesContext context, UIComponent toValidate, Object rawValue) {
501+
String value = (String) rawValue;
502+
UIInput input = (UIInput) toValidate;
503+
input.setValid(true); // Optimistic approach
504+
505+
// the only validation we want is to make sure they select one of the
506+
// values from the menu.
507+
if (context.getExternalContext().getRequestParameterMap().get("DO_VALIDATION") != null
508+
&& StringUtils.isEmpty(value)) {
509+
510+
input.setValid(false);
511+
context.addMessage(toValidate.getClientId(),
512+
new FacesMessage(FacesMessage.SEVERITY_ERROR, "", BundleUtil.getStringFromBundle("harvestclients.newClientDialog.harvestingStyle.required")));
513+
514+
}
515+
}
516+
495517
public boolean validateNickname() {
496518

497519
if ( !StringUtils.isEmpty(getNewNickname()) ) {
@@ -559,10 +581,15 @@ public boolean validateServerUrlOAI() {
559581
}
560582
// And if that worked, the list of sets provided:
561583

584+
ArrayList<String> sets = null;
585+
586+
// reset the sets menu:
587+
setOaiSetsSelectItems(null);
588+
setListTruncated = false;
589+
562590
if (success) {
563591
try {
564-
List<String> sets = oaiHandler.runListSets();
565-
createOaiSetsSelectItems(sets);
592+
sets = oaiHandler.runListSets();
566593
} catch (Exception ex) {
567594
//success = false;
568595
// ok - we'll try and live without sets for now...
@@ -576,6 +603,24 @@ public boolean validateServerUrlOAI() {
576603
}
577604

578605
if (success) {
606+
if (sets != null) {
607+
if (oaiHandler.isSetListTruncated()) {
608+
// If it was taking too long to retrieve the full list
609+
// of sets (oai.datacite.org/oai - looking at you!) -
610+
// and we had to truncate it:
611+
setListTruncated = true;
612+
613+
// And if we are re-configuring an existing client, with
614+
// a specific set in place - let's make sure it's on the pull down
615+
// menu list; even if we have failed to retrieve it from the server.
616+
if (StringUtil.nonEmpty(this.newOaiSet)) {
617+
if (!sets.contains(this.newOaiSet)) {
618+
sets.add(0, this.newOaiSet);
619+
}
620+
}
621+
}
622+
createOaiSetsSelectItems(sets);
623+
}
579624
return true;
580625
}
581626

@@ -643,7 +688,8 @@ public void backToStepThree() {
643688
UIInput newClientNicknameInputField;
644689
UIInput newClientUrlInputField;
645690
UIInput hiddenInputField;
646-
/*UISelectOne*/ UIInput metadataFormatMenu;
691+
/*UISelectOne*/ UIInput metadataFormatMenu;
692+
UIInput remoteArchiveStyleMenu;
647693
UIInput selectedDataverseMenu;
648694

649695
private String newNickname = "";
@@ -674,7 +720,7 @@ public void initNewClient(ActionEvent ae) {
674720
this.initialSettingsValidated = false;
675721
this.newOaiSet = "";
676722
this.newMetadataFormat = "";
677-
this.newHarvestingStyle = HarvestingClient.HARVEST_STYLE_DATAVERSE;
723+
this.newHarvestingStyle = "";
678724

679725
this.harvestTypeRadio = harvestTypeRadioOAI;
680726
this.harvestingScheduleRadio = harvestingScheduleRadioNone;
@@ -840,6 +886,14 @@ public void setMetadataFormatMenu(UIInput metadataFormatMenu) {
840886
this.metadataFormatMenu = metadataFormatMenu;
841887
}
842888

889+
public UIInput getRemoteArchiveStyleMenu() {
890+
return remoteArchiveStyleMenu;
891+
}
892+
893+
public void setRemoteArchiveStyleMenu(UIInput remoteArchiveStyleMenu) {
894+
this.remoteArchiveStyleMenu = remoteArchiveStyleMenu;
895+
}
896+
843897
public UIInput getSelectedDataverseMenu() {
844898
return selectedDataverseMenu;
845899
}

src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ public void setId(Long id) {
9090
public static final String HARVEST_STYLE_DESCRIPTION_NESSTAR="Nesstar archive";
9191
public static final String HARVEST_STYLE_DESCRIPTION_ROPER="Roper Archive";
9292
public static final String HARVEST_STYLE_DESCRIPTION_HGL="HGL";
93-
public static final String HARVEST_STYLE_DESCRIPTION_DEFAULT="Generic OAI resource (DC)";
93+
public static final String HARVEST_STYLE_DESCRIPTION_DEFAULT="Generic OAI archive";
9494

9595

9696
public static final List<String> HARVEST_STYLE_LIST = Arrays.asList(HARVEST_STYLE_DATAVERSE, HARVEST_STYLE_VDC, HARVEST_STYLE_ICPSR, HARVEST_STYLE_NESSTAR, HARVEST_STYLE_ROPER, HARVEST_STYLE_HGL, HARVEST_STYLE_DEFAULT);

src/main/java/edu/harvard/iq/dataverse/harvest/client/oai/OaiHandler.java

+20-2
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ public OaiHandler(HarvestingClient harvestingClient) throws OaiHandlerException
8282
private String metadataPrefix; // = harvestingClient.getMetadataPrefix();
8383
private String setName;
8484
private Date fromDate;
85+
private Boolean setListTruncated = false;
8586

8687
private ServiceProvider serviceProvider;
8788

@@ -123,6 +124,9 @@ public void setHarvestingClient(HarvestingClient harvestingClient) {
123124
this.harvestingClient = harvestingClient;
124125
}
125126

127+
public boolean isSetListTruncated() {
128+
return setListTruncated;
129+
}
126130

127131
private ServiceProvider getServiceProvider() throws OaiHandlerException {
128132
if (serviceProvider == null) {
@@ -141,12 +145,14 @@ private ServiceProvider getServiceProvider() throws OaiHandlerException {
141145
return serviceProvider;
142146
}
143147

144-
public List<String> runListSets() throws OaiHandlerException {
148+
public ArrayList<String> runListSets() throws OaiHandlerException {
145149

146150
ServiceProvider sp = getServiceProvider();
147151

148152
Iterator<Set> setIter;
149153

154+
long startMilSec = new Date().getTime();
155+
150156
try {
151157
setIter = sp.listSets();
152158
} catch (NoSetHierarchyException nshe) {
@@ -155,9 +161,12 @@ public List<String> runListSets() throws OaiHandlerException {
155161
throw new OaiHandlerException("No valid response received from the OAI server.");
156162
}
157163

158-
List<String> sets = new ArrayList<>();
164+
ArrayList<String> sets = new ArrayList<>();
159165

166+
int count = 0;
167+
160168
while ( setIter.hasNext()) {
169+
count++;
161170
Set set = setIter.next();
162171
String setSpec = set.getSpec();
163172
/*
@@ -166,6 +175,15 @@ public List<String> runListSets() throws OaiHandlerException {
166175
167176
}
168177
*/
178+
179+
if (count >= 100) {
180+
// Have we been waiting more than 30 seconds?
181+
if (new Date().getTime() - startMilSec > 30000) {
182+
setListTruncated = true;
183+
break;
184+
}
185+
}
186+
169187
if (!StringUtils.isEmpty(setSpec)) {
170188
sets.add(setSpec);
171189
}

src/main/java/propertyFiles/Bundle.properties

+2
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ harvestclients.newClientDialog.oaiSets.tip=Harvesting sets offered by this OAI s
482482
harvestclients.newClientDialog.oaiSets.noset=None
483483
harvestclients.newClientDialog.oaiSets.helptext=Selecting "none" will harvest the default set, as defined by the server. Often this will be the entire body of content across all sub-sets.
484484
harvestclients.newClientDialog.oaiSets.helptext.noset=This OAI server does not support named sets. The entire body of content offered by the server will be harvested.
485+
harvestclients.newClientDialog.oaiSets.listTruncated=Please note that the remote server was taking too long to return the full list of available OAI sets, so the list was truncated. Please select a set from the current list (or select the "no set" option), and try again later, if you need to change it.
485486
harvestclients.newClientDialog.oaiMetadataFormat=Metadata Format
486487
harvestclients.newClientDialog.oaiMetadataFormat.tip=Metadata formats offered by the remote server.
487488
harvestclients.newClientDialog.oaiMetadataFormat.required=Please select the metadata format to harvest from this archive.
@@ -503,6 +504,7 @@ harvestclients.newClientDialog.step4=Step 4 of 4 - Display
503504
harvestclients.newClientDialog.harvestingStyle=Archive Type
504505
harvestclients.newClientDialog.harvestingStyle.tip=Type of remote archive.
505506
harvestclients.newClientDialog.harvestingStyle.helptext=Select the archive type that best describes this remote server in order to properly apply formatting rules and styles to the harvested metadata as they are shown in the search results. Note that improperly selecting the type of the remote archive can result in incomplete entries in the search results, and a failure to redirect the user to the archival source of the data.
507+
harvestclients.newClientDialog.harvestingStyle.required=Please select one of the values from the menu.
506508
harvestclients.viewEditDialog.title=Edit Harvesting Client
507509
harvestclients.viewEditDialog.archiveUrl=Archive URL
508510
harvestclients.viewEditDialog.archiveUrl.tip=The URL of the archive that serves the data harvested by this client, which is used in search results for links to the original sources of the harvested content.

src/main/webapp/harvestclients.xhtml

+20-9
Original file line numberDiff line numberDiff line change
@@ -320,13 +320,18 @@
320320
<f:selectItems value="#{harvestingClientsPage.oaiSetsSelectItems}" />
321321
</p:selectOneMenu>
322322
<p:message for="serverOaiSets"/>
323-
<!-- a readonly label, in "edit existing client" mode: -->
323+
<!-- a readonly label, in "edit existing client" mode: (No longer needed, since we now allow editing sets in existing clients!) -->
324324
<p:inputText rendered="#{false}"
325325
readonly="true"
326326
styleClass="form-control"
327327
value="#{harvestingClientsPage.newOaiSet}"/>
328328
<p class="help-block" jsf:rendered="#{harvestingClientsPage.createMode and !empty harvestingClientsPage.oaiSetsSelectItems}">#{bundle['harvestclients.newClientDialog.oaiSets.helptext']}</p>
329329
<p class="help-block" jsf:rendered="#{harvestingClientsPage.createMode and empty harvestingClientsPage.oaiSetsSelectItems}">#{bundle['harvestclients.newClientDialog.oaiSets.helptext.noset']}</p>
330+
331+
<p class="help-block" jsf:rendered="#{harvestingClientsPage.setListTruncated}">
332+
<span class="glyphicon glyphicon-warning-sign text-warning"/>
333+
#{bundle['harvestclients.newClientDialog.oaiSets.listTruncated']}
334+
</p>
330335
</div>
331336
</div>
332337

@@ -339,7 +344,7 @@
339344
data-toggle="tooltip" data-placement="auto right" data-original-title="#{bundle['harvestclients.newClientDialog.oaiMetadataFormat.tip']}"></span>
340345
</label>
341346
<div class="col-sm-6">
342-
<!-- select menu, shown in "create new client" mode, after the server has been contacted and we got the list of available metadata formats: -->
347+
<!-- select menu; it's populated by contacting the remote server and obtaining the list of available metadata formats: -->
343348
<p:selectOneMenu id="oaiMetadataFormat"
344349
binding="#{harvestingClientsPage.metadataFormatMenu}"
345350
styleClass="form-control"
@@ -350,11 +355,11 @@
350355
<f:selectItems value="#{harvestingClientsPage.oaiMetadataFormatSelectItems}" />
351356
</p:selectOneMenu>
352357
<p:message for="oaiMetadataFormat"/>
353-
<!-- a readonly label, in "edit existing client" mode: -->
354-
<p:inputText rendered="#{harvestingClientsPage.editMode}"
355-
readonly="true"
358+
<!-- a readonly label, in "edit existing client" mode: (No longer needed! - since we now allow formats to be edited) -->
359+
<!-- p:inputText rendered="#{harvestingClientsPage.editMode}"
360+
readonly="false"
356361
styleClass="form-control"
357-
value="#{harvestingClientsPage.selectedClient.metadataPrefix}"/>
362+
value="#{harvestingClientsPage.selectedClient.metadataPrefix}"/ -->
358363
</div>
359364
</div>
360365

@@ -487,8 +492,12 @@
487492
</label>
488493
<div class="col-sm-6">
489494
<p:selectOneMenu id="harvestingStyle"
490-
styleClass="form-control" value="#{harvestingClientsPage.newHarvestingStyle}"
491-
rendered="#{harvestingClientsPage.initialSettingsValidated}">
495+
binding="#{harvestingClientsPage.remoteArchiveStyleMenu}"
496+
styleClass="form-control"
497+
value="#{harvestingClientsPage.newHarvestingStyle}"
498+
rendered="#{harvestingClientsPage.initialSettingsValidated}"
499+
validator="#{harvestingClientsPage.validateRemoteArchiveStyle}">
500+
<f:selectItem itemLabel="#{bundle.select}" itemValue="" noSelectionOption="true" />
492501
<f:selectItems value="#{harvestingClientsPage.harvestingStylesSelectItems}" />
493502
</p:selectOneMenu>
494503
<p:message for="harvestingStyle"/>
@@ -547,7 +556,9 @@
547556
value="#{bundle['harvestclients.viewEditDialog.btn.save']}"
548557
update="newHarvestingClientDialogContent :messagePanel :harvestingClientsForm clientsTable emptyClientsTable"
549558
actionListener="#{harvestingClientsPage.saveClient}"
550-
oncomplete="if (args &amp;&amp; !args.validationFailed) PF('newHarvestingClientForm').hide(); else PF('newHarvestingClientForm').show();bind_bsui_components();"/>
559+
oncomplete="if (args &amp;&amp; !args.validationFailed) PF('newHarvestingClientForm').hide(); else PF('newHarvestingClientForm').show();bind_bsui_components();">
560+
<f:param name="DO_VALIDATION" value="true"/>
561+
</p:commandLink>
551562
<button class="btn btn-link" jsf:rendered="#{harvestingClientsPage.initialSettingsValidated}" onclick="PF('newHarvestingClientForm').hide()" type="button">
552563
#{bundle.cancel}
553564
</button>

0 commit comments

Comments
 (0)