Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Met 1298 #177

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions tools/dataset-cleaner/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/target
/.settings
/.idea
/.classpath
/.project
*.log
*.parms
dataset-cleaner/target


9 changes: 9 additions & 0 deletions tools/dataset-cleaner/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
DataSet cleaner Tool
1- The tool removes all representation versions assigned to a dataSet. For every version it
removes the files and revisions and unassigns the version from ALL the dataSets it belongs to,
NOT JUST THE PASSED DATASET, because in the end it REMOVES the representation version ENTIRELY
from the system.

2- To execute the tool, pass the correct parameters to the jar file. Example:
java -jar dataset-cleaner-1.1-SNAPSHOT-jar-with-dependencies.jar --DATASET_URL http://localhost:8080/mcs/data-providers/provider/data-sets/dataset --MCS_URL http://localhost:8080/mcs --USERNAME username --PASSWORD password

153 changes: 153 additions & 0 deletions tools/dataset-cleaner/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>ecloud-tools</artifactId>
<groupId>eu.europeana.cloud</groupId>
<version>1.1-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>dataset-cleaner</artifactId>

<name>dataset-cleaner</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>eu.europeana.cloud</groupId>
<artifactId>ecloud-common</artifactId>
<version>1.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>eu.europeana.cloud</groupId>
<artifactId>ecloud-service-commons</artifactId>
<version>1.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>eu.europeana.cloud</groupId>
<artifactId>ecloud-service-mcs-rest-client-java</artifactId>
<version>1.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>

<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-core</artifactId>
<version>${version.powermock}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-api-mockito</artifactId>
<version>${version.powermock-mockito}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.powermock</groupId>
<artifactId>powermock-module-junit4</artifactId>
<version>${version.powermock}</version>
<scope>test</scope>
</dependency>
</dependencies>


<build>
<sourceDirectory>src/main/java</sourceDirectory>
<testSourceDirectory>src/test/java</testSourceDirectory>
<resources>
<resource>
<directory>${basedir}/multilang</directory>
</resource>
<resource>
<directory>${basedir}/src/main/resources</directory>
</resource>
</resources>

<plugins>
<!-- Bind the maven-assembly-plugin to the package phase: this creates an executable
jar-with-dependencies whose manifest main class is eu.europeana.cloud.DataSetCleanerTool. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
<archive>
<manifest>
<mainClass>eu.europeana.cloud.DataSetCleanerTool

</mainClass>
</manifest>
</archive>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>${maven-surefire-plugin.version}</version>
<configuration>
<argLine>@{jacocoArgLine} -XX:MaxPermSize=100m</argLine>
</configuration>
</plugin>

</plugins>
<pluginManagement>
<plugins>
<!-- Managed configuration of the maven-assembly-plugin (main class and jar-with-dependencies descriptor); it repeats the configuration declared in the plugins section above. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>
eu.europeana.cloud.DataSetCleanerTool
</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
package eu.europeana.cloud;

import eu.europeana.cloud.common.model.Representation;
import eu.europeana.cloud.jobs.VersionRemoverJob;
import eu.europeana.cloud.mcs.driver.DataSetServiceClient;
import eu.europeana.cloud.mcs.driver.RecordServiceClient;
import eu.europeana.cloud.mcs.driver.RepresentationIterator;
import eu.europeana.cloud.service.commons.urls.UrlParser;
import eu.europeana.cloud.service.commons.urls.UrlPart;
import eu.europeana.cloud.utils.CommandLineHelper;
import org.apache.commons.cli.*;
import org.apache.log4j.Logger;

import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

/**
 * Command line tool that removes the representation versions assigned to a data set.
 *
 * <p>The representations of the data set identified by the {@code DATASET_URL} parameter are
 * iterated and a {@link VersionRemoverJob} is submitted to a fixed-size thread pool for each of
 * them. Job results are collected in batches of {@link #MAXIMUM_FUTURE_NUMBER} and a report is
 * logged after every batch and once more at the end.</p>
 *
 * <p>All state is kept in static fields, so the class is meant to be driven from a single thread
 * through {@link #main(String[])}.</p>
 */
public class DataSetCleanerTool {
    private static final String MCS_URL = "MCS_URL";
    private static final String USERNAME = "USERNAME";
    private static final String PASSWORD = "PASSWORD";
    private static final String DATA_SET_URL = "DATASET_URL";
    private static final String THREADS_COUNT = "THREADS_COUNT";
    private static final String DEBUG = "DEBUG";
    private static final int DEFAULT_THREADS_COUNT = 100;


    private static String dataSetUrl;
    private static String mcsURL;
    private static String userName;
    private static String password;
    private static int threadsCount;
    private static String providerId;
    private static String dataSetName;
    private static boolean debug = false;


    private static final Logger LOGGER = Logger.getLogger(DataSetCleanerTool.class);

    /** Number of submitted jobs after which the tool waits for their results and logs a report. */
    public static final int MAXIMUM_FUTURE_NUMBER = 500;

    /** Messages of all failed jobs collected so far. */
    private static final List<String> errorLists = new ArrayList<>();

    /** Number of jobs that completed normally so far. */
    private static long successCount = 0;


    /**
     * Entry point: parses the parameters, validates the data set URL and runs the cleaning.
     *
     * <p>A command line parsing problem is logged together with the usage help. Any other failure
     * is logged and terminates the JVM with exit code 1.</p>
     *
     * @param args command line arguments, see {@link #getParametersHelperOptions()}
     */
    public static void main(String[] args) {
        Options options = getParametersHelperOptions();
        CommandLineParser parser = new DefaultParser();
        try {
            CommandLine cmd = parser.parse(options, args);
            setExecutionParameters(cmd);
            UrlParser urlParser = new UrlParser(dataSetUrl);
            if (urlParser.isUrlToDataset()) {
                providerId = urlParser.getPart(UrlPart.DATA_PROVIDERS);
                dataSetName = urlParser.getPart(UrlPart.DATA_SETS);
                removeVersionsFromDataSet();
            } else {
                LOGGER.error("The provided dataSet url is not formulated correctly");
            }
        } catch (ParseException exp) {
            LOGGER.error(exp.getMessage());
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("Please provide those parameters to the tool:", options);
        } catch (Exception e) {
            // Pass the throwable as well so the stack trace is not lost.
            LOGGER.error("An exception happened caused by: " + e.getMessage(), e);
            System.exit(1);
        }
    }

    /**
     * Copies the parsed command line values into the static execution parameters.
     *
     * @param cmd parsed command line
     * @throws IllegalArgumentException if {@code THREADS_COUNT} is present but is not a positive integer
     */
    private static void setExecutionParameters(CommandLine cmd) {
        dataSetUrl = cmd.getOptionValue(DATA_SET_URL);
        mcsURL = cmd.getOptionValue(MCS_URL);
        userName = cmd.getOptionValue(USERNAME);
        password = cmd.getOptionValue(PASSWORD);
        threadsCount = parseThreadsCount(cmd.getOptionValue(THREADS_COUNT));
        // Plain assignment so that a previous invocation cannot leave a stale "true" behind.
        debug = "true".equalsIgnoreCase(cmd.getOptionValue(DEBUG));
    }

    /**
     * Converts the optional {@code THREADS_COUNT} value into a usable pool size.
     *
     * @param value raw option value, {@code null} when the option was not given
     * @return {@link #DEFAULT_THREADS_COUNT} when {@code value} is {@code null}, the parsed value otherwise
     * @throws IllegalArgumentException if the value is not an integer greater than zero
     */
    private static int parseThreadsCount(String value) {
        if (value == null) {
            return DEFAULT_THREADS_COUNT;
        }
        final int parsed;
        try {
            parsed = Integer.parseInt(value.trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(THREADS_COUNT + " must be a positive integer but was: " + value, e);
        }
        if (parsed <= 0) {
            // Executors.newFixedThreadPool would reject this with a message-less exception.
            throw new IllegalArgumentException(THREADS_COUNT + " must be a positive integer but was: " + value);
        }
        return parsed;
    }

    /**
     * Submits one {@link VersionRemoverJob} per representation of the data set and reports the outcome.
     *
     * <p>Results are awaited every {@link #MAXIMUM_FUTURE_NUMBER} submissions so that the number of
     * pending futures stays bounded. The thread pool is shut down even when the iteration fails.</p>
     */
    private static void removeVersionsFromDataSet() {
        ExecutorService service = Executors.newFixedThreadPool(threadsCount);
        try {
            DataSetServiceClient dataSetServiceClient = new DataSetServiceClient(mcsURL, userName, password);
            RecordServiceClient recordServiceClient = new RecordServiceClient(mcsURL, userName, password);
            RepresentationIterator representationIterator = dataSetServiceClient.getRepresentationIterator(providerId, dataSetName);
            // Insertion-ordered so that results are examined in submission order.
            Set<Future<String>> futures = new LinkedHashSet<>(MAXIMUM_FUTURE_NUMBER);
            LOGGER.info("The tool Started its Job...");

            while (representationIterator.hasNext()) {
                Representation representation = representationIterator.next();
                futures.add(service.submit(new VersionRemoverJob(recordServiceClient, representation)));
                if (futures.size() >= MAXIMUM_FUTURE_NUMBER) {
                    // The call below empties the set, which starts the next batch.
                    getExcisionResultAndWait(futures);
                    viewReport();
                }
            }

            if (!futures.isEmpty()) {
                getExcisionResultAndWait(futures);
            }

            LOGGER.info("The tool finished its Job. The final report:");
            viewReport();
        } finally {
            service.shutdown();
        }
    }

    /**
     * Waits for every future of the batch, updates the success counter and the error list and
     * finally clears the passed set.
     *
     * <p>Only jobs whose result was actually obtained are counted as successes. A wait that ends
     * with an {@link InterruptedException} is neither a success nor a recorded error.</p>
     *
     * @param futures the batch to wait for; emptied before returning
     */
    private static void getExcisionResultAndWait(Set<Future<String>> futures) {
        int succeededInThisBatch = 0;
        int interruptedInThisBatch = 0;
        for (Future<String> futureItem : futures) {
            try {
                String result = futureItem.get();
                succeededInThisBatch++;
                if (debug) {
                    LOGGER.info(result);
                }
            } catch (InterruptedException e) {
                // Restore the flag; the outcome of this job is unknown, so it must not count as a success.
                Thread.currentThread().interrupt();
                interruptedInThisBatch++;
            } catch (ExecutionException e) {
                errorLists.add(e.getMessage());
            }
        }
        if (interruptedInThisBatch > 0) {
            LOGGER.error("Interrupted while waiting for " + interruptedInThisBatch + " job(s); their outcome is unknown");
        }
        successCount += succeededInThisBatch;
        futures.clear();
    }

    /**
     * Logs the number of successful removals and every error message collected so far.
     */
    private static void viewReport() {
        LOGGER.info("You correctly Removed " + successCount + " versions From data set:" + dataSetUrl + " and encountered " + errorLists.size() + " errors");
        if (!errorLists.isEmpty()) {
            LOGGER.info("The detailed error report till now is: ");
            for (String errorMessage : errorLists) {
                LOGGER.error(errorMessage);
            }
        }
    }


    /**
     * Builds the command line options understood by the tool.
     *
     * @return the options registered through {@link CommandLineHelper}
     */
    private static Options getParametersHelperOptions() {
        CommandLineHelper commandLineHelper = new CommandLineHelper();
        commandLineHelper.addOption(MCS_URL, "URL for mcs", true);
        commandLineHelper.addOption(USERNAME, "User name", true);
        commandLineHelper.addOption(PASSWORD, "Password", true);
        commandLineHelper.addOption(DATA_SET_URL, "Data set URL", true);
        commandLineHelper.addOption(THREADS_COUNT, "Threads count (int)(optional)(default=100)", false);
        commandLineHelper.addOption(DEBUG, "Log every execution (true,false)(optional)(default=false)", false);

        return commandLineHelper.getOptions();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package eu.europeana.cloud.jobs;

import eu.europeana.cloud.common.model.Representation;
import eu.europeana.cloud.mcs.driver.RecordServiceClient;
import org.apache.log4j.Logger;

import java.util.concurrent.Callable;

/**
 * Job that deletes a single representation version through the {@link RecordServiceClient}.
 *
 * <p>The deletion is attempted up to {@code DEFAULT_RETRIES} times, sleeping {@link #SLEEP_TIME}
 * milliseconds between attempts.</p>
 *
 * Created by Tarek on 8/3/2018.
 */
public class VersionRemoverJob implements Callable<String> {
    private static final int DEFAULT_RETRIES = 3;

    /** Pause between two deletion attempts, in milliseconds. */
    public static final int SLEEP_TIME = 5000;

    private final Representation representation;
    private final RecordServiceClient recordServiceClient;

    private static final Logger LOGGER = Logger.getLogger(VersionRemoverJob.class);


    /**
     * @param recordServiceClient client used to delete the representation version
     * @param representation      the representation version to delete
     */
    public VersionRemoverJob(RecordServiceClient recordServiceClient,
                             Representation representation) {

        this.recordServiceClient = recordServiceClient;
        this.representation = representation;
    }

    /**
     * Deletes the representation version.
     *
     * @return a confirmation message containing the URI of the removed version
     * @throws Exception if the deletion still fails after all retries, or the thread is
     *                   interrupted while waiting for the next attempt
     */
    @Override
    public String call() throws Exception {
        removeVersion();
        return "The version " + representation.getUri() + " was cleaned successfully";
    }

    /*
    Deletes the representation version with retries. The service is expected to remove all the version files and
    revisions, unassign the version from all the dataSets and finally remove the representation version entirely.
    */
    private void removeVersion() throws Exception {
        int retries = DEFAULT_RETRIES;
        while (true) {
            try {
                recordServiceClient.deleteRepresentation(representation.getCloudId(), representation.getRepresentationName(), representation.getVersion());
                return;
            } catch (Exception e) {
                retries--;
                // Give up when the attempts are exhausted or the back-off sleep was interrupted.
                if (retries <= 0 || !waitForSpecificTime()) {
                    // Keep the original exception as the cause so its stack trace is not lost.
                    throw new Exception("Error while removing representation version " + representation.getUri() + " from dataSet because of " + e.getMessage(), e);
                }
            }
        }
    }

    /**
     * Sleeps for {@link #SLEEP_TIME} milliseconds.
     *
     * @return {@code true} if the full pause elapsed, {@code false} if the thread was interrupted
     *         (the interrupt flag is restored in that case)
     */
    private boolean waitForSpecificTime() {
        try {
            Thread.sleep(SLEEP_TIME);
            return true;
        } catch (InterruptedException e1) {
            Thread.currentThread().interrupt();
            LOGGER.error(e1.getMessage());
            return false;
        }
    }
}
Loading