forked from NatLibFi/RecordManager
-
Notifications
You must be signed in to change notification settings - Fork 0
/
harvest.php
113 lines (103 loc) · 4 KB
/
harvest.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
<?php
/**
* Command line interface for harvesting records
*
* PHP version 5
*
* Copyright (C) The National Library of Finland 2011-2016.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* @category DataManagement
* @package RecordManager
* @author Ere Maijala <[email protected]>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link https://github.com/KDK-Alli/RecordManager
*/
require_once 'cmdline.php';
/**
 * Main function: harvest records from the requested sources
 *
 * Parses the command line, reads conf/recordmanager.ini from the base path,
 * passes the result through applyConfigOverrides() and then launches the
 * harvest controller once for every comma-separated entry in --source.
 * When --source is missing or is not a string, usage information is printed
 * and the script exits with status 1.
 *
 * @param string[] $argv Program parameters
 *
 * @return void
 * @throws Exception If no configuration could be loaded; anything thrown
 *                   while harvesting also propagates to the caller
 */
function main($argv)
{
    $params = parseArgs($argv);
    $basePath = !empty($params['basepath']) ? $params['basepath'] : __DIR__;
    $configFile = $basePath . '/conf/recordmanager.ini';
    // parse_ini_file() returns false when the file cannot be read or parsed;
    // the result is checked below, after the usage check, so that help output
    // remains available even without a usable configuration file.
    $iniSettings = parse_ini_file($configFile, true);
    $config = applyConfigOverrides($params, $iniSettings);
    if (empty($params['source']) || !is_string($params['source'])) {
        echo <<<EOT
Usage: $argv[0] --source=... [...]
Parameters:
--source Repository id ('*' for all, separate multiple sources
with commas)
--exclude Repository id's to exclude when using '*' for source
(separate multiple sources with commas)
--from Override harvesting start date
--until Override harvesting end date
--all Harvest from beginning (overrides --from)
--verbose Enable verbose output
--override Override initial resumption token e.g. to resume harvesting after
a connection failure. For Sierra API harvesting this is the
offset to start from.
--reharvest[=date] This is a full reharvest, delete all records that were not
received during the harvesting (or were modified before [date]).
Implies --all.
--config.section.name=value
Set configuration directive to given value overriding any
setting in recordmanager.ini
--lockfile=file Use a lock file to avoid executing the command multiple times in
parallel (useful when running from crontab)
--basepath=path Use path as the base directory for conf, mappings and
transformations directories. Normally automatically determined.
EOT;
        exit(1);
    }

    // Fail early with a clear message instead of handing an unusable
    // configuration to the harvest controller. Only triggers when the ini
    // file could not be loaded AND the overrides produced no settings either.
    if ($iniSettings === false && empty($config)) {
        throw new \Exception(
            "Could not load configuration from '$configFile'"
        );
    }

    $lockfile = isset($params['lockfile']) ? $params['lockfile'] : '';
    $lockhandle = false;
    try {
        if (($lockhandle = acquireLock($lockfile)) === false) {
            // No lock obtained: stop without harvesting.
            die();
        }

        $harvest = new \RecordManager\Base\Controller\Harvest(
            $basePath,
            $config,
            true,
            isset($params['verbose']) ? $params['verbose'] : false
        );

        $from = isset($params['from']) ? $params['from'] : null;
        if (isset($params['all']) || isset($params['reharvest'])) {
            // --all and --reharvest take precedence over --from.
            $from = '-';
        }

        foreach (explode(',', $params['source']) as $source) {
            $harvest->launch(
                $source,
                $from,
                isset($params['until']) ? $params['until'] : null,
                isset($params['override']) ? urldecode($params['override']) : '',
                isset($params['exclude']) ? $params['exclude'] : null,
                isset($params['reharvest']) ? $params['reharvest'] : ''
            );
        }
    } finally {
        // Release the lock on success and on every kind of throwable, not
        // only \Exception; whatever was thrown still propagates afterwards.
        releaseLock($lockhandle);
    }
}
// Script entry point: run main() with the command line arguments.
main($argv);