Skip to content

Commit

Permalink
Conflicts resolved with rel-v7r1
Browse files Browse the repository at this point in the history
  • Loading branch information
atsareg committed Apr 28, 2021
2 parents 01f3ff4 + 3835d71 commit 867a987
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 5 deletions.
13 changes: 13 additions & 0 deletions release.notes
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ NEW: (#4910) --runslow option on unit tests to allow faster local tests
NEW: (#4938) added a helloworld test for the (yet to be implemented) cloud testing in certification
CHANGE: (#4968) Change the defaults for tests (to MySQL 8 and ES 7)

[v7r1p39]

*WMS
CHANGE: (#5121) for HTCondor, the SiteDirector writes the executable in the globally defined working directory

[v7r1p38]

FIX: fixes from v7r0p55
Expand Down Expand Up @@ -805,6 +810,14 @@ FIX: (#4551) align ProxyDB test to current changes
NEW: (#4289) Document how to run integration tests in docker
NEW: (#4551) add DNProperties description to Registry/Users subsection

[v7r0p56]

*Resources
FIX: (#5119) HTCondorCE: Limit calls to actual cleanup (find and delete files on disk) to
once per minute per SiteDirector, fixes #5118
CHANGE: (#5119) HTCondorCE cleanup: Run the DIRAC_ executable purge with -O3 and -maxdepth
1 to speed up the find

[v7r0p55]

*TS
Expand Down
26 changes: 22 additions & 4 deletions src/DIRAC/Resources/Computing/HTCondorCEComputingElement.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@
except ImportError:
# Python 3's subprocess module contains a compatibility layer
import subprocess as commands
import datetime
import errno
import threading

from DIRAC import S_OK, S_ERROR, gConfig
from DIRAC.Resources.Computing.ComputingElement import ComputingElement
Expand Down Expand Up @@ -164,6 +166,10 @@ class HTCondorCEComputingElement(ComputingElement):
implementing the functions jobSubmit, getJobOutput
"""

# static variables to ensure single cleanup every minute
_lastCleanupTime = datetime.datetime.utcnow()
_cleanupLock = threading.Lock()

#############################################################################
def __init__(self, ceUniqueID):
""" Standard constructor.
Expand Down Expand Up @@ -529,21 +535,33 @@ def __cleanup(self):
# FIXME: again some issue with the working directory...
# workingDirectory = self.ceParameters.get( 'WorkingDirectory', DEFAULT_WORKINGDIRECTORY )

if not HTCondorCEComputingElement._cleanupLock.acquire(False):
return

now = datetime.datetime.utcnow()
if (now - HTCondorCEComputingElement._lastCleanupTime).total_seconds() < 60:
HTCondorCEComputingElement._cleanupLock.release()
return

HTCondorCEComputingElement._lastCleanupTime = now

self.log.debug("Cleaning working directory: %s" % self.workingDirectory)

# remove all files older than 120 minutes starting with DIRAC_. Condor will
# push files on submission, but it takes at least a few seconds until this
# happens, so we can't directly unlink after condor_submit
status, stdout = commands.getstatusoutput('find %s -mmin +120 -name "DIRAC_*" -delete ' % self.workingDirectory)
status, stdout = commands.getstatusoutput('find -O3 %s -maxdepth 1 -mmin +120 -name "DIRAC_*" -delete ' %
self.workingDirectory)
if status:
self.log.error("Failure during HTCondorCE __cleanup", stdout)

# remove all out/err/log files older than "DaysToKeepLogs" days in the CE part of the working Directory
workDir = os.path.join(self.workingDirectory, self.ceName)
findPars = dict(workDir=workDir, days=self.daysToKeepLogs)
# remove all out/err/log files older than "DaysToKeepLogs" days in the working directory
# not running this for each CE so we do global cleanup
findPars = dict(workDir=self.workingDirectory, days=self.daysToKeepLogs)
# remove all out/err/log files older than "DaysToKeepLogs" days
status, stdout = commands.getstatusoutput(
r'find %(workDir)s -mtime +%(days)s -type f \( -name "*.out" -o -name "*.err" -o -name "*.log" \) -delete ' %
findPars)
if status:
self.log.error("Failure during HTCondorCE __cleanup", stdout)
self._cleanupLock.release()
12 changes: 11 additions & 1 deletion src/DIRAC/WorkloadManagementSystem/Agent/SiteDirector.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,7 +1027,17 @@ def getExecutable(self, queue, proxy=None, jobExecDir='', envVariables=None,
pilotOptions = []
pilotOptions = ' '.join(pilotOptions)
self.log.verbose('pilotOptions: %s' % pilotOptions)
executable = self._writePilotScript(workingDirectory=self.workingDirectory,

# if a global workingDirectory is defined for the CEType (like HTCondor)
# use it (otherwise the __cleanup done by HTCondor will be in the wrong folder!)
# Note that this means that if you run multiple HTCondorCEs
# on your machine, the executable files will be in the same place,
# but it does not matter since they are very temporary

ce = self.queueCECache[queue]['CE']
workingDirectory = getattr(ce, 'workingDirectory', self.workingDirectory)

executable = self._writePilotScript(workingDirectory=workingDirectory,
pilotOptions=pilotOptions,
proxy=proxy,
pilotExecDir=jobExecDir,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,14 @@ def test__submitPilotsToQueue(mocker):
'OwnerGroup': ['lhcb_user'],
'Setup': 'LHCb-Production',
'Site': 'LCG.CERN.cern'}}}

# Create a MagicMock that does not have the workingDirectory
# attribute (https://cpython-test-docs.readthedocs.io/en/latest/library/unittest.mock.html#deleting-attributes)
# This is to use the SiteDirector's working directory, not the CE one
ceMock = MagicMock()
del ceMock.workingDirectory

sd.queueCECache = {'aQueue': {'CE': ceMock}}
sd.queueSlots = {'aQueue': {'AvailableSlots': 10}}
res = sd._submitPilotsToQueue(1, MagicMock(), 'aQueue')
assert res['OK'] is True
Expand Down

0 comments on commit 867a987

Please sign in to comment.