Resiliency: allow takeover to retry renaming until it works.
[slapos.git] / slapos / recipe / addresiliency / takeover.py
1 # -*- coding: utf-8 -*-
2 import logging
3 import time
4
5 import slapos
6 from slapos.slap.slap import NotFoundError
7
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
# Set up basic logging at DEBUG level.
# NOTE(review): this call sits at module top level, so it runs on import and
# applies to whichever process imports this module -- confirm that is intended.
logging.basicConfig(level=logging.DEBUG)
10
def takeover(server_url, key_file, cert_file, computer_guid,
             partition_id, software_release, namebase,
             winner_instance_suffix=None):
    """
    Swap the references of the broken main instance and the winning one.

    Steps, in order:

    - connect to the master at ``server_url`` using ``key_file`` and
      ``cert_file``, and register the computer partition identified by
      ``computer_guid`` / ``partition_id``;
    - pick the winner: the registered partition itself, or, when
      ``winner_instance_suffix`` is given, the instance requested under the
      reference ``namebase + winner_instance_suffix`` with software type
      ``'<namebase>-import'``;
    - request the instance referenced ``namebase + '0'`` with software type
      'frozen' and state 'stopped', then rename it to
      'broken-<UTC timestamp>';
    - rename the winner to ``namebase + '0'``, waiting 10 seconds before
      every attempt (the first one included) and retrying for as long as the
      rename raises NotFoundError;
    - bang the winner.

    Only NotFoundError is retried; any other exception propagates. There is
    no upper bound on the number of attempts.

    Later on, slapgrid changes the winner's software type as well: after
    slapgrid-cp has run a few times the winner takes over and a new
    partition is created to replace it as an importer.
    """
    master = slapos.slap.slap()
    master.initializeConnection(server_url, key_file, cert_file)
    own_partition = master.registerComputerPartition(
        computer_guid=computer_guid,
        partition_id=partition_id,
    )

    # By default this script runs inside the winning partition, so that
    # partition is the winner. Renaming it is already possible from here;
    # changing its software type additionally needs the root partition.
    winner = own_partition
    if winner_instance_suffix:
        # request() both sets and fetches in a single call, hence the
        # hardcoded values passed along here.
        winner_reference = namebase + winner_instance_suffix
        winner = own_partition.request(
            software_release=software_release,
            software_type='%s-import' % namebase,
            partition_reference=winner_reference,
        )

    # The main ("boss") instance is always number zero.
    exporter_reference = namebase + '0'

    # Partition to be deactivated.
    broken = winner.request(
        software_release=software_release,
        software_type='frozen',
        state='stopped',
        partition_reference=exporter_reference,
    )

    stamp = time.strftime("%d-%b_%H:%M:%S", time.gmtime())
    retired_reference = 'broken-{}'.format(stamp)

    log.debug("Renaming {}: {}".format(broken.getId(), retired_reference))
    broken.rename(new_name=retired_reference)

    log.debug("Renaming {}: {}".format(winner.getId(), exporter_reference))

    # Give the winner the main reference (its software type follows later).
    # Keep trying until the rename stops raising NotFoundError.
    renamed = False
    while not renamed:
        time.sleep(10)
        try:
            winner.rename(new_name=exporter_reference)
        except NotFoundError:
            log.warning('Impossible to rename. Retrying in a few seconds...')
        else:
            renamed = True
    log.debug('Renamed.')

    # The root instance then reconfigures the winning instance by itself
    # (software type and parameters).
    winner.bang(message='partitions have been renamed!')
74
def run(args):
    """
    Entry point: run takeover() with settings taken from ``args``.

    ``args`` is a mutable mapping. The keys 'server_url', 'key_file',
    'cert_file', 'computer_id', 'partition_id', 'software' and 'namebase'
    are popped from it (so the mapping is modified in place) and forwarded
    to takeover() as server_url, key_file, cert_file, computer_guid,
    partition_id, software_release and namebase respectively.
    No winner_instance_suffix is passed, so takeover() uses the registered
    partition itself as the winner.

    Raises KeyError if one of the expected keys is missing.
    """
    # Call the sibling function directly rather than through the
    # slapos.recipe.addresiliency.takeover attribute chain: the file only
    # does `import slapos`, so that chain resolves only when this module was
    # imported under exactly that dotted name.
    takeover(server_url=args.pop('server_url'),
             key_file=args.pop('key_file'),
             cert_file=args.pop('cert_file'),
             computer_guid=args.pop('computer_id'),
             partition_id=args.pop('partition_id'),
             software_release=args.pop('software'),
             namebase=args.pop('namebase'))
83