Skip to content

Commit 6ec1030

Browse files
authored
[tools/shoestring]: add recovery tool
problem: There is no way to recover from lock files in the data folder. solution: create a docker-compose-recovery.yaml which will run the recovery tool.
1 parent 6f82104 commit 6ec1030

File tree

9 files changed

+94
-30
lines changed

9 files changed

+94
-30
lines changed

tools/shoestring/README.md

+19
Original file line numberDiff line numberDiff line change
@@ -401,3 +401,22 @@ cd symbol-product-directory/tools/shoestring
401401
python3 -m pip install -r requirements.txt
402402
PYTHONPATH=. python3 -m shoestring --help
403403
```
404+
405+
# Troubleshooting
406+
407+
## Server or Broker failed to start due to lock files
408+
409+
The `docker-compose-recovery.yaml` file is used to recover your node when a `server.lock` or `broker.lock` file is still
410+
present in the `data` folder after stopping your node.
411+
412+
```sh
413+
docker compose -f docker-compose-recovery.yaml up --abort-on-container-exit
414+
```
415+
416+
## Need to resync your node
417+
418+
If recovery fails, download the latest copy of the mainnet node data for Linux:
419+
dual: https://catapultmainnetdata.s3.us-west-2.amazonaws.com/weekly/catapult_dual_data.tar.gz
420+
peer: https://catapultmainnetdata.s3.us-west-2.amazonaws.com/weekly/catapult_peer_data.tar.gz
421+
422+
Note: keep a copy of your node's `harvesters.dat` file from the `data` folder.

tools/shoestring/shoestring/commands/upgrade.py

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ async def run_main(args):
4040
harvester_config_patches = _load_harvester_configuration_patches(config_manager)
4141

4242
(directories.output_directory / 'docker-compose.yaml').unlink()
43+
(directories.output_directory / 'docker-compose-recovery.yaml').unlink()
4344

4445
_purge_directory(directories.userconfig)
4546
_recreate_directory(directories.userconfig)

tools/shoestring/shoestring/internal/Preparer.py

+6
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,12 @@ def configure_docker(self, template_mapping):
362362
apply_template(compose_template_filename, template_mapping, compose_output_filepath)
363363
compose_output_filepath.chmod(0o400)
364364

365+
recovery_compose_output_filepath = self.directory / 'docker-compose-recovery.yaml'
366+
recovery_template_mapping = template_mapping.copy()
367+
recovery_template_mapping['recovery'] = True
368+
apply_template(compose_template_filename, recovery_template_mapping, recovery_compose_output_filepath)
369+
recovery_compose_output_filepath.chmod(0o400)
370+
365371
def prepare_linking_transaction(self, account_public_key, existing_links, timestamp):
366372
"""Creates an aggregate transaction containing account key link and unlink transactions """
367373

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/bash
2+
3+
set -e
4+
5+
ulimit -c unlimited
6+
id -a
7+
ls -alh /seed
8+
9+
if [ ! -d /seed/00000 ]; then
10+
echo "seed directory is empty, exiting"
11+
exit 1
12+
fi
13+
14+
exec /usr/catapult/bin/catapult.recovery /userconfig

tools/shoestring/shoestring/templates/docker-compose-dual.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ services:
3333
options:
3434
max-size: 25M
3535
max-file: '10'
36+
{%- if recovery %}
37+
command: /bin/bash /startup/wait.sh /data/startup/mongo-initialized /startup/startRecovery.sh
38+
{%- else %}
3639
command: /bin/bash /startup/wait.sh /data/startup/mongo-initialized /startup/startServer.sh
40+
{%- endif %}
3741
environment:
3842
- LD_LIBRARY_PATH=/usr/catapult/lib:/usr/catapult/deps
3943
stop_signal: SIGINT
@@ -50,6 +54,7 @@ services:
5054
- ./logs:/logs
5155
- ./keys/voting:/votingkeys
5256

57+
{%- if not recovery %}
5358
broker:
5459
image: '{{ catapult_client_image }}'
5560
user: '{{ user }}'
@@ -108,6 +113,7 @@ services:
108113
- ./https-proxy:/var/lib/https-portal
109114
- ./https-proxy/nginx.conf.erb:/var/lib/nginx-conf/{{ domainname }}.conf.erb:ro
110115
{% endif %}
116+
{% endif %}
111117
networks:
112118
default:
113119
name: catapult-node-network

tools/shoestring/shoestring/templates/docker-compose-peer.yaml

+5-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@ services:
99
options:
1010
max-size: 25M
1111
max-file: '10'
12+
{%- if recovery %}
13+
command: /usr/catapult/bin/catapult.recovery /userconfig
14+
{%- else %}
1215
command: /usr/catapult/bin/catapult.server /userconfig
16+
{%- endif %}
1317
environment:
1418
- LD_LIBRARY_PATH=/usr/catapult/lib:/usr/catapult/deps
1519
stop_signal: SIGINT
@@ -26,7 +30,7 @@ services:
2630
- ./keys/voting:/votingkeys
2731
- ./keys/cert:/certificates
2832

29-
{%- if light_api %}
33+
{%- if light_api and not recovery %}
3034
rest-api:
3135
image: '{{ catapult_rest_image }}'
3236
user: '{{ user }}'

tools/shoestring/tests/commands/test_setup.py

+2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def server(event_loop, aiohttp_client):
3333
PEER_OUTPUT_FILES = {
3434
'data': 0o700,
3535
'docker-compose.yaml': 0o400,
36+
'docker-compose-recovery.yaml': 0o400,
3637
'keys': 0o700,
3738
'keys/cert': 0o700,
3839
'keys/cert/ca.crt.pem': 0o400,
@@ -89,6 +90,7 @@ def server(event_loop, aiohttp_client):
8990
'startup/delayrestapi.sh': 0o400,
9091
'startup/mongors.sh': 0o400,
9192
'startup/startBroker.sh': 0o400,
93+
'startup/startRecovery.sh': 0o400,
9294
'startup/startServer.sh': 0o400,
9395
'startup/wait.sh': 0o400,
9496
'userconfig/resources/config-database.properties': 0o400,

tools/shoestring/tests/commands/test_upgrade.py

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def server(event_loop, aiohttp_client):
3434
# region changed files
3535

3636
PEER_CHANGED_FILES = [
37+
'docker-compose-recovery.yaml',
3738
'docker-compose.yaml',
3839
'userconfig',
3940
'userconfig/resources',
@@ -72,6 +73,7 @@ def server(event_loop, aiohttp_client):
7273
'startup/delayrestapi.sh',
7374
'startup/mongors.sh',
7475
'startup/startBroker.sh',
76+
'startup/startRecovery.sh',
7577
'startup/startServer.sh',
7678
'startup/wait.sh',
7779
'userconfig/resources/config-database.properties',

tools/shoestring/tests/internal/test_Preparer.py

+39-29
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,32 @@ def test_can_generate_random_node_key_when_not_imported(self):
722722

723723
# region configure_docker
724724

725-
def _assert_can_configure_docker(self, config, expected_startup_files, expected_service_names):
725+
def _assert_can_configure_docker(self, config, expected_startup_files, expected_service_names, expected_recovery_service_names):
726+
def _assert_can_configure_service(docker_compose_filepath, expected_names):
727+
# - check compose file
728+
expected_image_map = {
729+
'db': 'mongo:7.0.16',
730+
'initiate': 'mongo:7.0.16',
731+
'client': 'symbolplatform/symbol-server:gcc-a.b.c.d',
732+
'broker': 'symbolplatform/symbol-server:gcc-a.b.c.d',
733+
'rest-api': 'symbolplatform/symbol-rest:a.b.c',
734+
'rest-api-https-proxy': 'steveltn/https-portal:1'
735+
}
736+
737+
service_names = []
738+
with open(docker_compose_filepath, 'rt', encoding='utf8') as infile:
739+
compose_config = yaml.safe_load(infile)
740+
for service_name in compose_config['services']:
741+
service = compose_config['services'][service_name]
742+
if 'rest-api-https-proxy' != service_name:
743+
self.assertEqual('2222:3333', service['user'])
744+
745+
self.assertEqual(expected_image_map[service_name], service['image'])
746+
747+
service_names.append(service_name)
748+
749+
self.assertEqual(expected_names, service_names)
750+
726751
# Arrange:
727752
with tempfile.TemporaryDirectory() as output_directory:
728753
with Preparer(output_directory, config) as preparer:
@@ -747,60 +772,45 @@ def _assert_can_configure_docker(self, config, expected_startup_files, expected_
747772
else:
748773
self.assertFalse(preparer.directories.startup.exists())
749774

750-
# - check compose file
751-
expected_image_map = {
752-
'db': 'mongo:7.0.16',
753-
'initiate': 'mongo:7.0.16',
754-
'client': 'symbolplatform/symbol-server:gcc-a.b.c.d',
755-
'broker': 'symbolplatform/symbol-server:gcc-a.b.c.d',
756-
'rest-api': 'symbolplatform/symbol-rest:a.b.c',
757-
'rest-api-https-proxy': 'steveltn/https-portal:1'
758-
}
759-
760775
compose_filepath = Path(output_directory) / 'docker-compose.yaml'
761776
assert 0o400 == compose_filepath.stat().st_mode & 0o777
777+
_assert_can_configure_service(compose_filepath, expected_service_names)
762778

763-
service_names = []
764-
with open(compose_filepath, 'rt', encoding='utf8') as infile:
765-
compose_config = yaml.safe_load(infile)
766-
for service_name in compose_config['services']:
767-
service = compose_config['services'][service_name]
768-
if 'rest-api-https-proxy' != service_name:
769-
self.assertEqual('2222:3333', service['user'])
770-
771-
self.assertEqual(expected_image_map[service_name], service['image'])
772-
773-
service_names.append(service_name)
774-
775-
self.assertEqual(expected_service_names, service_names)
779+
recovery_compose_filepath = Path(output_directory) / 'docker-compose-recovery.yaml'
780+
assert 0o400 == recovery_compose_filepath.stat().st_mode & 0o777
781+
_assert_can_configure_service(recovery_compose_filepath, expected_recovery_service_names)
776782

777783
def test_can_configure_docker_peer_node(self):
778784
config = self._create_configuration(NodeFeatures.PEER)
779-
self._assert_can_configure_docker(config, None, ['client'])
785+
self._assert_can_configure_docker(config, None, ['client'], ['client'])
780786

781787
def test_can_configure_docker_api_node_with_https(self):
782788
config = self._create_configuration(NodeFeatures.API)
783789
self._assert_can_configure_docker(config, [
784-
'delayrestapi.sh', 'mongors.sh', 'startBroker.sh', 'startServer.sh', 'wait.sh'
790+
'delayrestapi.sh', 'mongors.sh', 'startBroker.sh', 'startRecovery.sh', 'startServer.sh', 'wait.sh'
785791
], [
786792
'db', 'initiate', 'client', 'broker', 'rest-api', 'rest-api-https-proxy'
793+
], [
794+
'db', 'initiate', 'client'
787795
])
788796

789797
def test_can_configure_docker_api_node_without_https(self):
790798
config = self._create_configuration(NodeFeatures.API, api_https=False)
791799
self._assert_can_configure_docker(config, [
792-
'delayrestapi.sh', 'mongors.sh', 'startBroker.sh', 'startServer.sh', 'wait.sh'
800+
'delayrestapi.sh', 'mongors.sh', 'startBroker.sh', 'startRecovery.sh', 'startServer.sh', 'wait.sh'
793801
], [
794802
'db', 'initiate', 'client', 'broker', 'rest-api'
803+
], [
804+
'db', 'initiate', 'client'
795805
])
796806

797807
def test_can_configure_docker_api_light_node_with_https(self):
798808
config = self._create_configuration(NodeFeatures.API, light_api=True)
799-
self._assert_can_configure_docker(config, [], ['client', 'rest-api', 'rest-api-https-proxy'])
809+
self._assert_can_configure_docker(config, [], ['client', 'rest-api', 'rest-api-https-proxy'], ['client'])
800810

801811
def test_can_configure_docker_api_light_node_without_https(self):
802812
config = self._create_configuration(NodeFeatures.API, api_https=False, light_api=True)
803-
self._assert_can_configure_docker(config, [], ['client', 'rest-api'])
813+
self._assert_can_configure_docker(config, [], ['client', 'rest-api'], ['client'])
804814

805815
# endregion
806816

0 commit comments

Comments
 (0)