Skip to content

Commit 3e48e84

Browse files
authored
Integration tests with multiple configuration files (PR #49)
* Support loading specific configuration files with scrapyd-k8s command-line arguments * Integration tests with multiple scrapyd-k8s configurations * Integration test for http auth (#24) * CI test improvements
1 parent ad8f7bc commit 3e48e84

File tree

7 files changed

+189
-50
lines changed

7 files changed

+189
-50
lines changed

.github/workflows/test.yml

+75-28
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Scapyd-k8s CI
1+
name: Scrapyd-k8s CI
22
on:
33
push:
44
branches:
@@ -46,15 +46,20 @@ jobs:
4646
- name: Pull example spider
4747
run: docker pull ghcr.io/q-m/scrapyd-k8s-spider-example
4848

49-
- name: Run scrapyd-k8s
50-
run: |
51-
cp scrapyd_k8s.sample-docker.conf scrapyd_k8s.conf
52-
python -m scrapyd_k8s &
53-
while ! nc -q 1 localhost 6800 </dev/null; do sleep 1; done
54-
curl http://localhost:6800/daemonstatus.json
55-
5649
- name: Run tests
57-
run: pytest -vv --color=yes scrapyd_k8s/tests/integration/
50+
run: |
51+
for test in scrapyd_k8s/tests/integration/test_*.py; do
52+
echo; echo "# $test"
53+
# run scrapyd-k8s with test-specific configuration file
54+
cfg=`echo "$test" | sed 's/\.py$/.conf/'`
55+
python -m scrapyd_k8s -c scrapyd_k8s.sample-docker.conf -c "$cfg" &
56+
# wait for scrapyd-k8s to become ready
57+
curl -s --retry 30 --retry-delay 1 --retry-all-errors http://localhost:6800/daemonstatus.json
58+
# run test
59+
pytest -vv --color=yes "$test"
60+
# stop scrapyd-k8s again
61+
kill %1; wait %1 || true
62+
done
5863
5964
test-manifest:
6065
container:
@@ -100,17 +105,54 @@ jobs:
100105
sed -i 's/\(image:\s*\)ghcr\.io\/q-m\/scrapyd-k8s:/\1test:/' kubernetes.yaml
101106
sed -i 's/\(type:\s*\)ClusterIP/\1NodePort/' kubernetes.yaml
102107
kubectl create -f kubernetes.yaml
103-
# and wait for scrapyd-k8s to become ready
104-
kubectl wait --for=condition=Available deploy/scrapyd-k8s --timeout=60s
105-
curl --retry 10 --retry-delay 2 --retry-all-errors `minikube service scrapyd-k8s --url`/daemonstatus.json
108+
# don't start deployment just yet, as we want to run it with test-specific configuration
109+
kubectl scale --replicas=0 deploy/scrapyd-k8s
110+
# add second configuration file for test-specific configuration
111+
kubectl patch deploy scrapyd-k8s --type=json -p='[
112+
{
113+
"op": "add",
114+
"path": "/spec/template/spec/volumes/-",
115+
"value": { "configMap": { "name": "scrapyd-k8s-testcfg" }, "name": "scrapyd-k8s-testcfg" }
116+
},
117+
{
118+
"op": "add",
119+
"path": "/spec/template/spec/containers/0/volumeMounts/-",
120+
"value": { "name": "scrapyd-k8s-testcfg", "mountPath": "/opt/app/scrapyd_k8s.test.conf", "readOnly": true, "subPath": "scrapyd_k8s.test.conf" }
121+
},
122+
{
123+
"op": "replace",
124+
"path": "/spec/template/spec/containers/0/command",
125+
"value": ["python3", "-m", "scrapyd_k8s", "-c", "scrapyd_k8s.conf", "-c", "scrapyd_k8s.test.conf"]
126+
}
127+
]'
106128
107129
- name: Run tests
108130
run: |
109-
TEST_WITH_K8S=1 \
110-
TEST_BASE_URL=`minikube service scrapyd-k8s --url` \
111-
TEST_MAX_WAIT=60 \
112-
TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \
113-
pytest -vv --color=yes scrapyd_k8s/tests/integration/
131+
# setup for in-cluster k8s
132+
# for each integration test file
133+
for test in scrapyd_k8s/tests/integration/test_*.py; do
134+
echo; echo "# $test"
135+
# run scrapyd-k8s with test-specific configuration file
136+
cfg=`echo "$test" | sed 's/\.py$/.conf/'`
137+
kubectl create cm scrapyd-k8s-testcfg --from-file=scrapyd_k8s.test.conf="$cfg"
138+
kubectl scale --replicas=1 deploy/scrapyd-k8s
139+
# wait for scrapyd-k8s to become ready
140+
kubectl wait --for=condition=Available deploy/scrapyd-k8s --timeout=60s
141+
curl -s --retry 10 --retry-delay 2 --retry-all-errors `minikube service scrapyd-k8s --url`/daemonstatus.json
142+
# run test
143+
TEST_WITH_K8S=1 \
144+
TEST_BASE_URL=`minikube service scrapyd-k8s --url` \
145+
TEST_MAX_WAIT=60 \
146+
TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \
147+
pytest -vv --color=yes "$test"
148+
# delete all jobs to start with a clean slate next time
149+
kubectl delete job --all
150+
# stop scrapyd-k8s and delete test-specific configmap
151+
kubectl scale --replicas=0 deploy/scrapyd-k8s
152+
kubectl wait --for=delete pod -l app.kubernetes.io/name=scrapyd-k8s --timeout=90s
153+
kubectl delete cm scrapyd-k8s-testcfg --wait
154+
done
155+
114156
test-k8s:
115157
container:
116158
runs-on: ubuntu-latest
@@ -139,16 +181,21 @@ jobs:
139181
# already pull image so we don't have to wait for it later
140182
minikube image pull ghcr.io/q-m/scrapyd-k8s-spider-example:latest
141183
142-
- name: Run scrapyd-k8s
143-
run: |
144-
cp scrapyd_k8s.sample-k8s.conf scrapyd_k8s.conf
145-
python -m scrapyd_k8s &
146-
while ! nc -q 1 localhost 6800 </dev/null; do sleep 1; done
147-
curl http://localhost:6800/daemonstatus.json
148-
149184
- name: Run tests
150185
run: |
151-
TEST_WITH_K8S=1 \
152-
TEST_MAX_WAIT=60 \
153-
TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \
154-
pytest -vv --color=yes scrapyd_k8s/tests/integration/
186+
for test in scrapyd_k8s/tests/integration/test_*.py; do
187+
echo "# $test"
188+
# run scrapyd-k8s with test-specific configuration file
189+
cfg=`echo "$test" | sed 's/\.py$/.conf/'`
190+
[ -e "$cfg" ] || cfg=/dev/null
191+
python -m scrapyd_k8s -c scrapyd_k8s.sample-k8s.conf -c "$cfg" &
192+
# wait for scrapyd-k8s to become ready
193+
curl -s --retry 30 --retry-delay 1 --retry-all-errors http://localhost:6800/daemonstatus.json
194+
# run test
195+
TEST_WITH_K8S=1 \
196+
TEST_MAX_WAIT=60 \
197+
TEST_AVAILABLE_VERSIONS=latest,`skopeo list-tags docker://ghcr.io/q-m/scrapyd-k8s-spider-example | jq -r '.Tags | map(select(. != "latest" and (startswith("sha-") | not))) | join(",")'` \
198+
pytest -vv --color=yes "$test"
199+
# stop scrapyd-k8s again
200+
kill %1; wait %1 || true
201+
done

scrapyd_k8s/__main__.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,19 @@
1-
from .api import run
1+
import argparse
2+
3+
from .api import config, run
4+
5+
def argparser():
6+
parser = argparse.ArgumentParser(
7+
prog='scrapyd-k8s',
8+
description='Deploying and running spiders on container infrastructure, with the scrapyd protocol.'
9+
)
10+
parser.add_argument('-c', '--config', action='append', default=['scrapyd_k8s.conf'],
11+
help='Load configuration file (can be multiple)')
12+
return parser
213

314
if __name__ == "__main__":
15+
parser = argparser()
16+
args = parser.parse_args()
17+
config.read(args.config)
18+
419
run()

scrapyd_k8s/api.py

+17-17
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,10 @@
55
from flask_basicauth import BasicAuth
66
from natsort import natsort_keygen, ns
77

8-
# setup logging before anything else
98
from .config import Config
10-
from .logging import setup_logging
119
config = Config()
12-
log_level = config.scrapyd().get('log_level', 'INFO')
13-
setup_logging(log_level)
1410

1511
app = Flask(__name__)
16-
repository = (config.repository_cls())(config)
17-
launcher = (config.launcher_cls())(config)
18-
scrapyd_config = config.scrapyd()
19-
2012

2113
@app.get("/")
2214
def home():
@@ -28,9 +20,9 @@ def healthz():
2820

2921
@app.get("/daemonstatus.json")
3022
def api_daemonstatus():
31-
jobs = list(launcher.listjobs())
23+
jobs = list(config.launcher().listjobs())
3224
return {
33-
"node_name": config.scrapyd().get("node_name", launcher.get_node_name()),
25+
"node_name": config.scrapyd().get("node_name", config.launcher().get_node_name()),
3426
"status": "ok",
3527
"pending": len([j for j in jobs if j['state'] == 'pending']),
3628
"running": len([j for j in jobs if j['state'] == 'running']),
@@ -55,7 +47,7 @@ def api_schedule():
5547
# any other parameter is passed as spider argument
5648
args = { k: v for k, v in request.form.items() if k not in ('project', 'spider', 'setting', 'jobid', 'priority', '_version') }
5749
env_config, env_secret = project.env_config(), project.env_secret()
58-
jobid = launcher.schedule(project, _version, spider, job_id, settings, args)
50+
jobid = config.launcher().schedule(project, _version, spider, job_id, settings, args)
5951
return { 'status': 'ok', 'jobid': job_id }
6052

6153
@app.post("/cancel.json")
@@ -67,7 +59,7 @@ def api_cancel():
6759
if not job_id:
6860
return error('job missing in form parameters', status=400)
6961
signal = request.form.get('signal', 'TERM') # TODO validate signal?
70-
prevstate = launcher.cancel(project_id, job_id, signal)
62+
prevstate = config.launcher().cancel(project_id, job_id, signal)
7163
if not prevstate:
7264
return error('job not found', status=404)
7365
return { 'status': 'ok', 'prevstate': prevstate }
@@ -84,7 +76,7 @@ def api_listversions():
8476
project = config.project(project_id)
8577
if not project:
8678
return error('project not found in configuration', status=404)
87-
tags = repository.listtags(project.repository())
79+
tags = config.repository().listtags(project.repository())
8880
tags = [t for t in tags if not t.startswith('sha-')]
8981
tags.sort(key=natsort_keygen(alg=ns.NUMAFTER))
9082
return { 'status': 'ok', 'versions': tags }
@@ -98,15 +90,15 @@ def api_listspiders():
9890
if not project:
9991
return error('project not found in configuration', status=404)
10092
_version = request.args.get('_version', 'latest') # TODO allow customizing latest tag
101-
spiders = repository.listspiders(project.repository(), project_id, _version)
93+
spiders = config.repository().listspiders(project.repository(), project_id, _version)
10294
if spiders is None:
10395
return error('project version not found in repository', status=404)
10496
return { 'status': 'ok', 'spiders': spiders }
10597

10698
@app.get("/listjobs.json")
10799
def api_listjobs():
108100
project_id = request.args.get('project')
109-
jobs = launcher.listjobs(project_id)
101+
jobs = config.launcher().listjobs(project_id)
110102
pending = [j for j in jobs if j['state'] == 'pending']
111103
running = [j for j in jobs if j['state'] == 'running']
112104
finished = [j for j in jobs if j['state'] == 'finished']
@@ -133,21 +125,29 @@ def api_delproject():
133125
def after_request(response: Response):
134126
if response.is_json:
135127
data = response.json
136-
data["node_name"] = config.scrapyd().get("node_name", launcher.get_node_name())
128+
data["node_name"] = config.scrapyd().get("node_name", config.launcher().get_node_name())
137129
response.data = jsonify(data).data
138130
return response
139131

140132
def error(msg, status=200):
141133
return { 'status': 'error', 'message': msg }, status
142134

143135
def enable_authentication(app, config_username, config_password):
144-
basic_auth = BasicAuth(app)
136+
137+
# workaround for https://github.com/jpvanhal/flask-basicauth/issues/11
138+
class BasicAuthExceptHealthz(BasicAuth):
139+
def authenticate(self):
140+
return request.path == "/healthz" or super().authenticate()
141+
142+
basic_auth = BasicAuthExceptHealthz(app)
145143
app.config["BASIC_AUTH_USERNAME"] = config_username
146144
app.config["BASIC_AUTH_PASSWORD"] = config_password
147145
app.config["BASIC_AUTH_FORCE"] = True
148146
return basic_auth
149147

150148
def run():
149+
scrapyd_config = config.scrapyd()
150+
151151
# where to listen
152152
host = scrapyd_config.get('bind_address', '127.0.0.1')
153153
port = scrapyd_config.get('http_port', '6800')

scrapyd_k8s/config.py

+25-4
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,42 @@
22
from configparser import ConfigParser
33
from importlib import import_module
44

5+
from .logging import setup_logging
6+
57
class Config:
6-
def __init__(self, file='scrapyd_k8s.conf'):
8+
def __init__(self):
79
self._config = ConfigParser(empty_lines_in_values=False)
8-
self._config.read(file)
10+
self._projects = []
11+
self._launcher = None
12+
self._repository = None
13+
14+
def read(self, files=['scrapyd_k8s.conf']):
15+
self._config.read(files)
16+
self._update()
17+
18+
def _update(self):
919
self._projects = [s[8:] for s in self._config.sections() if re.match(r'^project\.[^\.]+$', s)]
20+
setup_logging(self.scrapyd().get('log_level', 'INFO'))
1021

1122
def scrapyd(self):
1223
return self._config['scrapyd']
1324

14-
def repository_cls(self):
25+
def repository(self):
26+
if not self._repository:
27+
self._repository = (self._repository_cls())(self)
28+
return self._repository
29+
30+
def _repository_cls(self):
1531
repo = self._config['scrapyd'].get('repository', 'scrapyd_k8s.repository.Remote')
1632
pkg, cls = repo.rsplit('.', 1)
1733
return getattr(import_module(pkg), cls)
1834

19-
def launcher_cls(self):
35+
def launcher(self):
36+
if not self._launcher:
37+
self._launcher = (self._launcher_cls())(self)
38+
return self._launcher
39+
40+
def _launcher_cls(self):
2041
repo = self._config['scrapyd'].get('launcher', 'scrapyd_k8s.launcher.K8s')
2142
pkg, cls = repo.rsplit('.', 1)
2243
return getattr(import_module(pkg), cls)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# additional scrapyd-k8s configuration for test_api.py
2+
# (empty, i.e. there is no additional configuration for this test)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# additional scrapyd-k8s configuration for test_auth.py
2+
[scrapyd]
3+
username = foo
4+
password = secret
+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import requests
4+
5+
BASE_URL = os.getenv('TEST_BASE_URL', 'http://localhost:6800')
6+
7+
def test_root_no_auth():
8+
response = requests.get(BASE_URL)
9+
assert response.status_code == 401
10+
assert 'scrapyd-k8s' not in response.text
11+
12+
def test_root_incorrect_auth():
13+
session = requests.Session()
14+
session.auth = ('nonexistant', 'incorrect')
15+
response = session.get(BASE_URL)
16+
assert response.status_code == 401
17+
assert 'scrapyd-k8s' not in response.text
18+
19+
def test_root_correct_auth():
20+
session = requests.Session()
21+
session.auth = ('foo', 'secret') # needs to match test_auth.conf
22+
response = session.get(BASE_URL)
23+
assert response.status_code == 200
24+
assert response.headers['Content-Type'] == 'text/html; charset=utf-8'
25+
assert 'scrapyd-k8s' in response.text
26+
assert '</html>' in response.text
27+
28+
# TODO this is going wrong now (!)
29+
#def test_healthz_ok():
30+
# response = requests.get(BASE_URL + '/healthz')
31+
# assert response.status_code == 200
32+
33+
def test_daemonstatus_no_auth():
34+
response = requests.get(BASE_URL + '/daemonstatus.json')
35+
assert response.status_code == 401
36+
37+
def test_daemonstatus_incorrect_auth():
38+
session = requests.Session()
39+
session.auth = ('nonexistant', 'incorrect')
40+
response = session.get(BASE_URL + '/daemonstatus.json')
41+
assert response.status_code == 401
42+
assert 'ok' not in response.text
43+
44+
def test_daemonstatus_correct_auth():
45+
session = requests.Session()
46+
session.auth = ('foo', 'secret') # needs to match test_auth.conf
47+
response = session.get(BASE_URL + '/daemonstatus.json')
48+
assert response.status_code == 200
49+
assert response.headers['Content-Type'] == 'application/json'
50+
assert response.json()['status'] == 'ok'

0 commit comments

Comments
 (0)