From cf8cd520f5608ac739fb464863488c99bcbdf67f Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Thu, 7 Mar 2019 14:39:09 -0800 Subject: [PATCH 001/221] Allow to decrease min_machines min_machines can be decrease if the nodes are autoscaled (that is if min_machines has been previously raised). Closes #3003. --- AppController/djinn.rb | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 2c8b59f578..c0719df216 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -1329,9 +1329,9 @@ def set_property(property_name, property_value, secret) unless is_cloud? Djinn.log_warn('min_machines is not used in non-cloud infrastructures.') end - if Integer(val) < Integer(@options['min_machines']) + unless can_we_scale_down?(Integer(val)) Djinn.log_warn('Invalid input: cannot lower min_machines!') - return 'min_machines cannot be less than the nodes defined in ips_layout' + return 'Cannot lower min_machines past non-autoscaled nodes' end elsif key == 'max_machines' unless is_cloud? @@ -2459,6 +2459,18 @@ def get_all_compute_nodes return ae_nodes end + # This method checks that nodes above index are compute only and thus + # can be easily terminated. + def can_we_scale_down?(index) + @state_change_lock.synchronize { + nodes_to_check = @nodes.drop(min_machines) + } + nodes_to_check.each { |node| + return false if node['jobs'] != ['compute'] + } + return true + end + # Gets a list of autoscaled nodes by going through the nodes array # and splitting the array from index greater than the # minimum images specified. From 6490a2d117f8e6c50d033fbfd6d7a5e99d62fb70 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 29 Mar 2019 13:56:59 +0200 Subject: [PATCH 002/221] Remove profiling, move to aiohttp --- Hermes/appscale/hermes/handlers.py | 175 +++++----- Hermes/appscale/hermes/hermes_server.py | 181 +++++++--- Hermes/appscale/hermes/portal.py | 62 ---- Hermes/appscale/hermes/profile.py | 414 ----------------------- Hermes/appscale/hermes/stats_app.py | 427 ------------------------ 5 files changed, 223 insertions(+), 1036 deletions(-) delete mode 100644 Hermes/appscale/hermes/portal.py delete mode 100644 Hermes/appscale/hermes/profile.py delete mode 100644 Hermes/appscale/hermes/stats_app.py diff --git a/Hermes/appscale/hermes/handlers.py b/Hermes/appscale/hermes/handlers.py index 2767377500..40639aec29 100644 --- a/Hermes/appscale/hermes/handlers.py +++ b/Hermes/appscale/hermes/handlers.py @@ -1,15 +1,12 @@ -import json +import http +import inspect import logging import time from datetime import datetime -from tornado import gen -from tornado.options import options -from tornado.web import RequestHandler +from aiohttp import web -from appscale.hermes.constants import ( - SECRET_HEADER, HTTP_Codes, ACCEPTABLE_STATS_AGE -) +from appscale.hermes.constants import SECRET_HEADER, ACCEPTABLE_STATS_AGE from appscale.hermes.converter import ( stats_to_dict, IncludeLists, WrongIncludeLists ) @@ -17,39 +14,74 @@ logger = logging.getLogger(__name__) -class CurrentStatsHandler(RequestHandler): +DEFAULT_INCLUDE_LISTS = IncludeLists({ + # Node stats + 'node': ['utc_timestamp', 'cpu', 'memory', + 'partitions_dict', 'loadavg'], + 'node.cpu': ['percent', 'count'], + 'node.memory': ['available', 'total'], + 'node.partition': ['free', 'used'], + 'node.loadavg': ['last_5min'], + # Processes stats + 'process': ['monit_name', 'unified_service_name', 'application_id', + 'port', 'cpu', 'memory', 'children_stats_sum'], + 'process.cpu': 
['user', 'system', 'percent'], + 'process.memory': ['resident', 'virtual', 'unique'], + 'process.children_stats_sum': ['cpu', 'memory'], + # Proxies stats + 'proxy': ['name', 'unified_service_name', 'application_id', + 'frontend', 'backend', 'servers_count'], + 'proxy.frontend': ['bin', 'bout', 'scur', 'smax', 'rate', + 'req_rate', 'req_tot', 'hrsp_4xx', 'hrsp_5xx'], + 'proxy.backend': ['qcur', 'scur', 'hrsp_5xx', 'qtime', 'rtime'], + # Taskqueue service stats + 'taskqueue': ['utc_timestamp', 'current_requests', 'cumulative', 'recent', + 'instances_count', 'failures'], + 'taskqueue.instance': ['start_timestamp_ms', 'current_requests', + 'cumulative', 'recent'], + 'taskqueue.cumulative': ['total', 'failed', 'pb_reqs', 'rest_reqs'], + 'taskqueue.recent': ['total', 'failed', 'avg_latency', + 'pb_reqs', 'rest_reqs'], + # RabbitMQ stats + 'rabbitmq': ['utc_timestamp', 'disk_free_alarm', 'mem_alarm', 'name', + 'partitions'], + # Push queue stats + 'queue': ['name', 'messages'], + # Cassandra stats + 'cassandra': ['utc_timestamp', 'nodes', 'missing_nodes', 'unknown_nodes'], + # Cassandra node stats + 'cassandra.node': ['address', 'status', 'state', 'load', 'owns_pct', + 'tokens_num'], +}) + + +def verify_secret_middleware(secret): + async def verify_secret(request, handler): + if request.headers.get(SECRET_HEADER) != secret: + logger.warn("Received bad secret from {client}" + .format(client=request.remote)) + return web.Response(status=http.HTTPStatus.FORBIDDEN, + reason="Bad secret") + return await handler(request) + + return verify_secret + + +class LocalStatsHandler(object): """ Handler for getting current local stats of specific kind. """ - - def initialize(self, source, default_include_lists, cache_container): - """ Initializes RequestHandler for handling a single request. + def __init__(self, source): + """ Initializes request handler for providing current stats. Args: source: an object with method get_current. - default_include_lists: an instance of IncludeLists to use as default. - cache_container: a list containing a single element - cached snapshot. 
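+        (get_current may return either a stats snapshot or an awaitable
+        resolving to one; the handler awaits the latter before caching it).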
""" self._stats_source = source - self._default_include_lists = default_include_lists - self._cache_container = cache_container - - @property - def _cached_snapshot(self): - return self._cache_container[0] + self._cached_snapshot = None - @_cached_snapshot.setter - def _cached_snapshot(self, newer_snapshot): - self._cache_container[0] = newer_snapshot - - @gen.coroutine - def get(self): - if self.request.headers.get(SECRET_HEADER) != options.secret: - logger.warn("Received bad secret from {client}" - .format(client=self.request.remote_ip)) - self.set_status(HTTP_Codes.HTTP_DENIED, "Bad secret") - return - if self.request.body: - payload = json.loads(self.request.body) + async def get(self, request): + if request.has_body: + payload = await request.json() else: payload = {} include_lists = payload.get('include_lists') @@ -60,12 +92,11 @@ def get(self): include_lists = IncludeLists(include_lists) except WrongIncludeLists as err: logger.warn("Bad request from {client} ({error})" - .format(client=self.request.remote_ip, error=err)) - json.dump({'error': str(err)}, self) - self.set_status(HTTP_Codes.HTTP_BAD_REQUEST, 'Wrong include_lists') - return + .format(client=request.remote, error=err)) + return web.Response(status=http.HTTPStatus.BAD_REQUEST, + reason='Wrong include_lists', text=str(err)) else: - include_lists = self._default_include_lists + include_lists = DEFAULT_INCLUDE_LISTS snapshot = None @@ -76,42 +107,32 @@ def get(self): if self._cached_snapshot.utc_timestamp >= acceptable_time: snapshot = self._cached_snapshot logger.info("Returning cached snapshot with age {:.2f}s" - .format(now-self._cached_snapshot.utc_timestamp)) + .format(now-self._cached_snapshot.utc_timestamp)) if not snapshot: snapshot = self._stats_source.get_current() - if isinstance(snapshot, gen.Future): - snapshot = yield snapshot + if inspect.isawaitable(snapshot): + snapshot = await snapshot self._cached_snapshot = snapshot - json.dump(stats_to_dict(snapshot, include_lists), self) + return web.json_response(stats_to_dict(snapshot, include_lists)) -class CurrentClusterStatsHandler(RequestHandler): +class ClusterStatsHandler(object): """ Handler for getting current stats of specific kind. """ - - def initialize(self, source, default_include_lists, cache_container): - """ Initializes RequestHandler for handling a single request. + def __init__(self, source): + """ Initializes request handler for providing current stats. Args: source: an object with method get_current. - default_include_lists: an instance of IncludeLists to use as default. - cache_container: a dict with cached snapshots. 
""" - self._current_cluster_stats_source = source - self._default_include_lists = default_include_lists - self._cached_snapshots = cache_container + self._cluster_stats_source = source + self._cached_snapshots = {} - @gen.coroutine - def get(self): - if self.request.headers.get(SECRET_HEADER) != options.secret: - logger.warn("Received bad secret from {client}" - .format(client=self.request.remote_ip)) - self.set_status(HTTP_Codes.HTTP_DENIED, "Bad secret") - return - if self.request.body: - payload = json.loads(self.request.body) + async def get(self, request): + if request.has_body: + payload = await request.json() else: payload = {} include_lists = payload.get('include_lists') @@ -122,31 +143,30 @@ def get(self): include_lists = IncludeLists(include_lists) except WrongIncludeLists as err: logger.warn("Bad request from {client} ({error})" - .format(client=self.request.remote_ip, error=err)) - json.dump({'error': str(err)}, self) - self.set_status(HTTP_Codes.HTTP_BAD_REQUEST, 'Wrong include_lists') - return + .format(client=request.remote, error=err)) + return web.Response(status=http.HTTPStatus.BAD_REQUEST, + reason='Wrong include_lists', text=str(err)) else: - include_lists = self._default_include_lists + include_lists = DEFAULT_INCLUDE_LISTS newer_than = time.mktime(datetime.now().timetuple()) - max_age - if (not self._default_include_lists or - include_lists.is_subset_of(self._default_include_lists)): + if (not DEFAULT_INCLUDE_LISTS or + include_lists.is_subset_of(DEFAULT_INCLUDE_LISTS)): # If user didn't specify any non-default fields we can use local cache fresh_local_snapshots = { node_ip: snapshot - for node_ip, snapshot in self._cached_snapshots.iteritems() + for node_ip, snapshot in self._cached_snapshots.items() if max_age and snapshot.utc_timestamp > newer_than } if fresh_local_snapshots: logger.debug("Returning cluster stats with {} cached snapshots" - .format(len(fresh_local_snapshots))) + .format(len(fresh_local_snapshots))) else: fresh_local_snapshots = {} new_snapshots_dict, failures = ( - yield self._current_cluster_stats_source.get_current( + await self._cluster_stats_source.get_current( max_age=max_age, include_lists=include_lists, exclude_nodes=fresh_local_snapshots.keys() ) @@ -163,24 +183,21 @@ def get(self): for node_ip, snapshot in new_snapshots_dict.iteritems() } - json.dump({ + return web.json_response({ "stats": rendered_snapshots, "failures": failures - }, self) + }) -class Respond404Handler(RequestHandler): +def not_found(reason): """ - This class is aimed to stub unavailable route. + This function creates handler is aimed to stub unavailable route. Hermes master has some extra routes which are not available on slaves, also Hermes stats can work in lightweight or verbose mode and verbose mode has extra routes. This handlers is configured with a reason why specific resource is not available on the instance of Hermes. 
""" - - def initialize(self, reason): - self.reason = reason - - def get(self): - self.set_status(404, self.reason) + def handler(request): + return web.Response(status=http.HTTPStatus.NOT_FOUND, reason=reason) + return handler diff --git a/Hermes/appscale/hermes/hermes_server.py b/Hermes/appscale/hermes/hermes_server.py index 8a3d0e741a..7dafaace2c 100644 --- a/Hermes/appscale/hermes/hermes_server.py +++ b/Hermes/appscale/hermes/hermes_server.py @@ -3,55 +3,138 @@ import argparse import logging -import signal -import tornado.escape -import tornado.httpclient -import tornado.web -from appscale.common import appscale_info -from appscale.common.constants import LOG_FORMAT, ZK_PERSISTENT_RECONNECTS -from kazoo.client import KazooClient -from tornado.ioloop import IOLoop -from tornado.options import options +from aiohttp import web -from appscale.hermes import constants -from appscale.hermes import stats_app +from appscale.common import appscale_info +from appscale.common.constants import LOG_FORMAT + +from appscale.hermes import constants, handlers + +from appscale.hermes.handlers import ( + LocalStatsHandler, ClusterStatsHandler, not_found +) +from appscale.hermes.producers.cluster_stats import ( + cluster_nodes_stats, cluster_processes_stats, cluster_proxies_stats, + cluster_rabbitmq_stats, cluster_push_queues_stats, + cluster_taskqueue_stats, + cluster_cassandra_stats +) +from appscale.hermes.producers.cassandra_stats import CassandraStatsSource +from appscale.hermes.producers.node_stats import NodeStatsSource +from appscale.hermes.producers.process_stats import ProcessesStatsSource +from appscale.hermes.producers.proxy_stats import ProxiesStatsSource +from appscale.hermes.producers.rabbitmq_stats import PushQueueStatsSource +from appscale.hermes.producers.rabbitmq_stats import RabbitMQStatsSource +from appscale.hermes.producers.taskqueue_stats import TaskqueueStatsSource logger = logging.getLogger(__name__) -# A KazooClient for detecting configuration changes. -zk_client = None - - -def signal_handler(signal, frame): - """ Signal handler for graceful shutdown. """ - logger.warning("Caught signal: {0}".format(signal)) - zk_client.stop() - IOLoop.instance().add_callback(shutdown) - -def shutdown(): - """ Shuts down the server. """ - logger.warning("Hermes is shutting down.") - IOLoop.instance().stop() +def get_local_stats_api_routes(is_lb_node, is_tq_node, is_db_node): + """ Creates stats sources and API handlers for providing local + node, processes and proxies (only on LB nodes) stats. + + Args: + is_lb_node: A boolean indicating whether this node is load balancer. + is_tq_node: A boolean indicating whether this node runs taskqueue service. + is_db_node: A boolean indicating whether this node runs cassandra service. + Returns: + A list of route-handler tuples. + """ + + # Any node provides its node and processes stats + node_stats_handler = LocalStatsHandler(NodeStatsSource) + processes_stats_handler = LocalStatsHandler(ProcessesStatsSource) + if is_lb_node: + # Only LB nodes provide proxies and service stats + proxies_stats_handler = LocalStatsHandler(ProxiesStatsSource) + tq_stats_handler = LocalStatsHandler(TaskqueueStatsSource()) + else: + # Stub handler for non-LB nodes + proxies_stats_handler = not_found('Only LB nodes provides proxies stats') + tq_stats_handler = not_found('Only LB nodes provide TQ service stats') + + if is_tq_node: + # Only TQ nodes provide RabbitMQ stats. 
+ rabbitmq_stats_handler = LocalStatsHandler(RabbitMQStatsSource) + push_queue_stats_handler = LocalStatsHandler(PushQueueStatsSource) + else: + # Stub handler for non-TQ nodes + rabbitmq_stats_handler = not_found('Only TQ nodes provide RabbitMQ stats') + push_queue_stats_handler = not_found('Only TQ nodes provide queue stats') + + if is_db_node: + # Only DB nodes provide Cassandra stats. + cassandra_stats_handler = LocalStatsHandler(CassandraStatsSource) + else: + # Stub handler for non-DB nodes + cassandra_stats_handler = not_found('Only DB nodes provide Cassandra stats') + + return [ + ('/stats/local/node', node_stats_handler), + ('/stats/local/processes', processes_stats_handler), + ('/stats/local/proxies', proxies_stats_handler), + ('/stats/local/rabbitmq', rabbitmq_stats_handler), + ('/stats/local/push_queues', push_queue_stats_handler), + ('/stats/local/taskqueue', tq_stats_handler), + ('/stats/local/cassandra', cassandra_stats_handler), + ] + + +def get_cluster_stats_api_routes(is_lb): + """ Creates stats sources and API handlers for providing cluster + node, processes and proxies stats (on master node only). + If this node is slave, it creates stub handlers for cluster stats routes. + + Args: + is_lb: A boolean indicating whether this node is load balancer. + Returns: + A list of route-handler tuples. + """ + if is_lb: + # Only LB nodes provide cluster stats + node_stats_handler = ClusterStatsHandler(cluster_nodes_stats) + processes_stats_handler = ClusterStatsHandler(cluster_processes_stats) + proxies_stats_handler = ClusterStatsHandler(cluster_proxies_stats) + taskqueue_stats_handler = ClusterStatsHandler(cluster_taskqueue_stats) + rabbitmq_stats_handler = ClusterStatsHandler(cluster_rabbitmq_stats) + push_queue_stats_handler = ClusterStatsHandler(cluster_push_queues_stats) + cassandra_stats_handler = ClusterStatsHandler(cluster_cassandra_stats) + else: + # Stub handler for slave nodes + cluster_stub_handler = not_found('Only LB nodes provide cluster stats') + node_stats_handler = cluster_stub_handler + processes_stats_handler = cluster_stub_handler + proxies_stats_handler = cluster_stub_handler + taskqueue_stats_handler = cluster_stub_handler + rabbitmq_stats_handler = cluster_stub_handler + push_queue_stats_handler = cluster_stub_handler + cassandra_stats_handler = cluster_stub_handler + + return [ + ('/stats/cluster/nodes', node_stats_handler), + ('/stats/cluster/processes', processes_stats_handler), + ('/stats/cluster/proxies', proxies_stats_handler), + ('/stats/cluster/taskqueue', taskqueue_stats_handler), + ('/stats/cluster/rabbitmq', rabbitmq_stats_handler), + ('/stats/cluster/push_queues', push_queue_stats_handler), + ('/stats/cluster/cassandra', cassandra_stats_handler), + ] def main(): """ Main. 
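+  Parses command-line arguments, configures logging, registers the local and
+  cluster stats routes behind the secret-checking middleware and runs the
+  aiohttp application on the configured port.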
""" parser = argparse.ArgumentParser() - parser.add_argument( - '-v', '--verbose', action='store_true', - help='Output debug-level logging') + parser.add_argument('-v', '--verbose', action='store_true', + help='Output debug-level logging') + parser.add_argument('--port', type=int, default=constants.HERMES_PORT, + help='The port to listen on') args = parser.parse_args() logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) if args.verbose: - logging.getLogger().setLevel(logging.DEBUG) - - options.define('secret', appscale_info.get_secret()) - - signal.signal(signal.SIGTERM, signal_handler) - signal.signal(signal.SIGINT, signal_handler) + logging.getLogger('appscale').setLevel(logging.DEBUG) my_ip = appscale_info.get_private_ip() is_master = (my_ip == appscale_info.get_headnode_ip()) @@ -59,25 +142,15 @@ def main(): is_tq = (my_ip in appscale_info.get_taskqueue_nodes()) is_db = (my_ip in appscale_info.get_db_ips()) - if is_master: - global zk_client - zk_client = KazooClient( - hosts=','.join(appscale_info.get_zk_node_ips()), - connection_retry=ZK_PERSISTENT_RECONNECTS) - zk_client.start() - # Start watching profiling configs in ZooKeeper - stats_app.ProfilingManager(zk_client) - - app = tornado.web.Application( - stats_app.get_local_stats_api_routes(is_lb, is_tq, is_db) - + stats_app.get_cluster_stats_api_routes(is_master), - debug=False - ) - app.listen(constants.HERMES_PORT) - - # Start loop for accepting http requests. - IOLoop.instance().start() + secret = appscale_info.get_secret() + security_middleware = handlers.verify_secret_middleware(secret) + app = web.Application(middlewares=[security_middleware]) - logger.info("Hermes is up and listening on port: {}." - .format(constants.HERMES_PORT)) + route_items = [] + route_items += get_local_stats_api_routes(is_lb, is_tq, is_db) + route_items += get_cluster_stats_api_routes(is_master) + for route, handler in route_items: + app.router.add_get(route, handlers) + logger.info("Starting Hermes on port: {}.".format(args.port)) + web.run_app(app, port=args.port, access_log=logger) diff --git a/Hermes/appscale/hermes/portal.py b/Hermes/appscale/hermes/portal.py deleted file mode 100644 index 47f5352a94..0000000000 --- a/Hermes/appscale/hermes/portal.py +++ /dev/null @@ -1,62 +0,0 @@ -import json -import logging -import urllib - -from appscale.hermes import helper, constants -from appscale.hermes.helper import JSONTags -from appscale.hermes.converter import stats_to_dict - -logger = logging.getLogger(__name__) - - -class NodeStatsPortalSender(object): - def __init__(self): - self._portal_method = '/{deployment_id}/stats/cluster/nodes' - - def send(self, nodes_stats): - deployment_id = helper.get_deployment_id() - # If the deployment is not registered, skip. - if not deployment_id: - return - - # Send request to AppScale Portal. 
- portal_path = self._portal_method.format(deployment_id=deployment_id) - url = "{0}{1}".format(constants.PORTAL_URL, portal_path) - data = { - 'deployment_id': deployment_id, - 'nodes_stats': json.dumps({ - node_ip: [stats_to_dict(snapshot) for snapshot in snapshots] - for node_ip, snapshots in nodes_stats.iteritems() - }) - } - snapshots_num = sum(len(snapshots) for snapshots in nodes_stats.values()) - logger.debug( - "Sending {snapshots} node stats snapshots about {nodes} nodes to the " - "AppScale Portal".format(snapshots=snapshots_num, nodes=len(nodes_stats)) - ) - - request = helper.create_request(url=url, method='POST', - body=urllib.urlencode(data)) - response = helper.urlfetch(request) - - if not response[JSONTags.SUCCESS]: - logger.error("Inaccessible resource: {}".format(url)) - return - - -class ProcessesStatsPortalSender(object): - def __init__(self): - self._portal_method = '/{deployment_id}/stats/cluster/processes' - - def send(self, processes_stats): - # TODO - pass - - -class ProxiesStatsPortalSender(object): - def __init__(self): - self._portal_method = '/{deployment_id}/stats/cluster/proxies' - - def send(self, proxies_stats): - # TODO - pass diff --git a/Hermes/appscale/hermes/profile.py b/Hermes/appscale/hermes/profile.py deleted file mode 100644 index ef8ef1f8f9..0000000000 --- a/Hermes/appscale/hermes/profile.py +++ /dev/null @@ -1,414 +0,0 @@ -""" This module is responsible for writing cluster statistics to CSV files. """ -import collections -import csv -import time -from datetime import datetime -from os import path, rename - -import attr - -from appscale.hermes import helper -from appscale.hermes import converter -from appscale.hermes.constants import PROFILE_LOG_DIR -from appscale.hermes.producers import node_stats, process_stats, \ - proxy_stats - - -class NodesProfileLog(object): - - def __init__(self, include_lists=None): - """ Initializes profile log for cluster node stats. - Renders header according to include_lists in advance and - creates base directory for node stats profile log. - - Args: - include_lists: An instance of IncludeLists describing which fields - of node stats should be written to CSV log. - """ - self._include_lists = include_lists - self._header = ( - converter.get_stats_header(node_stats.NodeStatsSnapshot, - self._include_lists) - ) - helper.ensure_directory(PROFILE_LOG_DIR) - - def write(self, nodes_stats_dict): - """ Saves newly produced cluster node stats - to a list of CSV files (file per node). - - Args: - nodes_stats_dict: A dict with node IP as key and list of - NodeStatsSnapshot as value. - """ - for node_ip, snapshot in nodes_stats_dict.iteritems(): - with self._prepare_file(node_ip) as csv_file: - row = converter.stats_to_list(snapshot, self._include_lists) - csv.writer(csv_file).writerow(row) - - def _prepare_file(self, node_ip): - """ Prepares CSV file with name node/.csv - for appending new lines. - - Args: - node_ip: A string representation of node IP. - Returns: - A file object opened for appending new data. 
- """ - node_dir = path.join(PROFILE_LOG_DIR, node_ip) - file_name = path.join(node_dir, 'node.csv') - if not path.isfile(file_name): - helper.ensure_directory(node_dir) - # Create file and write header - with open(file_name, 'w') as csv_file: - csv.writer(csv_file).writerow(self._header) - # Open table for appending data - return open(file_name, 'a') - - -class ProcessesProfileLog(object): - - @attr.s(cmp=False, hash=False, slots=True) - class ServiceProcessesSummary(object): - """ - This data structure is a service summary accumulator. - When new stats are received, ServiceProcessesSummary is created - for each service and then cpu time and memory usage of each process - running this service is added to the summary. - Separate CSV summary file is created for each attribute of this model, - so we can compare services regarding usage of the specific resource. - """ - cpu_time = attr.ib(default=0) - cpu_percent = attr.ib(default=0.0) - resident_mem = attr.ib(default=0) - unique_mem = attr.ib(default=0) - children_resident_mem = attr.ib(default=0) - children_unique_mem = attr.ib(default=0) - instances = attr.ib(default=0) - - def __init__(self, include_lists=None): - """ Initializes profile log for cluster processes stats. - Renders header according to include_lists in advance and - creates base directory for processes stats profile log. - It also reads header of summary file (if it exists) to identify - order of columns. - - Args: - include_lists: An instance of IncludeLists describing which fields - of processes stats should be written to CSV log. - """ - self._include_lists = include_lists - self._header = ( - ['utc_timestamp'] - + converter.get_stats_header(process_stats.ProcessStats, - self._include_lists) - ) - self.write_detailed_stats = False - helper.ensure_directory(PROFILE_LOG_DIR) - self._summary_file_name_template = 'summary-{resource}.csv' - self._summary_columns = self._get_summary_columns() - - def write(self, processes_stats_dict): - """ Saves newly produced cluster processes stats to a list of CSV files. - One detailed file for each process on every node and 3 summary files. - - Args: - processes_stats_dict: A dict with node IP as key and list of - ProcessesStatsSnapshot as value. 
- """ - services_summary = collections.defaultdict(self.ServiceProcessesSummary) - - for node_ip, snapshot in processes_stats_dict.iteritems(): - - # Add info to the summary - for proc in snapshot.processes_stats: - # Add this process stats to service summary - service_name = proc.unified_service_name - if proc.application_id: - service_name = '{}-{}'.format(service_name, proc.application_id) - summary = services_summary[service_name] - summary.cpu_time += ( - proc.cpu.system + proc.cpu.user - + proc.children_stats_sum.cpu.system - + proc.children_stats_sum.cpu.user - ) - summary.cpu_percent += ( - proc.cpu.percent + proc.children_stats_sum.cpu.percent - ) - summary.resident_mem += proc.memory.resident - summary.unique_mem += proc.memory.unique - summary.children_resident_mem += proc.children_stats_sum.memory.resident - summary.children_unique_mem += proc.children_stats_sum.memory.unique - summary.instances += 1 - - if not self.write_detailed_stats: - continue - - # Write detailed process stats - for proc in snapshot.processes_stats: - # Write stats of the specific process to its CSV file - with self._prepare_file(node_ip, proc.monit_name) as csv_file: - row = ( - [snapshot.utc_timestamp] - + converter.stats_to_list(proc, self._include_lists) - ) - csv.writer(csv_file).writerow(row) - - # Update self._summary_columns ordered dict (set) - for service_name in services_summary: - if service_name not in self._summary_columns: - self._summary_columns.append(service_name) - - # Write summary - self._save_summary(services_summary) - - def _prepare_file(self, node_ip, monit_name): - """ Prepares CSV file with name processes//.csv - for appending new lines. - - Args: - node_ip: A string representation of node IP. - monit_name: A string name of process as it's shown in monit status. - Returns: - A file object opened for appending new data. - """ - processes_dir = path.join(PROFILE_LOG_DIR, node_ip, 'processes') - file_name = path.join(processes_dir, '{}.csv'.format(monit_name)) - if not path.isfile(file_name): - helper.ensure_directory(processes_dir) - # Create file and write header - with open(file_name, 'w') as csv_file: - csv.writer(csv_file).writerow(self._header) - # Open file for appending new data - return open(file_name, 'a') - - def _get_summary_columns(self): - """ Opens summary-cpu-time.csv file (other summary file would be fine) - and reads its header. Profiler needs to know order of columns previously - written to the summary. - - Returns: - A list of column names: ['utc_timestamp', , , ..]. - """ - cpu_summary_file_name = self._get_summary_file_name('cpu_time') - if not path.isfile(cpu_summary_file_name): - return ['utc_timestamp'] - with open(cpu_summary_file_name, 'r') as summary_file: - reader = csv.reader(summary_file) - return reader.next() # First line is a header - - def _save_summary(self, services_summary): - """ Saves services summary for each resource (cpu, resident memory and - unique memory). Output is 3 files (one for each resource) which - have a column for each service + utc_timestamp column. - - Args: - services_summary: A dict where key is name of service and value is - an instance of ServiceProcessesSummary. 
- """ - old_summary_columns = self._get_summary_columns() - - for attribute in attr.fields(self.ServiceProcessesSummary): - # For each kind of resource (cpu, resident_mem, unique_mem) - - summary_file_name = self._get_summary_file_name(attribute.name) - - if len(old_summary_columns) == 1: - # Summary wasn't written yet - write header line to summary file - with open(summary_file_name, 'w') as new_summary: - csv.writer(new_summary).writerow(self._summary_columns) - - if len(old_summary_columns) < len(self._summary_columns): - # Header need to be updated - add new services columns - with open(summary_file_name, 'r') as old_summary: - old_summary.readline() # Skip header - new_summary_file_name = '{}.new'.format(summary_file_name) - with open(new_summary_file_name, 'w') as new_summary: - # Write new header - csv.writer(new_summary).writerow(self._summary_columns) - # Recover old data - new_summary.writelines(old_summary) - rename(new_summary_file_name, summary_file_name) - - with open(summary_file_name, 'a') as summary_file: - # Append line with the newest summary - row = [time.mktime(datetime.now().timetuple())] - columns_iterator = self._summary_columns.__iter__() - columns_iterator.next() # Skip timestamp column - for service_name in columns_iterator: - service_summary = services_summary.get(service_name) - if service_summary: - row.append(getattr(service_summary, attribute.name)) - else: - row.append('') - csv.writer(summary_file).writerow(row) - - def _get_summary_file_name(self, resource_name): - name = self._summary_file_name_template.format(resource=resource_name) - name = name.replace('_', '-') - return path.join(PROFILE_LOG_DIR, name) - - -class ProxiesProfileLog(object): - - @attr.s(cmp=False, hash=False, slots=True) - class ServiceProxySummary(object): - """ - This data structure holds a list of useful proxy stats attributes. - Separate CSV summary file is created for each attribute of this model, - so we can easily compare services regarding important properties. - """ - requests_rate = attr.ib(default=0) - bytes_in_out = attr.ib(default=0) - errors = attr.ib(default=0) - - def __init__(self, include_lists=None): - """ Initializes profile log for cluster processes stats. - Renders header according to include_lists in advance and - creates base directory for processes stats profile log. - It also reads header of summary file (if it exists) to identify - order of columns. - - Args: - include_lists: An instance of IncludeLists describing which fields - of processes stats should be written to CSV log. - """ - self._include_lists = include_lists - self._header = ( - ['utc_timestamp'] - + converter.get_stats_header(proxy_stats.ProxyStats, self._include_lists) - ) - self.write_detailed_stats = False - helper.ensure_directory(PROFILE_LOG_DIR) - self._summary_file_name_template = 'summary-{property}.csv' - self._summary_columns = self._get_summary_columns() - - def write(self, proxies_stats_dict): - """ Saves newly produced cluster proxies stats to a list of CSV files. - One detailed file for each proxy on every load balancer node - (if detailed stats is enabled) and three additional files - which summarize info about all cluster proxies. - - Args: - proxies_stats_dict: A dict with node IP as key and list of - ProxyStatsSnapshot as value. 
- """ - services_summary = collections.defaultdict(self.ServiceProxySummary) - - for node_ip, snapshot in proxies_stats_dict.iteritems(): - - # Add info to the summary - for proxy in snapshot.proxies_stats: - # Add this proxy stats to service summary - service_name = proxy.unified_service_name - if proxy.application_id: - service_name = '{}-{}'.format(service_name, proxy.application_id) - summary = services_summary[service_name] - summary.requests_rate += proxy.frontend.req_rate - summary.bytes_in_out += proxy.frontend.bin + proxy.frontend.bout - summary.errors += proxy.frontend.hrsp_4xx + proxy.frontend.hrsp_5xx - - if not self.write_detailed_stats: - continue - - # Write detailed proxy stats - for proxy in snapshot.proxies_stats: - # Write stats of the specific proxy to its CSV file - with self._prepare_file(node_ip, proxy.name) as csv_file: - row = ( - [snapshot.utc_timestamp] - + converter.stats_to_list(proxy, self._include_lists) - ) - csv.writer(csv_file).writerow(row) - - # Update self._summary_columns list - for service_name in services_summary: - if service_name not in self._summary_columns: - self._summary_columns.append(service_name) - - # Write summary - self._save_summary(services_summary) - - def _prepare_file(self, node_ip, pxname): - """ Prepares CSV file with name /.csv - for appending new lines. - - Args: - node_ip: A string representation of load balancer node IP. - pxname: A string name of proxy as it's shown haproxy stats. - Returns: - A file object opened for appending new data. - """ - proxies_dir = path.join(PROFILE_LOG_DIR, node_ip, 'proxies') - file_name = path.join(proxies_dir, '{}.csv'.format(pxname)) - if not path.isfile(file_name): - helper.ensure_directory(proxies_dir) - # Create file and write header - with open(file_name, 'w') as csv_file: - csv.writer(csv_file).writerow(self._header) - # Open file for appending new data - return open(file_name, 'a') - - def _get_summary_columns(self): - """ Opens summary file and reads its header. - Profiler needs to know order of columns previously written to the summary. - - Returns: - A list of column names: ['utc_timestamp', , , ..]. - """ - reqs_summary_file_name = self._get_summary_file_name('requests_rate') - if not path.isfile(reqs_summary_file_name): - return ['utc_timestamp'] - with open(reqs_summary_file_name, 'r') as summary_file: - reader = csv.reader(summary_file) - return reader.next() # First line is a header - - def _save_summary(self, services_summary): - """ Saves services summary for each property (requests rate, errors and - sum of bytes in & out). Output is 3 files (one for each property) which - have a column for each service + utc_timestamp column. - - Args: - services_summary: A dict where key is name of service and value is - an instance of ServiceProxySummary. 
- """ - old_summary_columns = self._get_summary_columns() - - for attribute in attr.fields(self.ServiceProxySummary): - # For each property (requests_rate, errors, bytes_in_out) - - summary_file_name = self._get_summary_file_name(attribute.name) - - if len(old_summary_columns) == 1: - # Summary wasn't written yet - write header line to summary file - with open(summary_file_name, 'w') as new_summary: - csv.writer(new_summary).writerow(self._summary_columns) - - if len(old_summary_columns) < len(self._summary_columns): - # Header need to be updated - add new services columns - with open(summary_file_name, 'r') as old_summary: - old_summary.readline() # Skip header - new_summary_file_name = '{}.new'.format(summary_file_name) - with open(new_summary_file_name, 'w') as new_summary: - # Write new header - csv.writer(new_summary).writerow(self._summary_columns) - # Recover old data - new_summary.writelines(old_summary) - rename(new_summary_file_name, summary_file_name) - - with open(summary_file_name, 'a') as summary_file: - # Append line with the newest summary - row = [time.mktime(datetime.now().timetuple())] - columns_iterator = self._summary_columns.__iter__() - columns_iterator.next() # Skip timestamp column - for service_name in columns_iterator: - service_summary = services_summary.get(service_name) - if service_summary: - row.append(getattr(service_summary, attribute.name)) - else: - row.append('') - csv.writer(summary_file).writerow(row) - - def _get_summary_file_name(self, property_name): - name = self._summary_file_name_template.format(property=property_name) - name = name.replace('_', '-') - return path.join(PROFILE_LOG_DIR, name) diff --git a/Hermes/appscale/hermes/stats_app.py b/Hermes/appscale/hermes/stats_app.py deleted file mode 100644 index bc2767a0eb..0000000000 --- a/Hermes/appscale/hermes/stats_app.py +++ /dev/null @@ -1,427 +0,0 @@ -""" Module responsible for configuring Stats API and stats profiling. 
""" -import json - -import attr -import logging - -from tornado.ioloop import PeriodicCallback, IOLoop - -from appscale.hermes.handlers import Respond404Handler -from appscale.hermes.constants import ( - NODES_STATS_CONFIGS_NODE, - PROCESSES_STATS_CONFIGS_NODE, - PROXIES_STATS_CONFIGS_NODE -) -from appscale.hermes.producers.taskqueue_stats import TaskqueueStatsSource -from appscale.hermes.profile import ( - NodesProfileLog, ProcessesProfileLog, ProxiesProfileLog -) -from appscale.hermes.converter import IncludeLists -from appscale.hermes.handlers import ( - CurrentStatsHandler, CurrentClusterStatsHandler -) -from appscale.hermes.producers.cluster_stats import ( - cluster_nodes_stats, cluster_processes_stats, cluster_proxies_stats, - cluster_rabbitmq_stats, cluster_push_queues_stats, - cluster_taskqueue_stats, - cluster_cassandra_stats -) -from appscale.hermes.producers.cassandra_stats import CassandraStatsSource -from appscale.hermes.producers.node_stats import NodeStatsSource -from appscale.hermes.producers.process_stats import ProcessesStatsSource -from appscale.hermes.producers.proxy_stats import ProxiesStatsSource -from appscale.hermes.producers.rabbitmq_stats import PushQueueStatsSource -from appscale.hermes.producers.rabbitmq_stats import RabbitMQStatsSource - -logger = logging.getLogger(__name__) - - -DEFAULT_INCLUDE_LISTS = IncludeLists({ - # Node stats - 'node': ['utc_timestamp', 'cpu', 'memory', - 'partitions_dict', 'loadavg'], - 'node.cpu': ['percent', 'count'], - 'node.memory': ['available', 'total'], - 'node.partition': ['free', 'used'], - 'node.loadavg': ['last_5min'], - # Processes stats - 'process': ['monit_name', 'unified_service_name', 'application_id', - 'port', 'cpu', 'memory', 'children_stats_sum'], - 'process.cpu': ['user', 'system', 'percent'], - 'process.memory': ['resident', 'virtual', 'unique'], - 'process.children_stats_sum': ['cpu', 'memory'], - # Proxies stats - 'proxy': ['name', 'unified_service_name', 'application_id', - 'frontend', 'backend', 'servers_count'], - 'proxy.frontend': ['bin', 'bout', 'scur', 'smax', 'rate', - 'req_rate', 'req_tot', 'hrsp_4xx', 'hrsp_5xx'], - 'proxy.backend': ['qcur', 'scur', 'hrsp_5xx', 'qtime', 'rtime'], - # Taskqueue service stats - 'taskqueue': ['utc_timestamp', 'current_requests', 'cumulative', 'recent', - 'instances_count', 'failures'], - 'taskqueue.instance': ['start_timestamp_ms', 'current_requests', - 'cumulative', 'recent'], - 'taskqueue.cumulative': ['total', 'failed', 'pb_reqs', 'rest_reqs'], - 'taskqueue.recent': ['total', 'failed', 'avg_latency', - 'pb_reqs', 'rest_reqs'], - # RabbitMQ stats - 'rabbitmq': ['utc_timestamp', 'disk_free_alarm', 'mem_alarm', 'name', - 'partitions'], - # Push queue stats - 'queue': ['name', 'messages'], - # Cassandra stats - 'cassandra': ['utc_timestamp', 'nodes', 'missing_nodes', 'unknown_nodes'], - # Cassandra node stats - 'cassandra.node': ['address', 'status', 'state', 'load', 'owns_pct', - 'tokens_num'], -}) - - -@attr.s -class HandlerInfo(object): - """ Container for handler information. """ - handler_class = attr.ib() - init_kwargs = attr.ib() - - -def get_local_stats_api_routes(is_lb_node, is_tq_node, is_db_node): - """ Creates stats sources and API handlers for providing local - node, processes and proxies (only on LB nodes) stats. - - Args: - is_lb_node: A boolean indicating whether this node is load balancer. - is_tq_node: A boolean indicating whether this node runs taskqueue service. - is_db_node: A boolean indicating whether this node runs cassandra service. 
- Returns: - A list of route-handler tuples. - """ - - # Any node provides its node and processes stats - local_node_stats_handler = HandlerInfo( - handler_class=CurrentStatsHandler, - init_kwargs={'source': NodeStatsSource, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': [None]}) - local_processes_stats_handler = HandlerInfo( - handler_class=CurrentStatsHandler, - init_kwargs={'source': ProcessesStatsSource, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': [None]}) - - if is_lb_node: - # Only LB nodes provide proxies and service stats - local_proxies_stats_handler = HandlerInfo( - handler_class=CurrentStatsHandler, - init_kwargs={'source': ProxiesStatsSource, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': [None]} - ) - local_taskqueue_stats_handler = HandlerInfo( - handler_class=CurrentStatsHandler, - init_kwargs={'source': TaskqueueStatsSource(), - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': [None]} - ) - else: - # Stub handler for non-LB nodes - local_proxies_stats_handler = HandlerInfo( - handler_class=Respond404Handler, - init_kwargs={'reason': 'Only LB nodes provides proxies stats'} - ) - local_taskqueue_stats_handler = HandlerInfo( - handler_class=Respond404Handler, - init_kwargs={'reason': 'Only LB nodes provide taskqueue service stats'} - ) - - if is_tq_node: - # Only TQ nodes provide RabbitMQ stats. - local_rabbitmq_stats_handler = HandlerInfo( - handler_class=CurrentStatsHandler, - init_kwargs={'source': RabbitMQStatsSource, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': [None]} - ) - local_push_queue_stats_handler = HandlerInfo( - handler_class=CurrentStatsHandler, - init_kwargs={'source': PushQueueStatsSource, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': [None]} - ) - else: - # Stub handler for non-TQ nodes - local_rabbitmq_stats_handler = HandlerInfo( - handler_class=Respond404Handler, - init_kwargs={'reason': 'Only TQ nodes provide RabbitMQ stats'} - ) - local_push_queue_stats_handler = HandlerInfo( - handler_class=Respond404Handler, - init_kwargs={'reason': 'Only TQ nodes provide push queue stats'} - ) - - if is_db_node: - # Only DB nodes provide Cassandra stats. - local_cassandra_stats_handler = HandlerInfo( - handler_class=CurrentStatsHandler, - init_kwargs={'source': CassandraStatsSource, - 'default_include_lists': DEFAULT_INCLUDE_LISTS} - ) - else: - # Stub handler for non-DB nodes - local_cassandra_stats_handler = HandlerInfo( - handler_class=Respond404Handler, - init_kwargs={'reason': 'Only DB nodes provide Cassandra stats'} - ) - - routes = { - '/stats/local/node': local_node_stats_handler, - '/stats/local/processes': local_processes_stats_handler, - '/stats/local/proxies': local_proxies_stats_handler, - '/stats/local/rabbitmq': local_rabbitmq_stats_handler, - '/stats/local/push_queues': local_push_queue_stats_handler, - '/stats/local/taskqueue': local_taskqueue_stats_handler, - '/stats/local/cassandra': local_cassandra_stats_handler, - } - return [ - (route, handler.handler_class, handler.init_kwargs) - for route, handler in routes.iteritems() - ] - - -def get_cluster_stats_api_routes(is_lb): - """ Creates stats sources and API handlers for providing cluster - node, processes and proxies stats (on master node only). - If this node is slave, it creates stub handlers for cluster stats routes. - - Args: - is_lb: A boolean indicating whether this node is load balancer. - Returns: - A list of route-handler tuples. 
- """ - if is_lb: - # Only LB nodes provide cluster stats - cluster_node_stats_handler = HandlerInfo( - handler_class=CurrentClusterStatsHandler, - init_kwargs={'source': cluster_nodes_stats, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': {}} - ) - cluster_processes_stats_handler = HandlerInfo( - handler_class=CurrentClusterStatsHandler, - init_kwargs={'source': cluster_processes_stats, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': {}} - ) - cluster_proxies_stats_handler = HandlerInfo( - handler_class=CurrentClusterStatsHandler, - init_kwargs={'source': cluster_proxies_stats, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': {}} - ) - cluster_taskqueue_stats_handler = HandlerInfo( - handler_class=CurrentClusterStatsHandler, - init_kwargs={'source': cluster_taskqueue_stats, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': {}} - ) - cluster_rabbitmq_stats_handler = HandlerInfo( - handler_class=CurrentClusterStatsHandler, - init_kwargs={'source': cluster_rabbitmq_stats, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': {}} - ) - cluster_push_queue_stats_handler = HandlerInfo( - handler_class=CurrentClusterStatsHandler, - init_kwargs={'source': cluster_push_queues_stats, - 'default_include_lists': DEFAULT_INCLUDE_LISTS, - 'cache_container': {}} - ) - cluster_cassandra_stats_handler = HandlerInfo( - handler_class=CurrentClusterStatsHandler, - init_kwargs={'source': cluster_cassandra_stats, - 'default_include_lists': DEFAULT_INCLUDE_LISTS} - ) - else: - # Stub handler for slave nodes - cluster_stub_handler = HandlerInfo( - handler_class=Respond404Handler, - init_kwargs={'reason': 'Only LB nodes provide cluster stats'} - ) - cluster_node_stats_handler = cluster_stub_handler - cluster_processes_stats_handler = cluster_stub_handler - cluster_proxies_stats_handler = cluster_stub_handler - cluster_taskqueue_stats_handler = cluster_stub_handler - cluster_rabbitmq_stats_handler = cluster_stub_handler - cluster_push_queue_stats_handler = cluster_stub_handler - cluster_cassandra_stats_handler = cluster_stub_handler - - routes = { - '/stats/cluster/nodes': cluster_node_stats_handler, - '/stats/cluster/processes': cluster_processes_stats_handler, - '/stats/cluster/proxies': cluster_proxies_stats_handler, - '/stats/cluster/taskqueue': cluster_taskqueue_stats_handler, - '/stats/cluster/rabbitmq': cluster_rabbitmq_stats_handler, - '/stats/cluster/push_queues': cluster_push_queue_stats_handler, - '/stats/cluster/cassandra': cluster_cassandra_stats_handler, - } - return [ - (route, handler.handler_class, handler.init_kwargs) - for route, handler in routes.iteritems() - ] - - -class ProfilingManager(object): - """ - This manager watches stats profiling configs in Zookeeper, - when configs are changed it starts/stops/restarts periodical - tasks which writes profile log with proper parameters. - """ - - def __init__(self, zk_client): - """ Initializes instance of ProfilingManager. - Starts watching profiling configs in zookeeper. - - Args: - zk_client: an instance of KazooClient - started zookeeper client. - """ - self.nodes_profile_log = None - self.processes_profile_log = None - self.proxies_profile_log = None - self.nodes_profile_task = None - self.processes_profile_task = None - self.proxies_profile_task = None - - def bridge_to_ioloop(update_function): - """ Creates function which schedule execution of update_function - inside current IOLoop. 
- - Args: - update_function: a function to execute in IOLoop. - Returns: - A callable which schedules execution of update_function inside IOLoop. - """ - def update_in_ioloop(new_conf, znode_stat): - IOLoop.current().add_callback(update_function, new_conf, znode_stat) - return update_in_ioloop - - zk_client.DataWatch(NODES_STATS_CONFIGS_NODE, - bridge_to_ioloop(self.update_nodes_profiling_conf)) - zk_client.DataWatch(PROCESSES_STATS_CONFIGS_NODE, - bridge_to_ioloop(self.update_processes_profiling_conf)) - zk_client.DataWatch(PROXIES_STATS_CONFIGS_NODE, - bridge_to_ioloop(self.update_proxies_profiling_conf)) - - def update_nodes_profiling_conf(self, new_conf, znode_stat): - """ Handles new value of nodes profiling configs and - starts/stops profiling with proper parameters. - - Args: - new_conf: a string representing new value of zookeeper node. - znode_stat: an instance if ZnodeStat. - """ - if not new_conf: - logger.debug("No node stats profiling configs are specified yet") - return - logger.info("New nodes stats profiling configs: {}".format(new_conf)) - conf = json.loads(new_conf) - enabled = conf["enabled"] - interval = conf["interval"] - if enabled: - if not self.nodes_profile_log: - self.nodes_profile_log = NodesProfileLog(DEFAULT_INCLUDE_LISTS) - if self.nodes_profile_task: - self.nodes_profile_task.stop() - self.nodes_profile_task = _configure_profiling( - stats_source=cluster_nodes_stats, - profiler=self.nodes_profile_log, - interval=interval - ) - self.nodes_profile_task.start() - elif self.nodes_profile_task: - self.nodes_profile_task.stop() - self.nodes_profile_task = None - - def update_processes_profiling_conf(self, new_conf, znode_stat): - """ Handles new value of processes profiling configs and - starts/stops profiling with proper parameters. - - Args: - new_conf: a string representing new value of zookeeper node. - znode_stat: an instance if ZnodeStat. - """ - if not new_conf: - logger.debug("No processes stats profiling configs are specified yet") - return - logger.info("New processes stats profiling configs: {}".format(new_conf)) - conf = json.loads(new_conf) - enabled = conf["enabled"] - interval = conf["interval"] - detailed = conf["detailed"] - if enabled: - if not self.processes_profile_log: - self.processes_profile_log = ProcessesProfileLog(DEFAULT_INCLUDE_LISTS) - self.processes_profile_log.write_detailed_stats = detailed - if self.processes_profile_task: - self.processes_profile_task.stop() - self.processes_profile_task = _configure_profiling( - stats_source=cluster_processes_stats, - profiler=self.processes_profile_log, - interval=interval - ) - self.processes_profile_task.start() - elif self.processes_profile_task: - self.processes_profile_task.stop() - self.processes_profile_task = None - - def update_proxies_profiling_conf(self, new_conf, znode_stat): - """ Handles new value of proxies profiling configs and - starts/stops profiling with proper parameters. - - Args: - new_conf: a string representing new value of zookeeper node. - znode_stat: an instance if ZnodeStat. 
- """ - if not new_conf: - logger.debug("No proxies stats profiling configs are specified yet") - return - logger.info("New proxies stats profiling configs: {}".format(new_conf)) - conf = json.loads(new_conf) - enabled = conf["enabled"] - interval = conf["interval"] - detailed = conf["detailed"] - if enabled: - if not self.proxies_profile_log: - self.proxies_profile_log = ProxiesProfileLog(DEFAULT_INCLUDE_LISTS) - self.proxies_profile_log.write_detailed_stats = detailed - if self.proxies_profile_task: - self.proxies_profile_task.stop() - self.proxies_profile_task = _configure_profiling( - stats_source=cluster_proxies_stats, - profiler=self.proxies_profile_log, - interval=interval - ) - self.proxies_profile_task.start() - elif self.proxies_profile_task: - self.proxies_profile_task.stop() - self.proxies_profile_task = None - - -def _configure_profiling(stats_source, profiler, interval): - - def write_stats_callback(future_stats): - """ Gets stats from already finished future wrapper - and calls profiler to write the stats. - - Args: - future_stats: A Future wrapper for the cluster stats. - """ - stats = future_stats.result()[0] # result is a tuple (stats, failures) - profiler.write(stats) - - def profiling_periodical_callback(): - """ Triggers asynchronous stats collection and schedules writing - of the cluster stats (when it's collected) to the stats profile. - """ - future_stats = stats_source.get_current(max_age=0) - IOLoop.current().add_future(future_stats, write_stats_callback) - - return PeriodicCallback(profiling_periodical_callback, interval*1000) From 6ff1cc59c9ff768ba2d8ba621fec663b43f0169a Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 29 Mar 2019 19:58:53 +0200 Subject: [PATCH 003/221] Replace tornado usages with asyncio/aiohttp --- Hermes/appscale/hermes/constants.py | 4 +- Hermes/appscale/hermes/handlers.py | 18 +-- Hermes/appscale/hermes/hermes_server.py | 4 +- .../hermes/producers/cassandra_stats.py | 60 +++++--- .../hermes/producers/cluster_stats.py | 142 ++++++++---------- .../hermes/producers/process_stats.py | 8 +- .../appscale/hermes/producers/proxy_stats.py | 40 ++--- .../hermes/producers/rabbitmq_stats.py | 57 ++++--- .../hermes/producers/taskqueue_stats.py | 56 +++---- 9 files changed, 189 insertions(+), 200 deletions(-) diff --git a/Hermes/appscale/hermes/constants.py b/Hermes/appscale/hermes/constants.py index adce8f3588..a09e9bc9ec 100644 --- a/Hermes/appscale/hermes/constants.py +++ b/Hermes/appscale/hermes/constants.py @@ -8,8 +8,8 @@ # Path to dictionary to write profile log PROFILE_LOG_DIR = '/var/log/appscale/profile' -# The amount of time to wait for local stats from a slave node. -STATS_REQUEST_TIMEOUT = 60 +# The amount of time to wait for remote http requests. 
+REMOTE_REQUEST_TIMEOUT = 60 # Stats which were produce less than X seconds ago is considered as current ACCEPTABLE_STATS_AGE = 10 diff --git a/Hermes/appscale/hermes/handlers.py b/Hermes/appscale/hermes/handlers.py index 40639aec29..cbadc446fc 100644 --- a/Hermes/appscale/hermes/handlers.py +++ b/Hermes/appscale/hermes/handlers.py @@ -6,6 +6,7 @@ from aiohttp import web +from appscale.common import appscale_info from appscale.hermes.constants import SECRET_HEADER, ACCEPTABLE_STATS_AGE from appscale.hermes.converter import ( stats_to_dict, IncludeLists, WrongIncludeLists @@ -55,16 +56,13 @@ }) -def verify_secret_middleware(secret): - async def verify_secret(request, handler): - if request.headers.get(SECRET_HEADER) != secret: - logger.warn("Received bad secret from {client}" - .format(client=request.remote)) - return web.Response(status=http.HTTPStatus.FORBIDDEN, - reason="Bad secret") - return await handler(request) - - return verify_secret +async def verify_secret_middleware(request, handler): + if request.headers.get(SECRET_HEADER) != appscale_info.get_secret(): + logger.warn("Received bad secret from {client}" + .format(client=request.remote)) + return web.Response(status=http.HTTPStatus.FORBIDDEN, + reason="Bad secret") + return await handler(request) class LocalStatsHandler(object): diff --git a/Hermes/appscale/hermes/hermes_server.py b/Hermes/appscale/hermes/hermes_server.py index 7dafaace2c..f004dfe25d 100644 --- a/Hermes/appscale/hermes/hermes_server.py +++ b/Hermes/appscale/hermes/hermes_server.py @@ -142,9 +142,7 @@ def main(): is_tq = (my_ip in appscale_info.get_taskqueue_nodes()) is_db = (my_ip in appscale_info.get_db_ips()) - secret = appscale_info.get_secret() - security_middleware = handlers.verify_secret_middleware(secret) - app = web.Application(middlewares=[security_middleware]) + app = web.Application(middlewares=[handlers.verify_secret_middleware]) route_items = [] route_items += get_local_stats_api_routes(is_lb, is_tq, is_db) diff --git a/Hermes/appscale/hermes/producers/cassandra_stats.py b/Hermes/appscale/hermes/producers/cassandra_stats.py index 3b15b496f8..a1a28a6b97 100644 --- a/Hermes/appscale/hermes/producers/cassandra_stats.py +++ b/Hermes/appscale/hermes/producers/cassandra_stats.py @@ -1,16 +1,17 @@ """ Fetches `nodetool status` info. """ +import asyncio import logging import re import time import attr -from tornado import process, gen from appscale.common import appscale_info from appscale.hermes.converter import Meta, include_list_name # The endpoint used for retrieving queue stats. -NODETOOL_STATUS_COMMAND = ['/opt/cassandra/cassandra/bin/nodetool', 'status'] +NODETOOL_STATUS_COMMAND = '/opt/cassandra/cassandra/bin/nodetool status' +NODETOOL_STATUS_TIMEOUT = 60 logger = logging.getLogger(__name__) @@ -91,33 +92,47 @@ class CassandraStatsSource(object): } @classmethod - @gen.coroutine - def get_current(cls): + async def get_current(cls): """ Retrieves Cassandra status info. Returns: - An instance of RabbitMQStatsSnapshot. + An instance of CassandraStatsSnapshot. 
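+    Raises:
+      NodetoolStatusError: if the subprocess times out, exits with a
+        non-zero code, or its output lacks the expected status header.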
""" start = time.time() + + process = await asyncio.create_subprocess_shell( + NODETOOL_STATUS_COMMAND, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + logger.info('Started subprocess `{}` (pid: {})' + .format(NODETOOL_STATUS_COMMAND, process.pid)) + try: - proc = process.Subprocess( - NODETOOL_STATUS_COMMAND, - stdout=process.Subprocess.STREAM, - stderr=process.Subprocess.STREAM + # Wait for the subprocess to finish + stdout, stderr = await asyncio.wait_for( + process.communicate(), NODETOOL_STATUS_TIMEOUT ) - status = yield proc.stdout.read_until_close() - err = yield proc.stderr.read_until_close() - if err: - logger.error(err) - except process.CalledProcessError as err: - raise NodetoolStatusError(err) + except asyncio.TimeoutError: + raise NodetoolStatusError( + 'Timed out waiting for subprocess `{}` (pid: {})' + .format(NODETOOL_STATUS_COMMAND, process.pid) + ) + + output = stdout.encode() + error = stderr.encode() + if error: + logger.warning(error) + if process.returncode != 0: + raise NodetoolStatusError('Subprocess failed with return code {} ({})' + .format(process.returncode, error)) known_db_nodes = set(appscale_info.get_db_ips()) nodes = [] shown_nodes = set() - if cls.SINGLENODE_HEADER_PATTERN.search(status): - for match in cls.SINGLENODE_STATUS_PATTERN.finditer(status): + if cls.SINGLENODE_HEADER_PATTERN.search(output): + for match in cls.SINGLENODE_STATUS_PATTERN.finditer(output): address = match.group('address') status = match.group('status') state = match.group('state') @@ -140,8 +155,8 @@ def get_current(cls): nodes.append(node_stats) shown_nodes.add(address) - elif cls.MULTINODE_HEADER_PATTERN.search(status): - for match in cls.MULTINODE_STATUS_PATTERN.finditer(status): + elif cls.MULTINODE_HEADER_PATTERN.search(output): + for match in cls.MULTINODE_STATUS_PATTERN.finditer(output): address = match.group('address') status = match.group('status') state = match.group('state') @@ -166,7 +181,8 @@ def get_current(cls): else: raise NodetoolStatusError( - '`nodetool status` output does not contain expected header' + '`{}` output does not contain expected header. Actual output:\n{}' + .format(NODETOOL_STATUS_COMMAND, output) ) snapshot = CassandraStatsSnapshot( @@ -176,5 +192,5 @@ def get_current(cls): unknown_nodes=list(shown_nodes - known_db_nodes) ) logger.info('Prepared Cassandra nodes status in ' - '{elapsed:.1f}s.'.format(elapsed=time.time()-start)) - raise gen.Return(snapshot) + '{elapsed:.2f}s.'.format(elapsed=time.time()-start)) + return snapshot diff --git a/Hermes/appscale/hermes/producers/cluster_stats.py b/Hermes/appscale/hermes/producers/cluster_stats.py index 6fbc5916c5..23c815fe6c 100644 --- a/Hermes/appscale/hermes/producers/cluster_stats.py +++ b/Hermes/appscale/hermes/producers/cluster_stats.py @@ -1,21 +1,14 @@ """ Implementation of stats sources for cluster stats. 
""" -import json +import asyncio +import inspect import logging -import sys import time - import random -import socket + +import aiohttp from appscale.common import appscale_info -from tornado import gen, httpclient -from tornado.options import options -from tornado.simple_httpclient import SimpleAsyncHTTPClient - -from appscale.hermes import constants -from appscale.hermes.constants import SECRET_HEADER -from appscale.hermes import converter -from appscale.hermes.constants import STATS_REQUEST_TIMEOUT +from appscale.hermes import constants, converter from appscale.hermes.producers import ( proxy_stats, node_stats, process_stats, rabbitmq_stats, taskqueue_stats, cassandra_stats @@ -23,12 +16,11 @@ logger = logging.getLogger(__name__) -# Allow tornado to fetch up to 100 concurrent requests -httpclient.AsyncHTTPClient.configure(SimpleAsyncHTTPClient, max_clients=100) +max_concurrency = asyncio.Semaphore(100) -class BadStatsListFormat(ValueError): - """ Is used when Hermes slave responds with improperly formatted stats. """ +class RemoteHermesError(aiohttp.ClientError): + """ Represents an error while getting stats from remote Hermes. """ pass @@ -43,9 +35,8 @@ def __init__(self, ips_getter, method_path, stats_model, local_stats_source): self.stats_model = stats_model self.local_stats_source = local_stats_source - @gen.coroutine - def get_current(self, max_age=None, include_lists=None, - exclude_nodes=None): + async def get_current(self, max_age=None, include_lists=None, + exclude_nodes=None): """ Makes concurrent asynchronous http calls to cluster nodes and collects current stats. Local stats is got from local stats source. @@ -60,47 +51,49 @@ def get_current(self, max_age=None, include_lists=None, exclude_nodes = exclude_nodes or [] start = time.time() + stats_per_node = {} + failures = {} + # Do multiple requests asynchronously and wait for all results - stats_or_error_per_node = yield { - node_ip: self._stats_from_node_async(node_ip, max_age, include_lists) - for node_ip in self.ips_getter() if node_ip not in exclude_nodes - } - stats_per_node = { - ip: snapshot_or_err - for ip, snapshot_or_err in stats_or_error_per_node.iteritems() - if not isinstance(snapshot_or_err, (str, unicode)) - } - failures = { - ip: snapshot_or_err - for ip, snapshot_or_err in stats_or_error_per_node.iteritems() - if isinstance(snapshot_or_err, (str, unicode)) - } - logger.info("Fetched {stats} from {nodes} nodes in {elapsed:.1f}s." - .format(stats=self.stats_model.__name__, - nodes=len(stats_per_node), - elapsed=time.time() - start)) - raise gen.Return((stats_per_node, failures)) - - @gen.coroutine - def _stats_from_node_async(self, node_ip, max_age, include_lists): + async def get_remote_result(node_ip): + try: + stats_per_node[node_ip] = await self._stats_from_node_async( + node_ip, max_age, include_lists + ) + except RemoteHermesError as err: + failures[node_ip] = str(err) + + async with max_concurrency: + await asyncio.wait([ + get_remote_result(node_ip) + for node_ip in self.ips_getter() if node_ip not in exclude_nodes + ]) + + logger.info("Fetched {stats} from {nodes} nodes in {elapsed:.2f}s." 
+ .format(stats=self.stats_model.__name__, + nodes=len(stats_per_node), + elapsed=time.time()-start)) + return stats_per_node, failures + + async def _stats_from_node_async(self, node_ip, max_age, include_lists): if node_ip == appscale_info.get_private_ip(): try: snapshot = self.local_stats_source.get_current() - if isinstance(snapshot, gen.Future): - snapshot = yield snapshot + if inspect.isawaitable(snapshot): + snapshot = await snapshot + return snapshot except Exception as err: - snapshot = unicode(err) - logger.exception( - u"Failed to prepare local stats: {err}".format(err=err)) + logger.error(u"Failed to prepare local stats: {err}".format(err=err)) + raise RemoteHermesError(str(err)) else: - snapshot = yield self._fetch_remote_stats_async( - node_ip, max_age, include_lists) - raise gen.Return(snapshot) + snapshot = await self._fetch_remote_stats_async( + node_ip, max_age, include_lists + ) + return snapshot - @gen.coroutine - def _fetch_remote_stats_async(self, node_ip, max_age, include_lists): + async def _fetch_remote_stats_async(self, node_ip, max_age, include_lists): # Security header - headers = {SECRET_HEADER: options.secret} + headers = {constants.SECRET_HEADER: appscale_info.get_secret()} # Build query arguments arguments = {} if include_lists is not None: @@ -110,35 +103,32 @@ def _fetch_remote_stats_async(self, node_ip, max_age, include_lists): url = "http://{ip}:{port}/{path}".format( ip=node_ip, port=constants.HERMES_PORT, path=self.method_path) - request = httpclient.HTTPRequest( - url=url, method='GET', body=json.dumps(arguments), headers=headers, - request_timeout=STATS_REQUEST_TIMEOUT, allow_nonstandard_methods=True - ) - async_client = httpclient.AsyncHTTPClient() try: - # Send Future object to coroutine and suspend till result is ready - response = yield async_client.fetch(request) - except (socket.error, httpclient.HTTPError) as err: - msg = u"Failed to get stats from {url} ({err})".format(url=url, err=err) - if hasattr(err, 'response') and err.response and err.response.body: - msg += u"\nBODY: {body}".format(body=err.response.body) - logger.error(msg) - raise gen.Return(unicode(err)) - - try: - snapshot = json.loads(response.body) - raise gen.Return(converter.stats_from_dict(self.stats_model, snapshot)) - except TypeError as err: - msg = u"Can't parse stats snapshot ({})".format(err) - raise BadStatsListFormat(msg), None, sys.exc_info()[2] - - -def get_random_lb_node(): + async with aiohttp.ClientSession() as session: + awaitable_get = session.get( + url, headers=headers, json=arguments, + timeout=constants.REMOTE_REQUEST_TIMEOUT + ) + async with awaitable_get as resp: + if resp.status >= 400: + resp_text = await resp.text() + err_message = resp_text or resp.reason + logger.error("Failed to get {} ({})".format(url, err_message)) + raise RemoteHermesError(err_message) + snapshot = await resp.json() + return converter.stats_from_dict(self.stats_model, snapshot) + + except aiohttp.ClientError as err: + logger.error("Failed to get {} ({})".format(url, err)) + raise RemoteHermesError(str(err)) + + +async def get_random_lb_node(): return [random.choice(appscale_info.get_load_balancer_ips())] -def get_random_db_node(): +async def get_random_db_node(): return [random.choice(appscale_info.get_db_ips())] diff --git a/Hermes/appscale/hermes/producers/process_stats.py b/Hermes/appscale/hermes/producers/process_stats.py index 36d81aaa9e..2bbc8923de 100644 --- a/Hermes/appscale/hermes/producers/process_stats.py +++ b/Hermes/appscale/hermes/producers/process_stats.py @@ -129,8 
+129,8 @@ def get_current(): stats = _process_stats(pid, service, monit_name, private_ip) processes_stats.append(stats) except psutil.Error as err: - logger.warn(u"Unable to get process stats for {monit_name} ({err})" - .format(monit_name=monit_name, err=err)) + logger.warn("Unable to get process stats for {monit_name} ({err})" + .format(monit_name=monit_name, err=err)) # Add processes managed by the ServiceManager. for server in ServiceManager.get_state(): @@ -140,8 +140,8 @@ def get_current(): private_ip) processes_stats.append(stats) except psutil.Error as error: - logger.warning(u'Unable to get process stats for ' - u'{} ({})'.format(server, error)) + logger.warning('Unable to get process stats for ' + '{} ({})'.format(server, error)) stats = ProcessesStatsSnapshot( utc_timestamp=time.mktime(datetime.now().timetuple()), diff --git a/Hermes/appscale/hermes/producers/proxy_stats.py b/Hermes/appscale/hermes/producers/proxy_stats.py index 5b0e39a727..4c716ba0b3 100644 --- a/Hermes/appscale/hermes/producers/proxy_stats.py +++ b/Hermes/appscale/hermes/producers/proxy_stats.py @@ -1,3 +1,5 @@ +import asyncio + import StringIO import csv import logging @@ -267,21 +269,17 @@ def _get_field_value(row, field_name): return value -def get_stats(socket_path): - client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) - client.connect(socket_path) +async def get_stats(socket_path): + reader, writer = await asyncio.open_unix_connection(socket_path) try: stats_output = StringIO.StringIO() - client.send('show stat\n') - while True: - data = client.recv(1024) - if not data: - break - stats_output.write(data) + writer.write(b'show stat\n') + stats_output.write(await reader.read()) stats_output.seek(0) return stats_output finally: - client.close() + reader.close() + writer.close() def get_frontend_ip_port(configs_dir, proxy_name): @@ -301,9 +299,9 @@ def get_connections(ip, port): if conn.laddr == (ip, port) and conn.status == 'ESTABLISHED') -def get_stats_from_one_haproxy(socket_path, configs_dir): +async def get_stats_from_one_haproxy(socket_path, configs_dir): # Get CSV table with haproxy stats - csv_buf = get_stats(socket_path) + csv_buf = await get_stats(socket_path) csv_buf.seek(2) # Seek to the beginning but skip "# " in the first row table = csv.DictReader(csv_buf, delimiter=',') if ProxiesStatsSource.first_run: @@ -395,7 +393,7 @@ class ProxiesStatsSource(object): first_run = True @staticmethod - def get_current(): + async def get_current(): """ Method which parses haproxy stats and returns detailed proxy statistics for all proxies. @@ -405,27 +403,29 @@ def get_current(): start = time.time() proxy_stats_list = [] - for haproxy_process_name, info in HAPROXY_PROCESSES.iteritems(): + for haproxy_process_name, info in HAPROXY_PROCESSES.items(): logger.debug("Processing {} haproxy stats".format(haproxy_process_name)) - proxy_stats_list += get_stats_from_one_haproxy( - info['socket'], info['configs']) + proxy_stats_list += await get_stats_from_one_haproxy( + info['socket'], info['configs'] + ) stats = ProxiesStatsSnapshot( utc_timestamp=time.mktime(datetime.now().timetuple()), proxies_stats=proxy_stats_list ) - logger.info("Prepared stats about {prox} proxies in {elapsed:.1f}s." - .format(prox=len(proxy_stats_list), elapsed=time.time()-start)) + logger.info("Prepared stats about {prox} proxies in {elapsed:.2f}s." 
+ .format(prox=len(proxy_stats_list), elapsed=time.time()-start)) return stats -def get_service_instances(stats_socket_path, pxname): +async def get_service_instances(stats_socket_path, pxname): safe_pxname = re.escape(pxname) ip_port_list = [] ip_port_pattern = re.compile( "\n{proxy},{proxy}-(?P[.\w]+:\d+)".format(proxy=safe_pxname) ) - stats_csv = get_stats(stats_socket_path).read() + stats_buf = await get_stats(stats_socket_path) + stats_csv = stats_buf.read() for match in re.finditer(ip_port_pattern, stats_csv): ip_port_list.append(match.group("port_ip")) return ip_port_list diff --git a/Hermes/appscale/hermes/producers/rabbitmq_stats.py b/Hermes/appscale/hermes/producers/rabbitmq_stats.py index 8f9f3b3dc6..ce9424316d 100644 --- a/Hermes/appscale/hermes/producers/rabbitmq_stats.py +++ b/Hermes/appscale/hermes/producers/rabbitmq_stats.py @@ -1,14 +1,13 @@ """ Fetches RabbitMQ status. """ import base64 -import json import logging import socket import time +import aiohttp import attr -from tornado import gen -from tornado.httpclient import AsyncHTTPClient +from appscale.hermes import constants from appscale.hermes.converter import Meta, include_list_name # The port used by the RabbitMQ management plugin. @@ -64,8 +63,7 @@ class RabbitMQStatsSource(object): first_run = True @staticmethod - @gen.coroutine - def get_current(): + async def get_current(): """ Retrieves RabbitMQ stats for the current node. Returns: @@ -77,17 +75,18 @@ def get_current(): url = 'http://localhost:{}{}/{}'.format(API_PORT, NODES_API, node_name) creds = base64.b64encode(':'.join([USER, PASS])) headers = {'Authorization': 'Basic {}'.format(creds)} - async_client = AsyncHTTPClient() - try: - response = yield async_client.fetch(url, headers=headers) - except Exception as error: - raise APICallFailed('Call to {} failed: {}'.format(url, error)) try: - node_info = json.loads(response.body) - except ValueError: - raise APICallFailed('Invalid response from ' - '{}: {}'.format(url, response.body)) + async with aiohttp.ClientSession() as session: + awaitable_get = session.get( + url, headers=headers, timeout=constants.REMOTE_REQUEST_TIMEOUT + ) + async with awaitable_get as resp: + resp.raise_for_status() + node_info = await resp.json() + except aiohttp.ClientError as err: + logger.error("Failed to get {} ({})".format(url, err)) + raise APICallFailed(str(err)) snapshot = RabbitMQStatsSnapshot( utc_timestamp=int(time.time()), @@ -97,8 +96,8 @@ def get_current(): partitions=node_info['partitions'] ) logger.info('Prepared RabbitMQ node stats in ' - '{elapsed:.1f}s.'.format(elapsed=time.time()-start)) - raise gen.Return(snapshot) + '{elapsed:.2f}s.'.format(elapsed=time.time()-start)) + return snapshot class PushQueueStatsSource(object): @@ -107,8 +106,7 @@ class PushQueueStatsSource(object): first_run = True @staticmethod - @gen.coroutine - def get_current(): + async def get_current(): """ Retrieves push queue stats. 
Returns: @@ -119,17 +117,18 @@ def get_current(): url = 'http://localhost:{}{}'.format(API_PORT, QUEUES_API) creds = base64.b64encode(':'.join([USER, PASS])) headers = {'Authorization': 'Basic {}'.format(creds)} - async_client = AsyncHTTPClient() - try: - response = yield async_client.fetch(url, headers=headers) - except Exception as error: - raise APICallFailed('Call to {} failed: {}'.format(url, error)) try: - queues_info = json.loads(response.body) - except ValueError: - raise APICallFailed('Invalid response from ' - '{}: {}'.format(url, response.body)) + async with aiohttp.ClientSession() as session: + awaitable_get = session.get( + url, headers=headers, timeout=constants.REMOTE_REQUEST_TIMEOUT + ) + async with awaitable_get as resp: + resp.raise_for_status() + queues_info = await resp.json() + except aiohttp.ClientError as err: + logger.error("Failed to get {} ({})".format(url, err)) + raise APICallFailed(str(err)) queue_stats = [ PushQueueStats(name=queue['name'], messages=queue['messages']) @@ -139,5 +138,5 @@ def get_current(): queues=queue_stats ) logger.info('Prepared push queue stats in ' - '{elapsed:.1f}s.'.format(elapsed=time.time()-start)) - raise gen.Return(snapshot) + '{elapsed:.2f}s.'.format(elapsed=time.time()-start)) + return snapshot diff --git a/Hermes/appscale/hermes/producers/taskqueue_stats.py b/Hermes/appscale/hermes/producers/taskqueue_stats.py index 98203b801e..b6fabcc92d 100644 --- a/Hermes/appscale/hermes/producers/taskqueue_stats.py +++ b/Hermes/appscale/hermes/producers/taskqueue_stats.py @@ -1,22 +1,17 @@ """ Fetches TaskQueue service statistics. """ -import json +import asyncio import logging - -import attr import collections - -import sys - import time -import socket -from tornado import gen, httpclient +import aiohttp +import attr +from appscale.hermes.constants import REMOTE_REQUEST_TIMEOUT from appscale.hermes.converter import include_list_name, Meta - -# The endpoint used for retrieving node stats. from appscale.hermes.producers import proxy_stats +# The endpoint used for retrieving node stats. 
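# A minimal sketch of the aiohttp fetch pattern this module uses after the
# change: open a ClientSession, issue a GET with a timeout, raise for error
# statuses and decode the JSON body. The URL below is a hypothetical
# placeholder, not a real Hermes endpoint.
import asyncio
import aiohttp

async def fetch_json(url, timeout=10):
  async with aiohttp.ClientSession() as session:
    async with session.get(url, timeout=timeout) as resp:
      resp.raise_for_status()
      return await resp.json(content_type=None)

# Hypothetical usage:
#   asyncio.get_event_loop().run_until_complete(
#     fetch_json('http://127.0.0.1:8080/service-stats'))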
STATS_ENDPOINT = '/service-stats' logger = logging.getLogger(__name__) @@ -88,30 +83,24 @@ class TaskqueueStatsSource(object): IGNORE_RECENT_OLDER_THAN = 5*60*1000 # 5 minutes REQUEST_TIMEOUT = 10 # Wait up to 10 seconds - @gen.coroutine - def fetch_stats_from_instance(self, ip_port): + async def fetch_stats_from_instance(self, ip_port): url = "http://{ip_port}{path}?last_milliseconds={max_age}".format( ip_port=ip_port, path=STATS_ENDPOINT, max_age=self.IGNORE_RECENT_OLDER_THAN ) - request = httpclient.HTTPRequest( - url=url, method='GET', request_timeout=self.REQUEST_TIMEOUT - ) - async_client = httpclient.AsyncHTTPClient() - try: - # Send Future object to coroutine and suspend till result is ready - response = yield async_client.fetch(request) - except (socket.error, httpclient.HTTPError) as err: - msg = u"Failed to get stats from {url} ({err})".format(url=url, err=err) - if hasattr(err, 'response') and err.response and err.response.body: - msg += u"\nBODY: {body}".format(body=err.response.body) + async with aiohttp.ClientSession() as session: + awaitable_get = session.get(url, timeout=REMOTE_REQUEST_TIMEOUT) + async with awaitable_get as resp: + resp.raise_for_status() + stats_body = await resp.json() + except aiohttp.ClientError as err: + msg = u"Failed to get {url} ({err})".format(url=url, err=err) logger.error(msg) - failure = FailureSnapshot(ip_port=ip_port, error=unicode(err)) - raise gen.Return(failure) + failure = FailureSnapshot(ip_port=ip_port, error=str(err)) + return failure try: - stats_body = json.loads(response.body) cumulative_dict = stats_body["cumulative_counters"] recent_dict = stats_body["recent_stats"] cumulative = CumulativeStatsSnapshot( @@ -138,10 +127,10 @@ def fetch_stats_from_instance(self, ip_port): cumulative=cumulative, recent=recent, ) - raise gen.Return(instance_stats_snapshot) + return instance_stats_snapshot except (TypeError, KeyError) as err: msg = u"Can't parse taskqueue ({})".format(err) - raise BadTaskqueueStatsFormat(msg), None, sys.exc_info()[2] + raise BadTaskqueueStatsFormat(msg) from err @staticmethod def summarise_cumulative(instances_stats): @@ -189,18 +178,17 @@ def summarise_recent(instances_stats): by_rest_status=by_rest_status_sum ) - @gen.coroutine - def get_current(self): + async def get_current(self): start_time = time.time() # Find all taskqueue servers - tq_instances = proxy_stats.get_service_instances( + tq_instances = await proxy_stats.get_service_instances( proxy_stats.HAPROXY_SERVICES_STATS_SOCKET_PATH, "TaskQueue" ) # Query all TQ servers - instances_responses = yield [ + instances_responses = await asyncio.wait([ self.fetch_stats_from_instance(ip_port) for ip_port in tq_instances - ] + ]) # Select successful instances_stats = [ stats_or_err for stats_or_err in instances_responses @@ -229,7 +217,7 @@ def get_current(self): "Fetched Taskqueue server stats from {nodes} instances in {elapsed:.1f}s." 
.format(nodes=len(instances_stats), elapsed=time.time() - start_time) ) - raise gen.Return(stats) + return stats taskqueue_stats_source = TaskqueueStatsSource() From 1b10b2c7b70d0ff43b0e3d0f21543b43624fa899 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 29 Mar 2019 20:18:43 +0200 Subject: [PATCH 004/221] Automated 2to3 modification --- Hermes/appscale/hermes/constants.py | 2 +- Hermes/appscale/hermes/converter.py | 12 ++++++------ Hermes/appscale/hermes/handlers.py | 4 ++-- .../appscale/hermes/producers/cluster_stats.py | 2 +- Hermes/appscale/hermes/producers/proxy_stats.py | 16 +++++++--------- .../appscale/hermes/producers/taskqueue_stats.py | 12 ++++++------ .../hermes/producers/tests/test_cluster_stats.py | 8 ++++---- .../hermes/producers/tests/test_proxy.py | 12 ++++++------ .../hermes/producers/tests/test_taskqueue.py | 2 +- .../tests/test_unified_service_names.py | 4 ++-- Hermes/appscale/hermes/unified_service_names.py | 2 +- 11 files changed, 37 insertions(+), 39 deletions(-) diff --git a/Hermes/appscale/hermes/constants.py b/Hermes/appscale/hermes/constants.py index a09e9bc9ec..c8e314c381 100644 --- a/Hermes/appscale/hermes/constants.py +++ b/Hermes/appscale/hermes/constants.py @@ -27,7 +27,7 @@ class _MissedValue(object): in haproxy stats. """ - def __nonzero__(self): + def __bool__(self): return False def __repr__(self): diff --git a/Hermes/appscale/hermes/converter.py b/Hermes/appscale/hermes/converter.py index c8177f1e93..984e60b86e 100644 --- a/Hermes/appscale/hermes/converter.py +++ b/Hermes/appscale/hermes/converter.py @@ -96,13 +96,13 @@ def __init__(self, include_lists): self._lists = {} self._original_dict = include_lists - for list_name, fields_to_include in include_lists.iteritems(): + for list_name, fields_to_include in include_lists.items(): try: known_attributes = self.all_attributes[list_name] except KeyError: raise WrongIncludeLists( 'Include list "{name}" is unknown, available are: {known}' - .format(name=list_name, known=self.all_attributes.keys()) + .format(name=list_name, known=list(self.all_attributes.keys())) ) # List of field names will be transformed to set of attr.Attribute @@ -152,7 +152,7 @@ def is_subset_of(self, include_lists): """ if self is include_lists: return True - for list_name, include_list in self._lists.iteritems(): + for list_name, include_list in self._lists.items(): corresponding_list = include_lists._lists.get(list_name) if corresponding_list is None: return False @@ -184,11 +184,11 @@ def stats_to_dict(stats, include_lists=None): if value is MISSED: continue if value and isinstance(value, dict): - if attr.has(value.itervalues().next()): + if attr.has(next(iter(value.values()))): # Only collections of attr types (stats models) should be converted value = { k: stats_to_dict(v, include_lists) - for k, v in value.iteritems() + for k, v in value.items() } elif value and isinstance(value, list): if attr.has(value[0]): @@ -231,7 +231,7 @@ def stats_from_dict(stats_class, dictionary, strict=False): if nested_stats_class: changed_kwargs[att.name] = { key: stats_from_dict(nested_stats_class, value, strict) - for key, value in dictionary[att.name].iteritems() + for key, value in dictionary[att.name].items() } continue # Try to unpack list of nested entities diff --git a/Hermes/appscale/hermes/handlers.py b/Hermes/appscale/hermes/handlers.py index cbadc446fc..4e17211c2a 100644 --- a/Hermes/appscale/hermes/handlers.py +++ b/Hermes/appscale/hermes/handlers.py @@ -166,7 +166,7 @@ async def get(self, request): new_snapshots_dict, failures = ( 
await self._cluster_stats_source.get_current( max_age=max_age, include_lists=include_lists, - exclude_nodes=fresh_local_snapshots.keys() + exclude_nodes=list(fresh_local_snapshots.keys()) ) ) @@ -178,7 +178,7 @@ async def get(self, request): rendered_snapshots = { node_ip: stats_to_dict(snapshot, include_lists) - for node_ip, snapshot in new_snapshots_dict.iteritems() + for node_ip, snapshot in new_snapshots_dict.items() } return web.json_response({ diff --git a/Hermes/appscale/hermes/producers/cluster_stats.py b/Hermes/appscale/hermes/producers/cluster_stats.py index 23c815fe6c..c348eed72b 100644 --- a/Hermes/appscale/hermes/producers/cluster_stats.py +++ b/Hermes/appscale/hermes/producers/cluster_stats.py @@ -83,7 +83,7 @@ async def _stats_from_node_async(self, node_ip, max_age, include_lists): snapshot = await snapshot return snapshot except Exception as err: - logger.error(u"Failed to prepare local stats: {err}".format(err=err)) + logger.error("Failed to prepare local stats: {err}".format(err=err)) raise RemoteHermesError(str(err)) else: snapshot = await self._fetch_remote_stats_async( diff --git a/Hermes/appscale/hermes/producers/proxy_stats.py b/Hermes/appscale/hermes/producers/proxy_stats.py index 4c716ba0b3..1935a629d5 100644 --- a/Hermes/appscale/hermes/producers/proxy_stats.py +++ b/Hermes/appscale/hermes/producers/proxy_stats.py @@ -1,9 +1,7 @@ import asyncio - -import StringIO +import io import csv import logging -import socket import time from collections import defaultdict from datetime import datetime @@ -213,10 +211,10 @@ class HAProxyServerStats(object): ALL_HAPROXY_FIELDS = set( - attr.fields_dict(HAProxyListenerStats).keys() + - attr.fields_dict(HAProxyFrontendStats).keys() + - attr.fields_dict(HAProxyBackendStats).keys() + - attr.fields_dict(HAProxyServerStats).keys() + list(attr.fields_dict(HAProxyListenerStats).keys()) + + list(attr.fields_dict(HAProxyFrontendStats).keys()) + + list(attr.fields_dict(HAProxyBackendStats).keys()) + + list(attr.fields_dict(HAProxyServerStats).keys()) ) - {'private_ip', 'port'} # HAProxy stats doesn't include IP/Port columns # But we add these values by ourselves @@ -272,7 +270,7 @@ def _get_field_value(row, field_name): async def get_stats(socket_path): reader, writer = await asyncio.open_unix_connection(socket_path) try: - stats_output = StringIO.StringIO() + stats_output = io.StringIO() writer.write(b'show stat\n') stats_output.write(await reader.read()) stats_output.seek(0) @@ -342,7 +340,7 @@ async def get_stats_from_one_haproxy(socket_path, configs_dir): # Attempt to merge separate stats object to ProxyStats instances proxy_stats_list = [] - for proxy_name, stats_objects in parsed_objects.iteritems(): + for proxy_name, stats_objects in parsed_objects.items(): service = find_service_by_pxname(proxy_name) frontends = [stats for stats in stats_objects if isinstance(stats, HAProxyFrontendStats)] diff --git a/Hermes/appscale/hermes/producers/taskqueue_stats.py b/Hermes/appscale/hermes/producers/taskqueue_stats.py index b6fabcc92d..28a8ea0902 100644 --- a/Hermes/appscale/hermes/producers/taskqueue_stats.py +++ b/Hermes/appscale/hermes/producers/taskqueue_stats.py @@ -95,7 +95,7 @@ async def fetch_stats_from_instance(self, ip_port): resp.raise_for_status() stats_body = await resp.json() except aiohttp.ClientError as err: - msg = u"Failed to get {url} ({err})".format(url=url, err=err) + msg = "Failed to get {url} ({err})".format(url=url, err=err) logger.error(msg) failure = FailureSnapshot(ip_port=ip_port, error=str(err)) return failure @@ 
-129,7 +129,7 @@ async def fetch_stats_from_instance(self, ip_port): ) return instance_stats_snapshot except (TypeError, KeyError) as err: - msg = u"Can't parse taskqueue ({})".format(err) + msg = "Can't parse taskqueue ({})".format(err) raise BadTaskqueueStatsFormat(msg) from err @staticmethod @@ -156,13 +156,13 @@ def summarise_recent(instances_stats): by_pb_status_sum = collections.defaultdict(int) by_rest_status_sum = collections.defaultdict(int) for recent in recent_stats: - for pb_method, calls in recent.by_pb_method.iteritems(): + for pb_method, calls in recent.by_pb_method.items(): by_pb_method_sum[pb_method] += calls - for rest_method, calls in recent.by_rest_method.iteritems(): + for rest_method, calls in recent.by_rest_method.items(): by_rest_method_sum[rest_method] += calls - for pb_status, calls in recent.by_pb_status.iteritems(): + for pb_status, calls in recent.by_pb_status.items(): by_pb_status_sum[pb_status] += calls - for rest_status, calls in recent.by_rest_status.iteritems(): + for rest_status, calls in recent.by_rest_status.items(): by_rest_status_sum[rest_status] += calls # Return snapshot return RecentStatsSnapshot( diff --git a/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py b/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py index ca8a0c7e0c..f31333cfd5 100644 --- a/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py +++ b/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py @@ -19,7 +19,7 @@ def get_stats_from_file(json_file_name, stats_class): raw_dict = json.load(json_file) stats_dict = { ip: converter.stats_from_dict(stats_class, snapshot) - for ip, snapshot in raw_dict.iteritems() + for ip, snapshot in raw_dict.items() } return raw_dict, stats_dict @@ -130,7 +130,7 @@ def test_local_failure(self, mock_get_current, mock_ips_getter, mock_get_private_ip.return_value = '192.168.33.10' mock_ips_getter.return_value = ['192.168.33.10'] # Mock local source - mock_get_current.side_effect = ValueError(u"Something strange \u2234") + mock_get_current.side_effect = ValueError("Something strange \u2234") # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ # Call method under test @@ -138,7 +138,7 @@ def test_local_failure(self, mock_get_current, mock_ips_getter, # ASSERTING EXPECTATIONS self.assertEqual(stats, {}) - self.assertEqual(failures, {'192.168.33.10': u"Something strange \u2234"}) + self.assertEqual(failures, {'192.168.33.10': "Something strange \u2234"}) @patch.object(cluster_stats, 'options') @patch.object(cluster_stats.appscale_info, 'get_private_ip') @@ -166,7 +166,7 @@ def test_filtered_cluster_node_stats(self, mock_get_current, mock_fetch, future_response = gen.Future() future_response.set_result(response) mock_fetch.return_value = future_response - #Prepare raw dict with include lists + # Prepare raw dict with include lists raw_include_lists = { 'node': ['cpu', 'memory'], 'node.cpu': ['percent', 'count'], diff --git a/Hermes/appscale/hermes/producers/tests/test_proxy.py b/Hermes/appscale/hermes/producers/tests/test_proxy.py index 9a4c9f485c..466d075d1f 100644 --- a/Hermes/appscale/hermes/producers/tests/test_proxy.py +++ b/Hermes/appscale/hermes/producers/tests/test_proxy.py @@ -51,12 +51,12 @@ def test_haproxy_stats_v1_5(self, mock_socket): # Frontend stats shouldn't have Nones frontend = dashboard.frontend - for field in attr.fields_dict(proxy_stats.HAProxyFrontendStats).keys(): + for field in list(attr.fields_dict(proxy_stats.HAProxyFrontendStats).keys()): self.assertIsNotNone(getattr(frontend, field)) # Backend stats 
shouldn't have Nones backend = dashboard.backend - for field in attr.fields_dict(proxy_stats.HAProxyBackendStats).keys(): + for field in list(attr.fields_dict(proxy_stats.HAProxyBackendStats).keys()): self.assertIsNotNone(getattr(backend, field)) # Backend stats can have Nones only in some fields @@ -64,7 +64,7 @@ def test_haproxy_stats_v1_5(self, mock_socket): self.assertIsInstance(servers, list) self.assertEqual(len(servers), 3) for server in servers: - for field in attr.fields_dict(proxy_stats.HAProxyServerStats).keys(): + for field in list(attr.fields_dict(proxy_stats.HAProxyServerStats).keys()): if field in {'qlimit', 'throttle', 'tracked', 'check_code', 'last_chk', 'last_agt'}: continue @@ -109,7 +109,7 @@ def test_haproxy_stats_v1_4(self, mock_logging_warn, mock_socket): # Frontend stats shouldn't have Nones frontend = dashboard.frontend - for field in attr.fields_dict(proxy_stats.HAProxyFrontendStats).keys(): + for field in list(attr.fields_dict(proxy_stats.HAProxyFrontendStats).keys()): self.assertIsNotNone(getattr(frontend, field)) # New columns should be highlighted for new_in_v1_5 in ('comp_byp', 'comp_rsp', 'comp_out', 'comp_in'): @@ -117,7 +117,7 @@ def test_haproxy_stats_v1_4(self, mock_logging_warn, mock_socket): # Backend stats shouldn't have Nones backend = dashboard.backend - for field in attr.fields_dict(proxy_stats.HAProxyBackendStats).keys(): + for field in list(attr.fields_dict(proxy_stats.HAProxyBackendStats).keys()): self.assertIsNotNone(getattr(backend, field)) # New columns should be highlighted for new_in_v1_5 in ('comp_byp', 'lastsess', 'comp_rsp', 'comp_out', @@ -129,7 +129,7 @@ def test_haproxy_stats_v1_4(self, mock_logging_warn, mock_socket): self.assertIsInstance(servers, list) self.assertEqual(len(servers), 3) for server in servers: - for field in attr.fields_dict(proxy_stats.HAProxyServerStats).keys(): + for field in list(attr.fields_dict(proxy_stats.HAProxyServerStats).keys()): if field in {'qlimit', 'throttle', 'tracked', 'check_code', 'last_chk', 'last_agt'}: continue diff --git a/Hermes/appscale/hermes/producers/tests/test_taskqueue.py b/Hermes/appscale/hermes/producers/tests/test_taskqueue.py index 3938d855b3..999098e52f 100644 --- a/Hermes/appscale/hermes/producers/tests/test_taskqueue.py +++ b/Hermes/appscale/hermes/producers/tests/test_taskqueue.py @@ -37,7 +37,7 @@ def test_taskqueue_stats(self, mock_fetch, mock_get_instances): ), '10.10.7.86:17450': socket.error("Connection refused") } - mock_get_instances.return_value = tq_responses.keys() + mock_get_instances.return_value = list(tq_responses.keys()) # Mock taskqueue service stats API def fetch(request, **kwargs): diff --git a/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py b/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py index e24b779551..71b1e85c80 100644 --- a/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py +++ b/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py @@ -30,7 +30,7 @@ def test_search_for_known_service(self): 'memcached': ServicesEnum.MEMCACHED, 'appmanagerserver': ServicesEnum.APPMANAGER, } - for monit_name, expected in monit_name_to_expectation.iteritems(): + for monit_name, expected in monit_name_to_expectation.items(): self.assertEqual(find_service_by_monit_name(monit_name), expected) def test_search_for_unknown_service(self): @@ -75,7 +75,7 @@ def test_search_for_known_service(self): 'as_blob_server': ServicesEnum.BLOBSTORE, 'gae_app3-3': ServicesEnum.APPLICATION, } - for proxy_name, expected 
in proxy_name_to_expectation.iteritems(): + for proxy_name, expected in proxy_name_to_expectation.items(): self.assertEqual(find_service_by_pxname(proxy_name), expected) def test_search_for_unknown_service(self): diff --git a/Hermes/appscale/hermes/unified_service_names.py b/Hermes/appscale/hermes/unified_service_names.py index 3a51617844..f2efa7986f 100644 --- a/Hermes/appscale/hermes/unified_service_names.py +++ b/Hermes/appscale/hermes/unified_service_names.py @@ -172,7 +172,7 @@ class ServicesEnum(object): KNOWN_SERVICES = [ - value for value in ServicesEnum.__dict__.itervalues() + value for value in ServicesEnum.__dict__.values() if isinstance(value, Service) ] KNOWN_SERVICES_DICT = { From cfbaf9d1d52e77e1c5d9186dde2607242819e303 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 4 Apr 2019 17:51:31 +0300 Subject: [PATCH 005/221] appscale_info python3 compatibility --- common/appscale/common/appscale_info.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/common/appscale/common/appscale_info.py b/common/appscale/common/appscale_info.py index 19a69dfee1..392a98779a 100644 --- a/common/appscale/common/appscale_info.py +++ b/common/appscale/common/appscale_info.py @@ -54,9 +54,8 @@ def get_load_balancer_ips(): Returns: A list of LB node IPs. """ - raw_ips = file_io.read(constants.LOAD_BALANCER_IPS_LOC) - ips = raw_ips.split('\n') - return filter(None, ips) + with open(constants.LOAD_BALANCER_IPS_LOC) as lbs_file: + return [line.strip() for line in lbs_file if line.strip()] def get_headnode_ip(): """ Get the private IP of the head node. NOTE: it can change if node From 69818cd16b06a5d2b4d31e84339f1bc1c415748d Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 10 Apr 2019 17:08:51 +0300 Subject: [PATCH 006/221] Rename AdminServer/appscale/admin/__init__.py --- AdminServer/appscale/admin/{__init__.py => admin_server.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename AdminServer/appscale/admin/{__init__.py => admin_server.py} (100%) diff --git a/AdminServer/appscale/admin/__init__.py b/AdminServer/appscale/admin/admin_server.py similarity index 100% rename from AdminServer/appscale/admin/__init__.py rename to AdminServer/appscale/admin/admin_server.py From c649a5557081252666056b50ef72aeebf6c576d1 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 10 Apr 2019 17:11:26 +0300 Subject: [PATCH 007/221] Add empty __init__.py --- AdminServer/appscale/admin/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 AdminServer/appscale/admin/__init__.py diff --git a/AdminServer/appscale/admin/__init__.py b/AdminServer/appscale/admin/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From 93b09334dd03bb8bf2c904fae0552e9281b8d02a Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 4 Apr 2019 19:04:47 +0300 Subject: [PATCH 008/221] Minor fixes and docstrings in stats producers --- .../hermes/producers/cassandra_stats.py | 4 +- .../hermes/producers/cluster_stats.py | 37 +++++++++++++++---- .../hermes/producers/process_stats.py | 8 ++-- .../appscale/hermes/producers/proxy_stats.py | 9 +++-- .../hermes/producers/rabbitmq_stats.py | 8 ++-- .../hermes/producers/taskqueue_stats.py | 12 ++++-- .../appscale/hermes/unified_service_names.py | 6 +-- 7 files changed, 56 insertions(+), 28 deletions(-) diff --git a/Hermes/appscale/hermes/producers/cassandra_stats.py b/Hermes/appscale/hermes/producers/cassandra_stats.py index a1a28a6b97..207e060725 100644 --- a/Hermes/appscale/hermes/producers/cassandra_stats.py +++ 
b/Hermes/appscale/hermes/producers/cassandra_stats.py @@ -119,8 +119,8 @@ async def get_current(cls): .format(NODETOOL_STATUS_COMMAND, process.pid) ) - output = stdout.encode() - error = stderr.encode() + output = stdout.decode() + error = stderr.decode() if error: logger.warning(error) if process.returncode != 0: diff --git a/Hermes/appscale/hermes/producers/cluster_stats.py b/Hermes/appscale/hermes/producers/cluster_stats.py index c348eed72b..3888d88ee0 100644 --- a/Hermes/appscale/hermes/producers/cluster_stats.py +++ b/Hermes/appscale/hermes/producers/cluster_stats.py @@ -41,7 +41,7 @@ async def get_current(self, max_age=None, include_lists=None, and collects current stats. Local stats is got from local stats source. Args: - max_age: UTC timestamp, allow to use cached snapshot if it's newer. + max_age: An int - max age of cached snapshot to use (in seconds). include_lists: An instance of IncludeLists. exclude_nodes: A list of node IPs to ignore when fetching stats. Returns: @@ -54,8 +54,10 @@ async def get_current(self, max_age=None, include_lists=None, stats_per_node = {} failures = {} - # Do multiple requests asynchronously and wait for all results async def get_remote_result(node_ip): + """ Helper coroutine for issuing async request for + remote/local statistics and filling stats_per_node and failures. + """ try: stats_per_node[node_ip] = await self._stats_from_node_async( node_ip, max_age, include_lists @@ -63,8 +65,9 @@ async def get_remote_result(node_ip): except RemoteHermesError as err: failures[node_ip] = str(err) + # Do multiple requests asynchronously and wait for all results async with max_concurrency: - await asyncio.wait([ + await asyncio.gather(*[ get_remote_result(node_ip) for node_ip in self.ips_getter() if node_ip not in exclude_nodes ]) @@ -76,6 +79,15 @@ async def get_remote_result(node_ip): return stats_per_node, failures async def _stats_from_node_async(self, node_ip, max_age, include_lists): + """ Fetches statistics from either local or remote node. + + Args: + node_ip: A string - remote node IP. + max_age: An int - max age of cached snapshot to use (in seconds). + include_lists: An instance of IncludeLists. + Returns: + An instance of stats snapshot. + """ if node_ip == appscale_info.get_private_ip(): try: snapshot = self.local_stats_source.get_current() @@ -92,6 +104,15 @@ async def _stats_from_node_async(self, node_ip, max_age, include_lists): return snapshot async def _fetch_remote_stats_async(self, node_ip, max_age, include_lists): + """ Fetches statistics from a single remote node. + + Args: + node_ip: a string - remote node IP. + max_age: An int - max age of cached snapshot to use (in seconds). + include_lists: An instance of IncludeLists. + Returns: + An instance of stats snapshot. + """ # Security header headers = {constants.SECRET_HEADER: appscale_info.get_secret()} # Build query arguments @@ -112,11 +133,13 @@ async def _fetch_remote_stats_async(self, node_ip, max_age, include_lists): ) async with awaitable_get as resp: if resp.status >= 400: + err_message = 'HTTP {}: {}'.format(resp.status, resp.reason) resp_text = await resp.text() - err_message = resp_text or resp.reason + if resp_text: + err_message += '. 
{}'.format(resp_text) logger.error("Failed to get {} ({})".format(url, err_message)) raise RemoteHermesError(err_message) - snapshot = await resp.json() + snapshot = await resp.json(content_type=None) return converter.stats_from_dict(self.stats_model, snapshot) except aiohttp.ClientError as err: @@ -124,11 +147,11 @@ async def _fetch_remote_stats_async(self, node_ip, max_age, include_lists): raise RemoteHermesError(str(err)) -async def get_random_lb_node(): +def get_random_lb_node(): return [random.choice(appscale_info.get_load_balancer_ips())] -async def get_random_db_node(): +def get_random_db_node(): return [random.choice(appscale_info.get_db_ips())] diff --git a/Hermes/appscale/hermes/producers/process_stats.py b/Hermes/appscale/hermes/producers/process_stats.py index 2bbc8923de..85db68c8b4 100644 --- a/Hermes/appscale/hermes/producers/process_stats.py +++ b/Hermes/appscale/hermes/producers/process_stats.py @@ -118,7 +118,7 @@ def get_current(): An instance ofProcessesStatsSnapshot. """ start = time.time() - monit_status = subprocess.check_output('monit status', shell=True) + monit_status = subprocess.check_output('monit status', shell=True).decode() processes_stats = [] private_ip = appscale_info.get_private_ip() for match in MONIT_PROCESS_PATTERN.finditer(monit_status): @@ -129,8 +129,8 @@ def get_current(): stats = _process_stats(pid, service, monit_name, private_ip) processes_stats.append(stats) except psutil.Error as err: - logger.warn("Unable to get process stats for {monit_name} ({err})" - .format(monit_name=monit_name, err=err)) + logger.warning("Unable to get process stats for {monit_name} ({err})" + .format(monit_name=monit_name, err=err)) # Add processes managed by the ServiceManager. for server in ServiceManager.get_state(): @@ -148,7 +148,7 @@ def get_current(): processes_stats=processes_stats ) logger.info("Prepared stats about {proc} processes in {elapsed:.1f}s." - .format(proc=len(processes_stats), elapsed=time.time()-start)) + .format(proc=len(processes_stats), elapsed=time.time()-start)) return stats diff --git a/Hermes/appscale/hermes/producers/proxy_stats.py b/Hermes/appscale/hermes/producers/proxy_stats.py index 1935a629d5..d5990b66ab 100644 --- a/Hermes/appscale/hermes/producers/proxy_stats.py +++ b/Hermes/appscale/hermes/producers/proxy_stats.py @@ -272,11 +272,10 @@ async def get_stats(socket_path): try: stats_output = io.StringIO() writer.write(b'show stat\n') - stats_output.write(await reader.read()) + stats_output.write((await reader.read()).decode()) stats_output.seek(0) return stats_output finally: - reader.close() writer.close() @@ -305,8 +304,10 @@ async def get_stats_from_one_haproxy(socket_path, configs_dir): if ProxiesStatsSource.first_run: missed = ALL_HAPROXY_FIELDS - set(table.fieldnames) if missed: - logger.warn("HAProxy stats fields {} are missed. Old version of HAProxy " - "is probably used (v1.5+ is expected)".format(list(missed))) + logger.warning( + "HAProxy stats fields {} are missed. Old version of HAProxy " + "is probably used (v1.5+ is expected)".format(list(missed)) + ) ProxiesStatsSource.first_run = False # Parse haproxy stats output line by line diff --git a/Hermes/appscale/hermes/producers/rabbitmq_stats.py b/Hermes/appscale/hermes/producers/rabbitmq_stats.py index ce9424316d..0bd2fad679 100644 --- a/Hermes/appscale/hermes/producers/rabbitmq_stats.py +++ b/Hermes/appscale/hermes/producers/rabbitmq_stats.py @@ -14,8 +14,8 @@ API_PORT = 15672 # Credentials used to access the RabbitMQ API. 
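# A minimal sketch of the Python 3 detail behind the change below:
# base64.b64encode() accepts only bytes, so the credentials are kept as
# bytes and the encoded value is decoded back to str before being placed
# in the Authorization header. The guest/guest values mirror this module's
# constants.
import base64

def basic_auth_header(user=b'guest', password=b'guest'):
  creds = base64.b64encode(b':'.join([user, password]))
  return {'Authorization': 'Basic {}'.format(creds.decode())}

# aiohttp could also build this header via aiohttp.BasicAuth passed as the
# `auth=` argument; the explicit header simply mirrors the approach taken
# in this module.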
-USER = 'guest' -PASS = 'guest' +USER = b'guest' +PASS = b'guest' # The endpoint used for retrieving node stats. NODES_API = '/api/nodes' @@ -73,8 +73,8 @@ async def get_current(): node_name = 'rabbit@{}'.format(socket.gethostname()) url = 'http://localhost:{}{}/{}'.format(API_PORT, NODES_API, node_name) - creds = base64.b64encode(':'.join([USER, PASS])) - headers = {'Authorization': 'Basic {}'.format(creds)} + creds = base64.b64encode(b':'.join([USER, PASS])) + headers = {'Authorization': 'Basic {}'.format(creds.decode())} try: async with aiohttp.ClientSession() as session: diff --git a/Hermes/appscale/hermes/producers/taskqueue_stats.py b/Hermes/appscale/hermes/producers/taskqueue_stats.py index 28a8ea0902..e3e23fa96a 100644 --- a/Hermes/appscale/hermes/producers/taskqueue_stats.py +++ b/Hermes/appscale/hermes/producers/taskqueue_stats.py @@ -93,7 +93,7 @@ async def fetch_stats_from_instance(self, ip_port): awaitable_get = session.get(url, timeout=REMOTE_REQUEST_TIMEOUT) async with awaitable_get as resp: resp.raise_for_status() - stats_body = await resp.json() + stats_body = await resp.json(content_type=None) except aiohttp.ClientError as err: msg = "Failed to get {url} ({err})".format(url=url, err=err) logger.error(msg) @@ -164,12 +164,16 @@ def summarise_recent(instances_stats): by_pb_status_sum[pb_status] += calls for rest_status, calls in recent.by_rest_status.items(): by_rest_status_sum[rest_status] += calls + + avg_latency = None + if total_recent_reqs: + avg_latency = int(weighted_avg_latency_sum / total_recent_reqs) + # Return snapshot return RecentStatsSnapshot( total=total_recent_reqs, failed=sum(recent.failed for recent in recent_stats), - avg_latency=(weighted_avg_latency_sum / total_recent_reqs) - if total_recent_reqs else None, + avg_latency=avg_latency, pb_reqs=sum(recent.pb_reqs for recent in recent_stats), rest_reqs=sum(recent.rest_reqs for recent in recent_stats), by_pb_method=by_pb_method_sum, @@ -185,7 +189,7 @@ async def get_current(self): proxy_stats.HAPROXY_SERVICES_STATS_SOCKET_PATH, "TaskQueue" ) # Query all TQ servers - instances_responses = await asyncio.wait([ + instances_responses = await asyncio.gather(*[ self.fetch_stats_from_instance(ip_port) for ip_port in tq_instances ]) diff --git a/Hermes/appscale/hermes/unified_service_names.py b/Hermes/appscale/hermes/unified_service_names.py index f2efa7986f..16e3bad7ab 100644 --- a/Hermes/appscale/hermes/unified_service_names.py +++ b/Hermes/appscale/hermes/unified_service_names.py @@ -22,13 +22,13 @@ class Service(object): name = attr.ib() # monit_matcher have to contain 'app' and 'port' groups when possible - monit_matcher = attr.ib(default=UNMATCHABLE, convert=re.compile) + monit_matcher = attr.ib(default=UNMATCHABLE, converter=re.compile) # haproxy_proxy_matcher have to contain 'app' group when possible - haproxy_proxy_matcher = attr.ib(default=UNMATCHABLE, convert=re.compile) + haproxy_proxy_matcher = attr.ib(default=UNMATCHABLE, converter=re.compile) # haproxy_server_matcher have to contain 'app', 'ip' and 'port' groups when possible - haproxy_server_matcher = attr.ib(default=UNMATCHABLE, convert=re.compile) + haproxy_server_matcher = attr.ib(default=UNMATCHABLE, converter=re.compile) def recognize_monit_process(self, monit_name): """ Checks whether monit process corresponds to this service. 
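The cluster stats source above fans requests out with a per-node helper
coroutine and asyncio.gather, recording successes and failures in separate
dictionaries so one failing node cannot cancel the rest. A self-contained
sketch of that pattern follows; the coroutine names and the fake per-node
fetch are illustrative only, not taken from Hermes.

import asyncio

async def fetch_one(node_ip):
  # Stand-in for a real per-node HTTP request; may raise on failure.
  await asyncio.sleep(0)
  return {'node': node_ip}

async def fetch_all(node_ips):
  results, failures = {}, {}

  async def collect(node_ip):
    try:
      results[node_ip] = await fetch_one(node_ip)
    except Exception as err:
      failures[node_ip] = str(err)

  # gather() schedules all helper coroutines concurrently and waits for all
  # of them; failures are captured per node instead of propagating.
  await asyncio.gather(*[collect(ip) for ip in node_ips])
  return results, failures

# Example:
#   asyncio.get_event_loop().run_until_complete(fetch_all(['10.0.2.15']))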
From d8de4ef2ccdf89a3725de595e11e512d8710ba12 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 4 Apr 2019 19:14:15 +0300 Subject: [PATCH 009/221] Reworking routing preparation and handlers --- Hermes/appscale/hermes/handlers.py | 168 ++++++++++++++---------- Hermes/appscale/hermes/hermes_server.py | 23 ++-- 2 files changed, 112 insertions(+), 79 deletions(-) diff --git a/Hermes/appscale/hermes/handlers.py b/Hermes/appscale/hermes/handlers.py index 4e17211c2a..510cff2d25 100644 --- a/Hermes/appscale/hermes/handlers.py +++ b/Hermes/appscale/hermes/handlers.py @@ -15,48 +15,17 @@ logger = logging.getLogger(__name__) -DEFAULT_INCLUDE_LISTS = IncludeLists({ - # Node stats - 'node': ['utc_timestamp', 'cpu', 'memory', - 'partitions_dict', 'loadavg'], - 'node.cpu': ['percent', 'count'], - 'node.memory': ['available', 'total'], - 'node.partition': ['free', 'used'], - 'node.loadavg': ['last_5min'], - # Processes stats - 'process': ['monit_name', 'unified_service_name', 'application_id', - 'port', 'cpu', 'memory', 'children_stats_sum'], - 'process.cpu': ['user', 'system', 'percent'], - 'process.memory': ['resident', 'virtual', 'unique'], - 'process.children_stats_sum': ['cpu', 'memory'], - # Proxies stats - 'proxy': ['name', 'unified_service_name', 'application_id', - 'frontend', 'backend', 'servers_count'], - 'proxy.frontend': ['bin', 'bout', 'scur', 'smax', 'rate', - 'req_rate', 'req_tot', 'hrsp_4xx', 'hrsp_5xx'], - 'proxy.backend': ['qcur', 'scur', 'hrsp_5xx', 'qtime', 'rtime'], - # Taskqueue service stats - 'taskqueue': ['utc_timestamp', 'current_requests', 'cumulative', 'recent', - 'instances_count', 'failures'], - 'taskqueue.instance': ['start_timestamp_ms', 'current_requests', - 'cumulative', 'recent'], - 'taskqueue.cumulative': ['total', 'failed', 'pb_reqs', 'rest_reqs'], - 'taskqueue.recent': ['total', 'failed', 'avg_latency', - 'pb_reqs', 'rest_reqs'], - # RabbitMQ stats - 'rabbitmq': ['utc_timestamp', 'disk_free_alarm', 'mem_alarm', 'name', - 'partitions'], - # Push queue stats - 'queue': ['name', 'messages'], - # Cassandra stats - 'cassandra': ['utc_timestamp', 'nodes', 'missing_nodes', 'unknown_nodes'], - # Cassandra node stats - 'cassandra.node': ['address', 'status', 'state', 'load', 'owns_pct', - 'tokens_num'], -}) - - +@web.middleware async def verify_secret_middleware(request, handler): + """ Security middleware for secret verification. + + Args: + request: an instance of Request. + handler: a callable handler for further request processing. + Return: + 403 Response if secret is incorrect, + Response provided by handler otherwise. + """ if request.headers.get(SECRET_HEADER) != appscale_info.get_secret(): logger.warn("Received bad secret from {client}" .format(client=request.remote)) @@ -65,19 +34,75 @@ async def verify_secret_middleware(request, handler): return await handler(request) -class LocalStatsHandler(object): +def get_default_include_lists(): + """ Creates an instance of IncludeLists with default values. + It is not a constant because all model classes should be imported before + creating an instance of IncludeLists. + We're planning to get rid of complicated include lists logic + by splitting composite models into smaller. 
+ """ + return IncludeLists({ + # Node stats + 'node': ['utc_timestamp', 'cpu', 'memory', + 'partitions_dict', 'loadavg'], + 'node.cpu': ['percent', 'count'], + 'node.memory': ['available', 'total'], + 'node.partition': ['free', 'used'], + 'node.loadavg': ['last_5min'], + # Processes stats + 'process': ['monit_name', 'unified_service_name', 'application_id', + 'port', 'cpu', 'memory', 'children_stats_sum'], + 'process.cpu': ['user', 'system', 'percent'], + 'process.memory': ['resident', 'virtual', 'unique'], + 'process.children_stats_sum': ['cpu', 'memory'], + # Proxies stats + 'proxy': ['name', 'unified_service_name', 'application_id', + 'frontend', 'backend', 'servers_count'], + 'proxy.frontend': ['bin', 'bout', 'scur', 'smax', 'rate', + 'req_rate', 'req_tot', 'hrsp_4xx', 'hrsp_5xx'], + 'proxy.backend': ['qcur', 'scur', 'hrsp_5xx', 'qtime', 'rtime'], + # Taskqueue service stats + 'taskqueue': ['utc_timestamp', 'current_requests', 'cumulative', 'recent', + 'instances_count', 'failures'], + 'taskqueue.instance': ['start_timestamp_ms', 'current_requests', + 'cumulative', 'recent'], + 'taskqueue.cumulative': ['total', 'failed', 'pb_reqs', 'rest_reqs'], + 'taskqueue.recent': ['total', 'failed', 'avg_latency', + 'pb_reqs', 'rest_reqs'], + # RabbitMQ stats + 'rabbitmq': ['utc_timestamp', 'disk_free_alarm', 'mem_alarm', 'name', + 'partitions'], + # Push queue stats + 'queue': ['name', 'messages'], + # Cassandra stats + 'cassandra': ['utc_timestamp', 'nodes', 'missing_nodes', 'unknown_nodes'], + # Cassandra node stats + 'cassandra.node': ['address', 'status', 'state', 'load', 'owns_pct', + 'tokens_num'], + }) + + +class LocalStatsHandler: """ Handler for getting current local stats of specific kind. """ - def __init__(self, source): + def __init__(self, stats_source): """ Initializes request handler for providing current stats. Args: - source: an object with method get_current. + stats_source: an object with method get_current. """ - self._stats_source = source - self._cached_snapshot = None + self.stats_source = stats_source + self.cached_snapshot = None + self.default_include_lists = get_default_include_lists() + + async def __call__(self, request): + """ Handles HTTP request. - async def get(self, request): + Args: + request: an instance of Request. + Returns: + An instance of Resposne. + """ if request.has_body: payload = await request.json() else: @@ -94,41 +119,50 @@ async def get(self, request): return web.Response(status=http.HTTPStatus.BAD_REQUEST, reason='Wrong include_lists', text=str(err)) else: - include_lists = DEFAULT_INCLUDE_LISTS + include_lists = self.default_include_lists snapshot = None # Try to use cached snapshot - if self._cached_snapshot: + if self.cached_snapshot: now = time.time() acceptable_time = now - max_age - if self._cached_snapshot.utc_timestamp >= acceptable_time: - snapshot = self._cached_snapshot + if self.cached_snapshot.utc_timestamp >= acceptable_time: + snapshot = self.cached_snapshot logger.info("Returning cached snapshot with age {:.2f}s" - .format(now-self._cached_snapshot.utc_timestamp)) + .format(now-self.cached_snapshot.utc_timestamp)) if not snapshot: - snapshot = self._stats_source.get_current() + snapshot = self.stats_source.get_current() if inspect.isawaitable(snapshot): snapshot = await snapshot - self._cached_snapshot = snapshot + self.cached_snapshot = snapshot return web.json_response(stats_to_dict(snapshot, include_lists)) -class ClusterStatsHandler(object): - """ Handler for getting current stats of specific kind. 
+class ClusterStatsHandler: + """ Handler for getting current cluster stats of specific kind. """ - def __init__(self, source): + + def __init__(self, stats_source): """ Initializes request handler for providing current stats. Args: - source: an object with method get_current. + stats_source: an object with method get_current. """ - self._cluster_stats_source = source - self._cached_snapshots = {} + self.stats_source = stats_source + self.cached_snapshots = {} + self.default_include_lists = get_default_include_lists() + + async def __call__(self, request): + """ Handles HTTP request. - async def get(self, request): + Args: + request: an instance of Request. + Returns: + An instance of Response. + """ if request.has_body: payload = await request.json() else: @@ -145,16 +179,16 @@ async def get(self, request): return web.Response(status=http.HTTPStatus.BAD_REQUEST, reason='Wrong include_lists', text=str(err)) else: - include_lists = DEFAULT_INCLUDE_LISTS + include_lists = self.default_include_lists newer_than = time.mktime(datetime.now().timetuple()) - max_age - if (not DEFAULT_INCLUDE_LISTS or - include_lists.is_subset_of(DEFAULT_INCLUDE_LISTS)): + if (not self.default_include_lists or + include_lists.is_subset_of(self.default_include_lists)): # If user didn't specify any non-default fields we can use local cache fresh_local_snapshots = { node_ip: snapshot - for node_ip, snapshot in self._cached_snapshots.items() + for node_ip, snapshot in self.cached_snapshots.items() if max_age and snapshot.utc_timestamp > newer_than } if fresh_local_snapshots: @@ -164,14 +198,14 @@ async def get(self, request): fresh_local_snapshots = {} new_snapshots_dict, failures = ( - await self._cluster_stats_source.get_current( + await self.stats_source.get_current( max_age=max_age, include_lists=include_lists, exclude_nodes=list(fresh_local_snapshots.keys()) ) ) # Put new snapshots to local cache - self._cached_snapshots.update(new_snapshots_dict) + self.cached_snapshots.update(new_snapshots_dict) # Extend fetched snapshots dict with fresh local snapshots new_snapshots_dict.update(fresh_local_snapshots) diff --git a/Hermes/appscale/hermes/hermes_server.py b/Hermes/appscale/hermes/hermes_server.py index f004dfe25d..0c3565a035 100644 --- a/Hermes/appscale/hermes/hermes_server.py +++ b/Hermes/appscale/hermes/hermes_server.py @@ -9,15 +9,13 @@ from appscale.common import appscale_info from appscale.common.constants import LOG_FORMAT -from appscale.hermes import constants, handlers - +from appscale.hermes import constants from appscale.hermes.handlers import ( - LocalStatsHandler, ClusterStatsHandler, not_found + verify_secret_middleware, LocalStatsHandler, ClusterStatsHandler, not_found ) from appscale.hermes.producers.cluster_stats import ( cluster_nodes_stats, cluster_processes_stats, cluster_proxies_stats, - cluster_rabbitmq_stats, cluster_push_queues_stats, - cluster_taskqueue_stats, + cluster_rabbitmq_stats, cluster_push_queues_stats, cluster_taskqueue_stats, cluster_cassandra_stats ) from appscale.hermes.producers.cassandra_stats import CassandraStatsSource @@ -32,8 +30,9 @@ def get_local_stats_api_routes(is_lb_node, is_tq_node, is_db_node): - """ Creates stats sources and API handlers for providing local - node, processes and proxies (only on LB nodes) stats. + """ Creates stats sources and API handlers for providing local stats. + Routes which are not applicable for node role are stubbed with + 404 handler. Args: is_lb_node: A boolean indicating whether this node is load balancer. 
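# A minimal sketch of how the aiohttp application is assembled after this
# change: (route, handler) pairs are registered on a web.Application that
# carries the secret-verification middleware. The middleware body, route,
# handler and port below are simplified placeholders.
from aiohttp import web

@web.middleware
async def check_secret(request, handler):
  # A real deployment compares the request's secret header here.
  return await handler(request)

async def stub_handler(request):
  return web.json_response({'status': 'ok'})

def make_app():
  app = web.Application(middlewares=[check_secret])
  for route, handler in [('/stats/local/node', stub_handler)]:
    app.router.add_get(route, handler)
  return app

# web.run_app(make_app(), port=8080)  # port value is a placeholder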
@@ -83,9 +82,9 @@ def get_local_stats_api_routes(is_lb_node, is_tq_node, is_db_node): def get_cluster_stats_api_routes(is_lb): - """ Creates stats sources and API handlers for providing cluster - node, processes and proxies stats (on master node only). - If this node is slave, it creates stub handlers for cluster stats routes. + """ Creates stats sources and API handlers for providing cluster nodes. + If this node is not Load balancer, + it creates stub handlers for cluster stats routes. Args: is_lb: A boolean indicating whether this node is load balancer. @@ -142,13 +141,13 @@ def main(): is_tq = (my_ip in appscale_info.get_taskqueue_nodes()) is_db = (my_ip in appscale_info.get_db_ips()) - app = web.Application(middlewares=[handlers.verify_secret_middleware]) + app = web.Application(middlewares=[verify_secret_middleware]) route_items = [] route_items += get_local_stats_api_routes(is_lb, is_tq, is_db) route_items += get_cluster_stats_api_routes(is_master) for route, handler in route_items: - app.router.add_get(route, handlers) + app.router.add_get(route, handler) logger.info("Starting Hermes on port: {}.".format(args.port)) web.run_app(app, port=args.port, access_log=logger) From 480bcedb2038ea9ace086dac38f85b380e76ebaa Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 10 Apr 2019 17:33:51 +0300 Subject: [PATCH 010/221] Updated unit tests for hermes to deal with aiohttp and asyncio --- .../hermes/producers/tests/test_cassandra.py | 262 +++---- .../producers/tests/test_cluster_stats.py | 675 ++++++++++-------- .../hermes/producers/tests/test_node.py | 40 +- .../hermes/producers/tests/test_process.py | 128 ++-- .../hermes/producers/tests/test_proxy.py | 225 +++--- .../hermes/producers/tests/test_taskqueue.py | 316 ++++---- .../tests/test_unified_service_names.py | 62 +- 7 files changed, 935 insertions(+), 773 deletions(-) diff --git a/Hermes/appscale/hermes/producers/tests/test_cassandra.py b/Hermes/appscale/hermes/producers/tests/test_cassandra.py index a32f922720..5238ac37bf 100644 --- a/Hermes/appscale/hermes/producers/tests/test_cassandra.py +++ b/Hermes/appscale/hermes/producers/tests/test_cassandra.py @@ -1,16 +1,18 @@ +import asyncio + +import pytest from mock import MagicMock, patch -from tornado import gen, testing from appscale.hermes.producers import cassandra_stats -def future(value): - future = gen.Future() - future.set_result(value) - return future +def future(value=None): + future_obj = asyncio.Future() + future_obj.set_result(value) + return future_obj -MULTINODE_STATUS = """Datacenter: datacenter1 +MULTINODE_STATUS = b"""Datacenter: datacenter1 ======================= Status=Up/Down |/ State=Normal/Leaving/Joining/Moving @@ -20,7 +22,7 @@ def future(value): """ -SINGLENODE_STATUS = """Datacenter: datacenter1 +SINGLENODE_STATUS = b"""Datacenter: datacenter1 ======================= Status=Up/Down |/ State=Normal/Leaving/Joining/Moving @@ -30,117 +32,135 @@ def future(value): """ -class TestCurrentCassandraStats(testing.AsyncTestCase): - - @patch.object(cassandra_stats.process, 'Subprocess') - @patch.object(cassandra_stats.appscale_info, 'get_db_ips') - @testing.gen_test - def test_multinode(self, mock_get_db_ips, mock_subprocess): - subprocess = MagicMock() - - # Mocking `get_db_ips` and Subprocess - mock_get_db_ips.return_value = ['10.0.2.15', '10.0.2.16'] - mock_subprocess.return_value = subprocess - subprocess.stdout.read_until_close.return_value = future(MULTINODE_STATUS) - subprocess.stderr.read_until_close.return_value = future('') - - # Calling method under test - 
stats = yield cassandra_stats.CassandraStatsSource.get_current() - - # Asserting expectations - self.assertEqual(stats.missing_nodes, []) - self.assertEqual(stats.unknown_nodes, []) - self.assertIsInstance(stats.utc_timestamp, int) - self.assertEqual(len(stats.nodes), 2) - - first = stats.nodes[0] - self.assertEqual(first.address, '10.0.2.15') - self.assertEqual(first.status, 'Up') - self.assertEqual(first.state, 'Normal') - self.assertEqual(first.load, int(67.94 * 1024**3)) - self.assertEqual(first.owns_pct, 99.8) - self.assertEqual(first.tokens_num, 1) - self.assertEqual(first.host_id, 'a341df86-71e2-4054-83d6-c2d92dc75afc') - self.assertEqual(first.rack, 'rack1') - - second = stats.nodes[1] - self.assertEqual(second.address, '10.0.2.16') - self.assertEqual(second.status, 'Up') - self.assertEqual(second.state, 'Normal') - self.assertEqual(second.load, int(65.99 * 1024**3)) - self.assertEqual(second.owns_pct, 0.2) - self.assertEqual(second.tokens_num, 1) - self.assertEqual(second.host_id, '2ceb81a6-4c49-456d-a38b-23667ee60ff9') - self.assertEqual(second.rack, 'rack1') - - @patch.object(cassandra_stats.process, 'Subprocess') - @patch.object(cassandra_stats.appscale_info, 'get_db_ips') - @testing.gen_test - def test_singlenode(self, mock_get_db_ips, mock_subprocess): - subprocess = MagicMock() - - # Mocking `get_db_ips` and Subprocess - mock_get_db_ips.return_value = ['10.0.2.15'] - mock_subprocess.return_value = subprocess - subprocess.stdout.read_until_close.return_value = future(SINGLENODE_STATUS) - subprocess.stderr.read_until_close.return_value = future('') - - # Calling method under test - stats = yield cassandra_stats.CassandraStatsSource.get_current() - - # Asserting expectations - self.assertEqual(stats.missing_nodes, []) - self.assertEqual(stats.unknown_nodes, []) - self.assertIsInstance(stats.utc_timestamp, int) - self.assertEqual(len(stats.nodes), 1) - - first = stats.nodes[0] - self.assertEqual(first.address, '10.0.2.15') - self.assertEqual(first.status, 'Up') - self.assertEqual(first.state, 'Normal') - self.assertEqual(first.load, int(337.07 * 1024**2)) - self.assertEqual(first.owns_pct, 100.0) - self.assertEqual(first.tokens_num, 1) - self.assertEqual(first.host_id, '38fd1ac1-85f9-4b19-8f8f-19ef5a00d65d') - self.assertEqual(first.rack, 'rack1') - - @patch.object(cassandra_stats.process, 'Subprocess') - @patch.object(cassandra_stats.appscale_info, 'get_db_ips') - @testing.gen_test - def test_missing_and_unknown(self, mock_get_db_ips, mock_subprocess): - subprocess = MagicMock() - - # Mocking `get_db_ips` and Subprocess - mock_get_db_ips.return_value = ['10.0.2.15', '10.0.2.missing'] - mock_subprocess.return_value = subprocess - subprocess.stdout.read_until_close.return_value = future(MULTINODE_STATUS) - subprocess.stderr.read_until_close.return_value = future('') - - # Calling method under test - stats = yield cassandra_stats.CassandraStatsSource.get_current() - - # Asserting expectations - self.assertEqual(stats.missing_nodes, ['10.0.2.missing']) - self.assertEqual(stats.unknown_nodes, ['10.0.2.16']) - self.assertIsInstance(stats.utc_timestamp, int) - self.assertEqual(len(stats.nodes), 2) - - first = stats.nodes[0] - self.assertEqual(first.address, '10.0.2.15') - self.assertEqual(first.status, 'Up') - self.assertEqual(first.state, 'Normal') - self.assertEqual(first.load, int(67.94 * 1024**3)) - self.assertEqual(first.owns_pct, 99.8) - self.assertEqual(first.tokens_num, 1) - self.assertEqual(first.host_id, 'a341df86-71e2-4054-83d6-c2d92dc75afc') - self.assertEqual(first.rack, 
'rack1') - - second = stats.nodes[1] - self.assertEqual(second.address, '10.0.2.16') - self.assertEqual(second.status, 'Up') - self.assertEqual(second.state, 'Normal') - self.assertEqual(second.load, int(65.99 * 1024**3)) - self.assertEqual(second.owns_pct, 0.2) - self.assertEqual(second.tokens_num, 1) - self.assertEqual(second.host_id, '2ceb81a6-4c49-456d-a38b-23667ee60ff9') - self.assertEqual(second.rack, 'rack1') +@pytest.mark.asyncio +async def test_multinode(): + process_mock = MagicMock(returncode=0) + stdout = MULTINODE_STATUS + stderr = b'' + process_mock.communicate.return_value = future((stdout, stderr)) + + db_ips_patcher = patch( + 'appscale.common.appscale_info.get_db_ips', + return_value=['10.0.2.15', '10.0.2.16'] + ) + subprocess_patcher = patch( + 'asyncio.create_subprocess_shell', + return_value=future(process_mock) + ) + + with db_ips_patcher: + with subprocess_patcher: + # Calling method under test + stats = await cassandra_stats.CassandraStatsSource.get_current() + + # Asserting expectations + assert stats.missing_nodes == [] + assert stats.unknown_nodes == [] + assert isinstance(stats.utc_timestamp, int) + assert len(stats.nodes) == 2 + + first = stats.nodes[0] + assert first.address == '10.0.2.15' + assert first.status == 'Up' + assert first.state == 'Normal' + assert first.load == int(67.94 * 1024**3) + assert first.owns_pct == 99.8 + assert first.tokens_num == 1 + assert first.host_id == 'a341df86-71e2-4054-83d6-c2d92dc75afc' + assert first.rack == 'rack1' + + second = stats.nodes[1] + assert second.address == '10.0.2.16' + assert second.status == 'Up' + assert second.state == 'Normal' + assert second.load == int(65.99 * 1024**3) + assert second.owns_pct == 0.2 + assert second.tokens_num == 1 + assert second.host_id == '2ceb81a6-4c49-456d-a38b-23667ee60ff9' + assert second.rack == 'rack1' + + +@pytest.mark.asyncio +async def test_singlenode(): + process_mock = MagicMock(returncode=0) + stdout = SINGLENODE_STATUS + stderr = b'' + process_mock.communicate.return_value = future((stdout, stderr)) + + db_ips_patcher = patch( + 'appscale.common.appscale_info.get_db_ips', + return_value=['10.0.2.15'] + ) + subprocess_patcher = patch( + 'asyncio.create_subprocess_shell', + return_value=future(process_mock) + ) + + with db_ips_patcher: + with subprocess_patcher: + # Calling method under test + stats = await cassandra_stats.CassandraStatsSource.get_current() + + # Asserting expectations + assert stats.missing_nodes == [] + assert stats.unknown_nodes == [] + assert isinstance(stats.utc_timestamp, int) + assert len(stats.nodes) == 1 + + first = stats.nodes[0] + assert first.address == '10.0.2.15' + assert first.status == 'Up' + assert first.state == 'Normal' + assert first.load == int(337.07 * 1024**2) + assert first.owns_pct == 100.0 + assert first.tokens_num == 1 + assert first.host_id == '38fd1ac1-85f9-4b19-8f8f-19ef5a00d65d' + assert first.rack == 'rack1' + + +@pytest.mark.asyncio +async def test_missing_and_unknown(): + process_mock = MagicMock(returncode=0) + stdout = MULTINODE_STATUS + stderr = b'' + process_mock.communicate.return_value = future((stdout, stderr)) + + db_ips_patcher = patch( + 'appscale.common.appscale_info.get_db_ips', + return_value=['10.0.2.15', '10.0.2.missing'] + ) + subprocess_patcher = patch( + 'asyncio.create_subprocess_shell', + return_value=future(process_mock) + ) + + with db_ips_patcher: + with subprocess_patcher: + # Calling method under test + stats = await cassandra_stats.CassandraStatsSource.get_current() + + # Asserting expectations + 
assert stats.missing_nodes == ['10.0.2.missing'] + assert stats.unknown_nodes == ['10.0.2.16'] + assert isinstance(stats.utc_timestamp, int) + assert len(stats.nodes) == 2 + + first = stats.nodes[0] + assert first.address == '10.0.2.15' + assert first.status == 'Up' + assert first.state == 'Normal' + assert first.load == int(67.94 * 1024**3) + assert first.owns_pct == 99.8 + assert first.tokens_num == 1 + assert first.host_id == 'a341df86-71e2-4054-83d6-c2d92dc75afc' + assert first.rack == 'rack1' + + second = stats.nodes[1] + assert second.address == '10.0.2.16' + assert second.status == 'Up' + assert second.state == 'Normal' + assert second.load == int(65.99 * 1024**3) + assert second.owns_pct == 0.2 + assert second.tokens_num == 1 + assert second.host_id == '2ceb81a6-4c49-456d-a38b-23667ee60ff9' + assert second.rack == 'rack1' diff --git a/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py b/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py index f31333cfd5..41c1eadbda 100644 --- a/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py +++ b/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py @@ -1,10 +1,13 @@ +import asyncio +import contextlib import json import os +import aiohttp +import pytest from mock import patch, MagicMock -from tornado import testing, gen, httpclient -from appscale.hermes import converter +from appscale.hermes import converter, constants from appscale.hermes.converter import IncludeLists from appscale.hermes.producers import ( cluster_stats, node_stats, process_stats, proxy_stats @@ -14,6 +17,20 @@ TEST_DATA_DIR = os.path.join(CUR_DIR, 'test-data') +def future(value=None): + future_obj = asyncio.Future() + future_obj.set_result(value) + return future_obj + + +class AsyncContextMock(MagicMock): + async def __aenter__(self): + return self.aenter + + async def __aexit__(self, exc_type, exc_val, exc_tb): + return None + + def get_stats_from_file(json_file_name, stats_class): with open(os.path.join(TEST_DATA_DIR, json_file_name)) as json_file: raw_dict = json.load(json_file) @@ -24,148 +41,178 @@ def get_stats_from_file(json_file_name, stats_class): return raw_dict, stats_dict -class TestClusterNodeStatsProducer(testing.AsyncTestCase): - - @patch.object(cluster_stats, 'options') - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_nodes_stats, 'ips_getter') - @patch.object(cluster_stats.httpclient.AsyncHTTPClient, 'fetch') - @patch.object(node_stats.NodeStatsSource, 'get_current') - @testing.gen_test - def test_verbose_cluster_node_stats(self, mock_get_current, mock_fetch, - mock_ips_getter, mock_get_private_ip, - mock_options): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.10', '192.168.33.11'] - # Mock secret - mock_options.secret = 'secret' +class TestClusterNodeStatsProducer: + + @staticmethod + @pytest.mark.asyncio + async def test_verbose_cluster_nodes_stats(): # Read test data from json file raw_test_data, stats_test_data = get_stats_from_file( 'node-stats.json', node_stats.NodeStatsSnapshot ) - # Mock local source - mock_get_current.return_value = stats_test_data['192.168.33.10'] - # Mock AsyncHTTPClient.fetch using raw stats dictionaries from test data - response = MagicMock(body=json.dumps(raw_test_data['192.168.33.11']), - code=200, reason='OK') - future_response = gen.Future() - future_response.set_result(response) - mock_fetch.return_value = future_response + + 
private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_nodes_stats.ips_getter', + return_value=['192.168.33.10', '192.168.33.11'] + ) + secret_patcher = patch( + 'appscale.common.appscale_info.get_secret', + return_value='secret' + ) + local_stats_patcher = patch( + 'appscale.hermes.producers.node_stats.NodeStatsSource.get_current', + return_value=stats_test_data['192.168.33.10'] + ) + json_method = MagicMock(return_value=future(raw_test_data['192.168.33.11'])) + response = MagicMock(json=json_method, status=200) + get_remote_patcher = patch( + 'aiohttp.ClientSession.get', + return_value=AsyncContextMock(aenter=response) + ) # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test - stats, failures = yield cluster_stats.cluster_nodes_stats.get_current() + with contextlib.ExitStack() as stack: + # Start patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(secret_patcher) + stack.enter_context(local_stats_patcher) + session_get_mock = stack.enter_context(get_remote_patcher) + # Call method under test + stats, failures = await cluster_stats.cluster_nodes_stats.get_current() # ASSERTING EXPECTATIONS - request_to_slave = mock_fetch.call_args[0][0] - self.assertEqual(json.loads(request_to_slave.body), {}) - self.assertEqual( - request_to_slave.url, 'http://192.168.33.11:4378/stats/local/node' - ) - self.assertDictContainsSubset( - request_to_slave.headers, {'Appscale-Secret': 'secret'} + session_get_mock.assert_called_once_with( + 'http://192.168.33.11:4378/stats/local/node', + headers={'Appscale-Secret': 'secret'}, + json={}, timeout=constants.REMOTE_REQUEST_TIMEOUT ) - self.assertEqual(failures, {}) + assert failures == {} local_stats = stats['192.168.33.10'] slave_stats = stats['192.168.33.11'] - self.assertIsInstance(local_stats, node_stats.NodeStatsSnapshot) - self.assertEqual(local_stats.utc_timestamp, 1494248091.0) - self.assertIsInstance(slave_stats, node_stats.NodeStatsSnapshot) - self.assertEqual(slave_stats.utc_timestamp, 1494248082.0) - - @patch.object(cluster_stats, 'options') - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_nodes_stats, 'ips_getter') - @patch.object(cluster_stats.httpclient.AsyncHTTPClient, 'fetch') - @patch.object(node_stats.NodeStatsSource, 'get_current') - @testing.gen_test - def test_remote_failure(self, mock_get_current, mock_fetch, - mock_ips_getter, mock_get_private_ip, mock_options): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.10', '192.168.33.11'] - # Mock secret - mock_options.secret = 'secret' + assert isinstance(local_stats, node_stats.NodeStatsSnapshot) + assert local_stats.utc_timestamp == 1494248091.0 + assert isinstance(slave_stats, node_stats.NodeStatsSnapshot) + assert slave_stats.utc_timestamp == 1494248082.0 + + @staticmethod + @pytest.mark.asyncio + async def test_remote_failure(): # Read test data from json file - stats_test_data = get_stats_from_file( + raw_test_data, stats_test_data = get_stats_from_file( 'node-stats.json', node_stats.NodeStatsSnapshot - )[1] - # Mock local source - mock_get_current.return_value = stats_test_data['192.168.33.10'] - # Mock AsyncHTTPClient.fetch using raw stats dictionaries from test data - failure = httpclient.HTTPError(500, "Timeout error") 
- future_response = gen.Future() - future_response.set_exception(failure) - mock_fetch.return_value = future_response + ) + + private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_nodes_stats.ips_getter', + return_value=['192.168.33.10', '192.168.33.11'] + ) + secret_patcher = patch( + 'appscale.common.appscale_info.get_secret', + return_value='secret' + ) + local_stats_patcher = patch( + 'appscale.hermes.producers.node_stats.NodeStatsSource.get_current', + return_value=stats_test_data['192.168.33.10'] + ) + get_remote_patcher = patch( + 'aiohttp.ClientSession.get', + side_effect=aiohttp.ClientError('HTTP 504: Gateway Timeout') + ) # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test - stats, failures = yield cluster_stats.cluster_nodes_stats.get_current() + with contextlib.ExitStack() as stack: + # Start patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(secret_patcher) + stack.enter_context(local_stats_patcher) + session_get_mock = stack.enter_context(get_remote_patcher) + # Call method under test + stats, failures = await cluster_stats.cluster_nodes_stats.get_current() # ASSERTING EXPECTATIONS - request_to_slave = mock_fetch.call_args[0][0] - self.assertEqual(json.loads(request_to_slave.body), {}) - self.assertEqual( - request_to_slave.url, 'http://192.168.33.11:4378/stats/local/node' - ) - self.assertDictContainsSubset( - request_to_slave.headers, {'Appscale-Secret': 'secret'} + session_get_mock.assert_called_once_with( + 'http://192.168.33.11:4378/stats/local/node', + headers={'Appscale-Secret': 'secret'}, + json={}, timeout=constants.REMOTE_REQUEST_TIMEOUT ) local_stats = stats['192.168.33.10'] - self.assertNotIn('192.168.33.11', stats) - self.assertIsInstance(local_stats, node_stats.NodeStatsSnapshot) - self.assertEqual(local_stats.utc_timestamp, 1494248091.0) - self.assertEqual(failures, {'192.168.33.11': 'HTTP 500: Timeout error'}) - - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_nodes_stats, 'ips_getter') - @patch.object(node_stats.NodeStatsSource, 'get_current') - @testing.gen_test - def test_local_failure(self, mock_get_current, mock_ips_getter, - mock_get_private_ip): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.10'] - # Mock local source - mock_get_current.side_effect = ValueError("Something strange \u2234") + assert '192.168.33.11' not in stats + assert isinstance(local_stats, node_stats.NodeStatsSnapshot) + assert local_stats.utc_timestamp == 1494248091.0 + assert failures == {'192.168.33.11': 'HTTP 504: Gateway Timeout'} + + @staticmethod + @pytest.mark.asyncio + async def test_local_failure(): + private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_nodes_stats.ips_getter', + return_value=['192.168.33.10'] + ) + local_stats_patcher = patch( + 'appscale.hermes.producers.node_stats.NodeStatsSource.get_current', + side_effect=ValueError("Something strange \u2234") + ) # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test - stats, failures = yield cluster_stats.cluster_nodes_stats.get_current() + with contextlib.ExitStack() as stack: + # Start 
patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(local_stats_patcher) + # Call method under test + stats, failures = await cluster_stats.cluster_nodes_stats.get_current() # ASSERTING EXPECTATIONS - self.assertEqual(stats, {}) - self.assertEqual(failures, {'192.168.33.10': "Something strange \u2234"}) - - @patch.object(cluster_stats, 'options') - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_nodes_stats, 'ips_getter') - @patch.object(cluster_stats.httpclient.AsyncHTTPClient, 'fetch') - @patch.object(node_stats.NodeStatsSource, 'get_current') - @testing.gen_test - def test_filtered_cluster_node_stats(self, mock_get_current, mock_fetch, - mock_ips_getter, mock_get_private_ip, - mock_options): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.10', '192.168.33.11'] - # Mock secret - mock_options.secret = 'secret' + assert stats == {} + assert failures == {'192.168.33.10': "Something strange \u2234"} + + @staticmethod + @pytest.mark.asyncio + async def test_filtered_cluster_nodes_stats(): # Read test data from json file raw_test_data, stats_test_data = get_stats_from_file( 'node-stats.json', node_stats.NodeStatsSnapshot ) - # Mock local source - mock_get_current.return_value = stats_test_data['192.168.33.10'] - # Mock AsyncHTTPClient.fetch using raw stats dictionaries from test data - response = MagicMock(body=json.dumps(raw_test_data['192.168.33.11']), - code=200, reason='OK') - future_response = gen.Future() - future_response.set_result(response) - mock_fetch.return_value = future_response + + private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_nodes_stats.ips_getter', + return_value=['192.168.33.10', '192.168.33.11'] + ) + secret_patcher = patch( + 'appscale.common.appscale_info.get_secret', + return_value='secret' + ) + local_stats_patcher = patch( + 'appscale.hermes.producers.node_stats.NodeStatsSource.get_current', + return_value=stats_test_data['192.168.33.10'] + ) + json_method = MagicMock(return_value=future(raw_test_data['192.168.33.11'])) + response = MagicMock(json=json_method, status=200) + get_remote_patcher = patch( + 'aiohttp.ClientSession.get', + return_value=AsyncContextMock(aenter=response) + ) + # Prepare raw dict with include lists raw_include_lists = { 'node': ['cpu', 'memory'], @@ -174,117 +221,132 @@ def test_filtered_cluster_node_stats(self, mock_get_current, mock_fetch, } # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test to get stats with filtered set of fields - include_lists = IncludeLists(raw_include_lists) - stats, failures = yield cluster_stats.cluster_nodes_stats.get_current( - max_age=10, include_lists=include_lists - ) + with contextlib.ExitStack() as stack: + # Start patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(secret_patcher) + stack.enter_context(local_stats_patcher) + session_get_mock = stack.enter_context(get_remote_patcher) + # Call method under test to get stats with filtered set of fields + include_lists = IncludeLists(raw_include_lists) + stats, failures = await cluster_stats.cluster_nodes_stats.get_current( + max_age=10, include_lists=include_lists + ) # ASSERTING EXPECTATIONS - request_to_slave = 
mock_fetch.call_args[0][0] - self.assertEqual( - json.loads(request_to_slave.body), - { + session_get_mock.assert_called_once_with( + 'http://192.168.33.11:4378/stats/local/node', + headers={'Appscale-Secret': 'secret'}, + json={ 'max_age': 10, 'include_lists': raw_include_lists, - }) - self.assertEqual( - request_to_slave.url, 'http://192.168.33.11:4378/stats/local/node' - ) - self.assertDictContainsSubset( - request_to_slave.headers, {'Appscale-Secret': 'secret'} + }, + timeout=constants.REMOTE_REQUEST_TIMEOUT ) - self.assertEqual(failures, {}) + assert failures == {} local_stats = stats['192.168.33.10'] slave_stats = stats['192.168.33.11'] - self.assertIsInstance(local_stats, node_stats.NodeStatsSnapshot) - self.assertEqual(local_stats.utc_timestamp, 1494248091.0) - self.assertIsInstance(slave_stats, node_stats.NodeStatsSnapshot) - self.assertEqual(slave_stats.utc_timestamp, 1494248082.0) - - -class TestClusterProcessesStatsProducer(testing.AsyncTestCase): - - @patch.object(cluster_stats, 'options') - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_processes_stats, 'ips_getter') - @patch.object(cluster_stats.httpclient.AsyncHTTPClient, 'fetch') - @patch.object(process_stats.ProcessesStatsSource, 'get_current') - @testing.gen_test - def test_verbose_cluster_processes_stats(self, mock_get_current, mock_fetch, - mock_ips_getter, mock_get_private_ip, - mock_options): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.10', '192.168.33.11'] - # Mock secret - mock_options.secret = 'secret' + assert isinstance(local_stats, node_stats.NodeStatsSnapshot) + assert local_stats.utc_timestamp == 1494248091.0 + assert isinstance(slave_stats, node_stats.NodeStatsSnapshot) + assert slave_stats.utc_timestamp == 1494248082.0 + + +class TestClusterProcessesStatsProducer: + + @staticmethod + @pytest.mark.asyncio + async def test_verbose_cluster_processes_stats(): # Read test data from json file raw_test_data, stats_test_data = get_stats_from_file( 'processes-stats.json', process_stats.ProcessesStatsSnapshot ) - # Mock local source - mock_get_current.return_value = stats_test_data['192.168.33.10'] - # Mock AsyncHTTPClient.fetch using raw stats dictionaries from test data - response = MagicMock(body=json.dumps(raw_test_data['192.168.33.11']), - code=200, reason='OK') - future_response = gen.Future() - future_response.set_result(response) - mock_fetch.return_value = future_response + + private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_processes_stats.ips_getter', + return_value=['192.168.33.10', '192.168.33.11'] + ) + secret_patcher = patch( + 'appscale.common.appscale_info.get_secret', + return_value='secret' + ) + local_stats_patcher = patch( + 'appscale.hermes.producers.process_stats.ProcessesStatsSource.get_current', + return_value=stats_test_data['192.168.33.10'] + ) + json_method = MagicMock(return_value=future(raw_test_data['192.168.33.11'])) + response = MagicMock(json=json_method, status=200) + get_remote_patcher = patch( + 'aiohttp.ClientSession.get', + return_value=AsyncContextMock(aenter=response) + ) # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test to get the latest stats - stats, failures = yield cluster_stats.cluster_processes_stats.get_current() + with contextlib.ExitStack() as 
stack: + # Start patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(secret_patcher) + stack.enter_context(local_stats_patcher) + session_get_mock = stack.enter_context(get_remote_patcher) + # Call method under test + stats, failures = await cluster_stats.cluster_processes_stats.get_current() # ASSERTING EXPECTATIONS - request_to_slave = mock_fetch.call_args[0][0] - self.assertEqual(json.loads(request_to_slave.body), {}) - self.assertEqual( - request_to_slave.url, - 'http://192.168.33.11:4378/stats/local/processes' + session_get_mock.assert_called_once_with( + 'http://192.168.33.11:4378/stats/local/processes', + headers={'Appscale-Secret': 'secret'}, + json={}, timeout=constants.REMOTE_REQUEST_TIMEOUT ) - self.assertDictContainsSubset( - request_to_slave.headers, {'Appscale-Secret': 'secret'} - ) - self.assertEqual(failures, {}) + assert failures == {} local_stats = stats['192.168.33.10'] slave_stats = stats['192.168.33.11'] - self.assertIsInstance(local_stats, process_stats.ProcessesStatsSnapshot) - self.assertEqual(len(local_stats.processes_stats), 24) - self.assertEqual(local_stats.utc_timestamp, 1494248000.0) - self.assertIsInstance(slave_stats, process_stats.ProcessesStatsSnapshot) - self.assertEqual(len(slave_stats.processes_stats), 10) - self.assertEqual(slave_stats.utc_timestamp, 1494248091.0) - - @patch.object(cluster_stats, 'options') - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_processes_stats, 'ips_getter') - @patch.object(cluster_stats.httpclient.AsyncHTTPClient, 'fetch') - @patch.object(process_stats.ProcessesStatsSource, 'get_current') - @testing.gen_test - def test_filtered_cluster_processes_stats(self, mock_get_current, mock_fetch, - mock_ips_getter, mock_get_private_ip, - mock_options): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.10', '192.168.33.11'] - # Mock secret - mock_options.secret = 'secret' + assert isinstance(local_stats, process_stats.ProcessesStatsSnapshot) + assert len(local_stats.processes_stats) == 24 + assert local_stats.utc_timestamp == 1494248000.0 + assert isinstance(slave_stats, process_stats.ProcessesStatsSnapshot) + assert len(slave_stats.processes_stats) == 10 + assert slave_stats.utc_timestamp == 1494248091.0 + + @staticmethod + @pytest.mark.asyncio + async def test_filtered_cluster_processes_stats(): # Read test data from json file raw_test_data, stats_test_data = get_stats_from_file( 'processes-stats.json', process_stats.ProcessesStatsSnapshot ) - # Mock local source - mock_get_current.return_value = stats_test_data['192.168.33.10'] - # Mock AsyncHTTPClient.fetch using raw stats dictionaries from test data - response = MagicMock(body=json.dumps(raw_test_data['192.168.33.11']), - code=200, reason='OK') - future_response = gen.Future() - future_response.set_result(response) - mock_fetch.return_value = future_response - #Prepare raw dict with include lists + + private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_processes_stats.ips_getter', + return_value=['192.168.33.10', '192.168.33.11'] + ) + secret_patcher = patch( + 'appscale.common.appscale_info.get_secret', + return_value='secret' + ) + local_stats_patcher = patch( + 
'appscale.hermes.producers.process_stats.ProcessesStatsSource.get_current', + return_value=stats_test_data['192.168.33.10'] + ) + json_method = MagicMock(return_value=future(raw_test_data['192.168.33.11'])) + response = MagicMock(json=json_method, status=200) + get_remote_patcher = patch( + 'aiohttp.ClientSession.get', + return_value=AsyncContextMock(aenter=response) + ) + + # Prepare raw dict with include lists raw_include_lists = { 'process': ['monit_name', 'unified_service_name', 'application_id', 'port', 'cpu', 'memory', 'children_stats_sum'], @@ -294,138 +356,153 @@ def test_filtered_cluster_processes_stats(self, mock_get_current, mock_fetch, } # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test to get stats with filtered set of fields - include_lists = IncludeLists(raw_include_lists) - stats, failures = yield cluster_stats.cluster_processes_stats.get_current( - max_age=15, include_lists=include_lists - ) - self.assertEqual(failures, {}) + with contextlib.ExitStack() as stack: + # Start patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(secret_patcher) + stack.enter_context(local_stats_patcher) + session_get_mock = stack.enter_context(get_remote_patcher) + # Call method under test to get stats with filtered set of fields + include_lists = IncludeLists(raw_include_lists) + stats, failures = await cluster_stats.cluster_processes_stats.get_current( + max_age=15, include_lists=include_lists + ) # ASSERTING EXPECTATIONS - request_to_slave = mock_fetch.call_args[0][0] - self.assertEqual( - json.loads(request_to_slave.body), - { + session_get_mock.assert_called_once_with( + 'http://192.168.33.11:4378/stats/local/processes', + headers={'Appscale-Secret': 'secret'}, + json={ 'max_age': 15, 'include_lists': raw_include_lists, - }) - self.assertEqual( - request_to_slave.url, - 'http://192.168.33.11:4378/stats/local/processes' - ) - self.assertDictContainsSubset( - request_to_slave.headers, {'Appscale-Secret': 'secret'} + }, + timeout=constants.REMOTE_REQUEST_TIMEOUT ) + assert failures == {} local_stats = stats['192.168.33.10'] slave_stats = stats['192.168.33.11'] - self.assertIsInstance(local_stats, process_stats.ProcessesStatsSnapshot) - self.assertEqual(len(local_stats.processes_stats), 24) - self.assertEqual(local_stats.utc_timestamp, 1494248000.0) - self.assertIsInstance(slave_stats, process_stats.ProcessesStatsSnapshot) - self.assertEqual(len(slave_stats.processes_stats), 10) - self.assertEqual(slave_stats.utc_timestamp, 1494248091.0) - - -class TestClusterProxiesStatsProducer(testing.AsyncTestCase): - - @patch.object(cluster_stats, 'options') - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_proxies_stats, 'ips_getter') - @patch.object(cluster_stats.httpclient.AsyncHTTPClient, 'fetch') - @testing.gen_test - def test_verbose_cluster_proxies_stats(self, mock_fetch, mock_ips_getter, - mock_get_private_ip, mock_options): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.11'] - # Mock secret - mock_options.secret = 'secret' + assert isinstance(local_stats, process_stats.ProcessesStatsSnapshot) + assert len(local_stats.processes_stats) == 24 + assert local_stats.utc_timestamp == 1494248000.0 + assert isinstance(slave_stats, process_stats.ProcessesStatsSnapshot) + assert len(slave_stats.processes_stats) == 10 + assert slave_stats.utc_timestamp == 1494248091.0 + + 
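
# The cluster-stats tests above all rely on the same trick for mocking aiohttp
# calls: `aiohttp.ClientSession.get` is patched to return an object whose
# `__aenter__` hands back a canned response, so `async with session.get(...)`
# in the code under test receives the fake.  Below is a minimal, self-contained
# sketch of that pattern; the names `fetch_node_stats` and `main` are
# illustrative only and are not part of Hermes.

import asyncio
from mock import MagicMock, patch

import aiohttp


class AsyncContextMock(MagicMock):
  async def __aenter__(self):
    return self.aenter

  async def __aexit__(self, exc_type, exc_val, exc_tb):
    return None


async def fetch_node_stats(url):
  # Stand-in for the code under test: enters the (mocked) context manager
  # returned by session.get() and reads its JSON payload.
  async with aiohttp.ClientSession() as session:
    async with session.get(url) as resp:
      return await resp.json()


async def main():
  # Wrap the canned payload in an already-resolved Future so `await resp.json()`
  # works against a plain MagicMock response object.
  payload = asyncio.Future()
  payload.set_result({'utc_timestamp': 1494248082.0})
  response = MagicMock(json=MagicMock(return_value=payload), status=200)
  patcher = patch('aiohttp.ClientSession.get',
                  return_value=AsyncContextMock(aenter=response))
  with patcher:
    stats = await fetch_node_stats('http://192.168.33.11:4378/stats/local/node')
  assert stats == {'utc_timestamp': 1494248082.0}


if __name__ == '__main__':
  asyncio.get_event_loop().run_until_complete(main())
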
+class TestClusterProxiesStatsProducer: + + @staticmethod + @pytest.mark.asyncio + async def test_verbose_cluster_proxies_stats(): # Read test data from json file raw_test_data = get_stats_from_file( 'proxies-stats.json', proxy_stats.ProxiesStatsSnapshot )[0] - # Mock AsyncHTTPClient.fetch using raw stats dictionaries from test data - response = MagicMock(body=json.dumps(raw_test_data['192.168.33.11']), - code=200, reason='OK') - future_response = gen.Future() - future_response.set_result(response) - mock_fetch.return_value = future_response + + private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_proxies_stats.ips_getter', + return_value=['192.168.33.11'] + ) + secret_patcher = patch( + 'appscale.common.appscale_info.get_secret', + return_value='secret' + ) + json_method = MagicMock(return_value=future(raw_test_data['192.168.33.11'])) + response = MagicMock(json=json_method, status=200) + get_remote_patcher = patch( + 'aiohttp.ClientSession.get', + return_value=AsyncContextMock(aenter=response) + ) # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test to get the latest stats - stats, failures = yield cluster_stats.cluster_proxies_stats.get_current() + with contextlib.ExitStack() as stack: + # Start patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(secret_patcher) + session_get_mock = stack.enter_context(get_remote_patcher) + # Call method under test + stats, failures = await cluster_stats.cluster_proxies_stats.get_current() # ASSERTING EXPECTATIONS - request_to_lb = mock_fetch.call_args[0][0] - self.assertEqual(json.loads(request_to_lb.body), {}) - self.assertEqual( - request_to_lb.url, 'http://192.168.33.11:4378/stats/local/proxies' - ) - self.assertDictContainsSubset( - request_to_lb.headers, {'Appscale-Secret': 'secret'} + session_get_mock.assert_called_once_with( + 'http://192.168.33.11:4378/stats/local/proxies', + headers={'Appscale-Secret': 'secret'}, + json={}, timeout=constants.REMOTE_REQUEST_TIMEOUT ) - self.assertEqual(failures, {}) + assert failures == {} lb_stats = stats['192.168.33.11'] - self.assertIsInstance(lb_stats, proxy_stats.ProxiesStatsSnapshot) - self.assertEqual(len(lb_stats.proxies_stats), 5) - self.assertEqual(lb_stats.utc_timestamp, 1494248097.0) - - @patch.object(cluster_stats, 'options') - @patch.object(cluster_stats.appscale_info, 'get_private_ip') - @patch.object(cluster_stats.cluster_proxies_stats, 'ips_getter') - @patch.object(cluster_stats.httpclient.AsyncHTTPClient, 'fetch') - @testing.gen_test - def test_filtered_cluster_proxies_stats(self, mock_fetch, mock_ips_getter, - mock_get_private_ip, mock_options): - # Mock appscale_info functions for getting IPs - mock_get_private_ip.return_value = '192.168.33.10' - mock_ips_getter.return_value = ['192.168.33.11'] - # Mock secret - mock_options.secret = 'secret' + assert isinstance(lb_stats, proxy_stats.ProxiesStatsSnapshot) + assert len(lb_stats.proxies_stats) == 5 + assert lb_stats.utc_timestamp == 1494248097.0 + + @staticmethod + @pytest.mark.asyncio + async def test_filtered_cluster_proxies_stats(): # Read test data from json file raw_test_data = get_stats_from_file( 'proxies-stats.json', proxy_stats.ProxiesStatsSnapshot )[0] - #Prepare raw dict with include lists + + private_ip_patcher = patch( + 'appscale.common.appscale_info.get_private_ip', + return_value='192.168.33.10' + ) + 
ips_getter_patcher = patch( + 'appscale.hermes.producers.cluster_stats.cluster_proxies_stats.ips_getter', + return_value=['192.168.33.11'] + ) + secret_patcher = patch( + 'appscale.common.appscale_info.get_secret', + return_value='secret' + ) + json_method = MagicMock(return_value=future(raw_test_data['192.168.33.11'])) + response = MagicMock(json=json_method, status=200) + get_remote_patcher = patch( + 'aiohttp.ClientSession.get', + return_value=AsyncContextMock(aenter=response) + ) + # Prepare raw dict with include lists raw_include_lists = { 'proxy': ['name', 'unified_service_name', 'application_id', 'frontend', 'backend'], 'proxy.frontend': ['scur', 'smax', 'rate', 'req_rate', 'req_tot'], 'proxy.backend': ['qcur', 'scur', 'hrsp_5xx', 'qtime', 'rtime'], } - # Mock AsyncHTTPClient.fetch using raw stats dictionaries from test data - response = MagicMock(body=json.dumps(raw_test_data['192.168.33.11']), - code=200, reason='OK') - future_response = gen.Future() - future_response.set_result(response) - mock_fetch.return_value = future_response # ^^^ ALL INPUTS ARE SPECIFIED (or mocked) ^^^ - # Call method under test to get stats with filtered set of fields - include_lists = IncludeLists(raw_include_lists) - stats, failures = yield cluster_stats.cluster_proxies_stats.get_current( - max_age=18, include_lists=include_lists - ) + with contextlib.ExitStack() as stack: + # Start patchers + stack.enter_context(private_ip_patcher) + stack.enter_context(ips_getter_patcher) + stack.enter_context(secret_patcher) + session_get_mock = stack.enter_context(get_remote_patcher) + # Call method under test to get stats with filtered set of fields + include_lists = IncludeLists(raw_include_lists) + stats, failures = await cluster_stats.cluster_proxies_stats.get_current( + max_age=18, include_lists=include_lists + ) # ASSERTING EXPECTATIONS - request_to_lb = mock_fetch.call_args[0][0] - self.assertEqual( - json.loads(request_to_lb.body), - { + session_get_mock.assert_called_once_with( + 'http://192.168.33.11:4378/stats/local/proxies', + headers={'Appscale-Secret': 'secret'}, + json={ 'max_age': 18, 'include_lists': raw_include_lists, - }) - self.assertEqual( - request_to_lb.url, 'http://192.168.33.11:4378/stats/local/proxies' - ) - self.assertDictContainsSubset( - request_to_lb.headers, {'Appscale-Secret': 'secret'} + }, + timeout=constants.REMOTE_REQUEST_TIMEOUT ) - self.assertEqual(failures, {}) + assert failures == {} lb_stats = stats['192.168.33.11'] - self.assertIsInstance(lb_stats, proxy_stats.ProxiesStatsSnapshot) - self.assertEqual(len(lb_stats.proxies_stats), 5) - self.assertEqual(lb_stats.utc_timestamp, 1494248097.0) + assert isinstance(lb_stats, proxy_stats.ProxiesStatsSnapshot) + assert len(lb_stats.proxies_stats) == 5 + assert lb_stats.utc_timestamp == 1494248097.0 diff --git a/Hermes/appscale/hermes/producers/tests/test_node.py b/Hermes/appscale/hermes/producers/tests/test_node.py index 7042ab4d37..d97e0a0649 100644 --- a/Hermes/appscale/hermes/producers/tests/test_node.py +++ b/Hermes/appscale/hermes/producers/tests/test_node.py @@ -1,29 +1,25 @@ -import unittest - from mock import patch from appscale.hermes.producers import node_stats -class TestCurrentNodeStats(unittest.TestCase): - - @patch.object(node_stats.appscale_info, 'get_private_ip') - def test_node_stats(self, mock_get_private_ip): - # Mocking `get_private_ip` - mock_get_private_ip.return_value = '10.10.11.12' +@patch('appscale.common.appscale_info.get_private_ip') +def test_node_stats(mock_get_private_ip): + # Mocking `get_private_ip` + 
mock_get_private_ip.return_value = '10.10.11.12' - # Calling method under test - stats = node_stats.NodeStatsSource.get_current() + # Calling method under test + stats = node_stats.NodeStatsSource.get_current() - # Asserting expectations - self.assertIsInstance(stats, node_stats.NodeStatsSnapshot) - self.assertIsInstance(stats.utc_timestamp, float) - self.assertEqual(stats.private_ip, '10.10.11.12') - self.assertIsInstance(stats.cpu, node_stats.NodeCPU) - self.assertIsInstance(stats.memory, node_stats.NodeMemory) - self.assertIsInstance(stats.swap, node_stats.NodeSwap) - self.assertIsInstance(stats.disk_io, node_stats.NodeDiskIO) - self.assertIsInstance(stats.partitions_dict, dict) - self.assertIsInstance(stats.partitions_dict['/'], node_stats.NodePartition) - self.assertIsInstance(stats.network, node_stats.NodeNetwork) - self.assertIsInstance(stats.loadavg, node_stats.NodeLoadAvg) + # Asserting expectations + assert isinstance(stats, node_stats.NodeStatsSnapshot) + assert isinstance(stats.utc_timestamp, float) + assert stats.private_ip == '10.10.11.12' + assert isinstance(stats.cpu, node_stats.NodeCPU) + assert isinstance(stats.memory, node_stats.NodeMemory) + assert isinstance(stats.swap, node_stats.NodeSwap) + assert isinstance(stats.disk_io, node_stats.NodeDiskIO) + assert isinstance(stats.partitions_dict, dict) + assert isinstance(stats.partitions_dict['/'], node_stats.NodePartition) + assert isinstance(stats.network, node_stats.NodeNetwork) + assert isinstance(stats.loadavg, node_stats.NodeLoadAvg) diff --git a/Hermes/appscale/hermes/producers/tests/test_process.py b/Hermes/appscale/hermes/producers/tests/test_process.py index e9e57bd930..b618eba8ec 100644 --- a/Hermes/appscale/hermes/producers/tests/test_process.py +++ b/Hermes/appscale/hermes/producers/tests/test_process.py @@ -1,12 +1,11 @@ import os -import unittest from mock import patch, call from appscale.hermes.unified_service_names import ServicesEnum from appscale.hermes.producers import process_stats -MONIT_STATUS = """ +MONIT_STATUS = b""" The Monit daemon 5.6 uptime: 20h 22m Process 'haproxy' @@ -61,65 +60,66 @@ """ -class TestCurrentProcessesStats(unittest.TestCase): - - @patch.object(process_stats.appscale_info, 'get_private_ip') - @patch.object(process_stats, '_process_stats') - @patch.object(process_stats.subprocess, 'check_output') - def test_reading_monit_status(self, mock_check_output, mock_process_stats, - mock_get_private_ip): - # Mocking `monit status` output and appscale_info.get_private_ip - mock_check_output.return_value = MONIT_STATUS - mock_get_private_ip.return_value = '1.1.1.1' - - # Calling method under test - snapshot = process_stats.ProcessesStatsSource.get_current() - - # Checking expectations - mock_process_stats.assert_has_calls([ - call(8466, ServicesEnum.HAPROXY, 'haproxy', '1.1.1.1'), - call(5045, ServicesEnum.APPLICATION, 'app___my-25app-20003', '1.1.1.1') - ]) - self.assertIsInstance(snapshot, process_stats.ProcessesStatsSnapshot) - - @patch.object(process_stats.appscale_info, 'get_private_ip') - @patch.object(process_stats.subprocess, 'check_output') - @patch.object(process_stats.logger, 'warn') - def test_process_stats(self, mock_logging_warn, mock_check_output, - mock_get_private_ip): - # Mocking `monit status` output and appscale_info.get_private_ip - mock_check_output.return_value = ( - "Process 'app___fakeapp-testprocess-321'\n" - " pid {mypid}\n" - "Process 'proc-with-invalid-PID'\n" - " pid 70000\n".format(mypid=os.getpid()) - ) - mock_get_private_ip.return_value = '10.10.11.12' - - # 
Calling method under test - stats_snapshot = process_stats.ProcessesStatsSource.get_current() - - # Verifying outcomes - self.assertIsInstance(stats_snapshot.utc_timestamp, float) - processes_stats = stats_snapshot.processes_stats - mock_logging_warn.assert_called_once_with( - "Unable to get process stats for proc-with-invalid-PID " - "(psutil.NoSuchProcess no process found with pid 70000)" - ) - self.assertEqual(len(processes_stats), 1) - stats = processes_stats[0] - self.assertIsInstance(stats, process_stats.ProcessStats) - self.assertEqual(stats.pid, os.getpid()) - self.assertEqual(stats.monit_name, 'app___fakeapp-testprocess-321') - self.assertEqual(stats.unified_service_name, 'application') - self.assertEqual(stats.application_id, 'fakeapp-testprocess') - self.assertEqual(stats.private_ip, '10.10.11.12') - self.assertEqual(stats.port, 321) - self.assertIsInstance(stats.cmdline, list) - self.assertIsInstance(stats.cpu, process_stats.ProcessCPU) - self.assertIsInstance(stats.memory, process_stats.ProcessMemory) - self.assertIsInstance(stats.disk_io, process_stats.ProcessDiskIO) - self.assertIsInstance(stats.network, process_stats.ProcessNetwork) - self.assertIsInstance(stats.threads_num, int) - self.assertIsInstance(stats.children_stats_sum, process_stats.ProcessChildrenSum) - self.assertIsInstance(stats.children_num, int) +@patch('appscale.common.appscale_info.get_private_ip') +@patch('appscale.hermes.producers.process_stats._process_stats') +@patch('subprocess.check_output') +def test_reading_monit_status(mock_check_output, mock_process_stats, + mock_get_private_ip): + # Mocking `monit status` output and appscale_info.get_private_ip + mock_check_output.return_value = MONIT_STATUS + mock_get_private_ip.return_value = '1.1.1.1' + + # Calling method under test + snapshot = process_stats.ProcessesStatsSource.get_current() + + # Checking expectations + mock_process_stats.assert_has_calls([ + call(8466, ServicesEnum.HAPROXY, 'haproxy', '1.1.1.1'), + call(5045, ServicesEnum.APPLICATION, 'app___my-25app-20003', '1.1.1.1') + ]) + assert isinstance(snapshot, process_stats.ProcessesStatsSnapshot) + + +@patch('appscale.admin.service_manager.ServiceManager.get_state') +@patch('appscale.common.appscale_info.get_private_ip') +@patch('subprocess.check_output') +@patch('appscale.hermes.producers.process_stats.logger.warning') +def test_process_stats(mock_logging_warn, mock_check_output, + mock_get_private_ip, mock_get_state): + # Mocking `monit status` output and appscale_info.get_private_ip + mock_check_output.return_value = ( + "Process 'app___fakeapp-testprocess-321'\n" + " pid {mypid}\n" + "Process 'proc-with-invalid-PID'\n" + " pid 70000\n".format(mypid=os.getpid()).encode() + ) + mock_get_private_ip.return_value = '10.10.11.12' + mock_get_state.return_value = [] + + # Calling method under test + stats_snapshot = process_stats.ProcessesStatsSource.get_current() + + # Verifying outcomes + assert isinstance(stats_snapshot.utc_timestamp, float) + processes_stats = stats_snapshot.processes_stats + mock_logging_warn.assert_called_once_with( + "Unable to get process stats for proc-with-invalid-PID " + "(psutil.NoSuchProcess no process found with pid 70000)" + ) + assert len(processes_stats) == 1 + stats = processes_stats[0] + assert isinstance(stats, process_stats.ProcessStats) + assert stats.pid == os.getpid() + assert stats.monit_name == 'app___fakeapp-testprocess-321' + assert stats.unified_service_name == 'application' + assert stats.application_id == 'fakeapp-testprocess' + assert stats.private_ip 
== '10.10.11.12' + assert stats.port == 321 + assert isinstance(stats.cmdline, list) + assert isinstance(stats.cpu, process_stats.ProcessCPU) + assert isinstance(stats.memory, process_stats.ProcessMemory) + assert isinstance(stats.disk_io, process_stats.ProcessDiskIO) + assert isinstance(stats.network, process_stats.ProcessNetwork) + assert isinstance(stats.threads_num, int) + assert isinstance(stats.children_stats_sum, process_stats.ProcessChildrenSum) + assert isinstance(stats.children_num, int) diff --git a/Hermes/appscale/hermes/producers/tests/test_proxy.py b/Hermes/appscale/hermes/producers/tests/test_proxy.py index 466d075d1f..0b3ca87142 100644 --- a/Hermes/appscale/hermes/producers/tests/test_proxy.py +++ b/Hermes/appscale/hermes/producers/tests/test_proxy.py @@ -1,8 +1,9 @@ +import asyncio import os from os import path -import unittest import attr +import pytest from mock import patch, MagicMock from appscale.hermes.constants import MISSED @@ -12,179 +13,223 @@ TEST_DATA_DIR = os.path.join(CUR_DIR, 'test-data') -class TestCurrentProxiesStats(unittest.TestCase): - - def setUp(self): - self.stats_file = None - - def tearDown(self): - if self.stats_file: - self.stats_file.close() - - @patch.object(proxy_stats.socket, 'socket') - def test_haproxy_stats_v1_5(self, mock_socket): - # Mocking haproxy stats socket with csv file - self.stats_file = open(path.join(TEST_DATA_DIR, 'haproxy-stats-v1.5.csv')) - fake_socket = MagicMock(recv=self.stats_file.read) - mock_socket.return_value = fake_socket +def future(value=None): + future_obj = asyncio.Future() + future_obj.set_result(value) + return future_obj + + +class TestCurrentProxiesStats: + + @staticmethod + @pytest.mark.asyncio + async def test_haproxy_stats_v1_5(): + with open(path.join(TEST_DATA_DIR, 'haproxy-stats-v1.5.csv')) as stats_file: + stats_bytes = stats_file.read().encode() + # Mocking haproxy stats socket with csv content + fake_reader = MagicMock(read=MagicMock( + side_effect=[ + future(stats_bytes), # First call + future(b'') # Second call + ] + )) + fake_writer = MagicMock(write=MagicMock(return_value=None)) + socket_patcher = patch( + 'asyncio.open_unix_connection', + return_value=future((fake_reader, fake_writer)) + ) - # Running method under test - stats_snapshot = proxy_stats.ProxiesStatsSource.get_current() + with socket_patcher: + # Running method under test + stats_snapshot = await proxy_stats.ProxiesStatsSource.get_current() # Verifying outcomes - self.assertIsInstance(stats_snapshot.utc_timestamp, float) + assert isinstance(stats_snapshot.utc_timestamp, float) proxies_stats = stats_snapshot.proxies_stats - self.assertEqual(len(proxies_stats), 5) + assert len(proxies_stats) == 5 proxies_stats_dict = { - proxy_stats.name: proxy_stats for proxy_stats in proxies_stats + px_stats.name: px_stats for px_stats in proxies_stats } - self.assertEqual(set(proxies_stats_dict), { + assert set(proxies_stats_dict) == { 'TaskQueue', 'UserAppServer', 'appscale-datastore_server', 'as_blob_server', 'gae_appscaledashboard' - }) + } # There are 5 proxies, let's choose one for deeper verification dashboard = proxies_stats_dict['gae_appscaledashboard'] - self.assertEqual(dashboard.name, 'gae_appscaledashboard') - self.assertEqual(dashboard.unified_service_name, 'application') - self.assertEqual(dashboard.application_id, 'appscaledashboard') + assert dashboard.name == 'gae_appscaledashboard' + assert dashboard.unified_service_name == 'application' + assert dashboard.application_id == 'appscaledashboard' # Frontend stats shouldn't have Nones 
frontend = dashboard.frontend for field in list(attr.fields_dict(proxy_stats.HAProxyFrontendStats).keys()): - self.assertIsNotNone(getattr(frontend, field)) + assert getattr(frontend, field) is not None # Backend stats shouldn't have Nones backend = dashboard.backend for field in list(attr.fields_dict(proxy_stats.HAProxyBackendStats).keys()): - self.assertIsNotNone(getattr(backend, field)) + assert getattr(backend, field) is not None # Backend stats can have Nones only in some fields servers = dashboard.servers - self.assertIsInstance(servers, list) - self.assertEqual(len(servers), 3) + assert isinstance(servers, list) + assert len(servers) == 3 for server in servers: for field in list(attr.fields_dict(proxy_stats.HAProxyServerStats).keys()): if field in {'qlimit', 'throttle', 'tracked', 'check_code', 'last_chk', 'last_agt'}: continue - self.assertIsNotNone(getattr(server, field)) + assert getattr(server, field) is not None # We don't have listeners on stats - self.assertEqual(dashboard.listeners, []) + assert dashboard.listeners == [] + + @staticmethod + @pytest.mark.asyncio + async def test_haproxy_stats_v1_4(): + with open(path.join(TEST_DATA_DIR, 'haproxy-stats-v1.4.csv')) as stats_file: + stats_bytes = stats_file.read().encode() + # Mocking haproxy stats socket with csv content + fake_reader = MagicMock(read=MagicMock( + side_effect=[ + future(stats_bytes), # First call + future(b'') # Second call + ] + )) + fake_writer = MagicMock(write=MagicMock(return_value=None)) + socket_patcher = patch( + 'asyncio.open_unix_connection', + return_value=future((fake_reader, fake_writer)) + ) - @patch.object(proxy_stats.socket, 'socket') - @patch.object(proxy_stats.logger, 'warn') - def test_haproxy_stats_v1_4(self, mock_logging_warn, mock_socket): - # Mocking "echo 'show stat' | socat stdio unix-connect:{}" with csv file - self.stats_file = open(path.join(TEST_DATA_DIR, 'haproxy-stats-v1.4.csv')) - fake_socket = MagicMock(recv=self.stats_file.read) - mock_socket.return_value = fake_socket + # Mock logger warning method + warning_patcher = patch( + 'appscale.hermes.producers.proxy_stats.logger.warning' + ) - # Running method under test - stats_snapshot = proxy_stats.ProxiesStatsSource.get_current() + with socket_patcher: + with warning_patcher as mock_logging_warn: + # Running method under test + proxy_stats.ProxiesStatsSource.first_run = True + stats_snapshot = await proxy_stats.ProxiesStatsSource.get_current() # Verifying outcomes - self.assertIsInstance(stats_snapshot.utc_timestamp, float) + assert isinstance(stats_snapshot.utc_timestamp, float) proxies_stats = stats_snapshot.proxies_stats - mock_logging_warn.assert_called_once_with( - "HAProxy stats fields ['rtime', 'ctime', 'comp_in', 'qtime', 'comp_byp', " - "'lastsess', 'comp_rsp', 'last_chk', 'ttime', 'comp_out', 'last_agt'] " - "are missed. 
Old version of HAProxy is probably used (v1.5+ is expected)" + assert ( + 'Old version of HAProxy is probably used (v1.5+ is expected)' in + mock_logging_warn.call_args[0][0] ) - self.assertEqual(len(proxies_stats), 5) + assert len(proxies_stats) == 5 proxies_stats_dict = { - proxy_stats.name: proxy_stats for proxy_stats in proxies_stats + px_stats.name: px_stats for px_stats in proxies_stats } - self.assertEqual(set(proxies_stats_dict), { + assert set(proxies_stats_dict) == { 'TaskQueue', 'UserAppServer', 'appscale-datastore_server', 'as_blob_server', 'gae_appscaledashboard' - }) + } # There are 5 proxies, let's choose one for deeper verification dashboard = proxies_stats_dict['gae_appscaledashboard'] - self.assertEqual(dashboard.name, 'gae_appscaledashboard') - self.assertEqual(dashboard.unified_service_name, 'application') - self.assertEqual(dashboard.application_id, 'appscaledashboard') + assert dashboard.name == 'gae_appscaledashboard' + assert dashboard.unified_service_name == 'application' + assert dashboard.application_id == 'appscaledashboard' # Frontend stats shouldn't have Nones frontend = dashboard.frontend for field in list(attr.fields_dict(proxy_stats.HAProxyFrontendStats).keys()): - self.assertIsNotNone(getattr(frontend, field)) + assert getattr(frontend, field) is not None # New columns should be highlighted for new_in_v1_5 in ('comp_byp', 'comp_rsp', 'comp_out', 'comp_in'): - self.assertIs(getattr(frontend, new_in_v1_5), MISSED) + assert getattr(frontend, new_in_v1_5) is MISSED # Backend stats shouldn't have Nones backend = dashboard.backend for field in list(attr.fields_dict(proxy_stats.HAProxyBackendStats).keys()): - self.assertIsNotNone(getattr(backend, field)) + assert getattr(backend, field) is not None # New columns should be highlighted for new_in_v1_5 in ('comp_byp', 'lastsess', 'comp_rsp', 'comp_out', 'comp_in', 'ttime', 'rtime', 'ctime', 'qtime'): - self.assertIs(getattr(backend, new_in_v1_5), MISSED) + assert getattr(backend, new_in_v1_5) is MISSED # Backend stats can have Nones only in some fields servers = dashboard.servers - self.assertIsInstance(servers, list) - self.assertEqual(len(servers), 3) + assert isinstance(servers, list) + assert len(servers) == 3 for server in servers: for field in list(attr.fields_dict(proxy_stats.HAProxyServerStats).keys()): if field in {'qlimit', 'throttle', 'tracked', 'check_code', 'last_chk', 'last_agt'}: continue - self.assertIsNotNone(getattr(server, field)) + assert getattr(server, field) is not None # New columns should be highlighted for new_in_v1_5 in ('lastsess', 'last_chk', 'ttime', 'last_agt', 'rtime', 'ctime', 'qtime'): - self.assertIs(getattr(server, new_in_v1_5), MISSED) + assert getattr(server, new_in_v1_5) is MISSED # We don't have listeners on stats - self.assertEqual(dashboard.listeners, []) - - -class TestGetServiceInstances(unittest.TestCase): - def setUp(self): - stats_file = open(path.join(TEST_DATA_DIR, 'haproxy-stats-v1.5.csv')) - fake_socket = MagicMock(recv=stats_file.read) - self.socket_patcher = patch.object(proxy_stats.socket, 'socket') - socket_mock = self.socket_patcher.start() - socket_mock.return_value = fake_socket - - def tearDown(self): - self.socket_patcher.stop() - - def test_taskqueue_instances(self): - taskqueue = proxy_stats.get_service_instances('mocked', 'TaskQueue') - self.assertEqual(taskqueue, [ + assert dashboard.listeners == [] + + +class TestGetServiceInstances: + @staticmethod + @pytest.fixture(autouse=True) + def haproxy_stats_v1_5(): + with open(path.join(TEST_DATA_DIR, 
'haproxy-stats-v1.5.csv')) as stats_file: + stats_bytes = stats_file.read().encode() + # Mocking haproxy stats socket with csv content + fake_reader = MagicMock(read=MagicMock(return_value=future(stats_bytes))) + fake_writer = MagicMock(write=MagicMock(return_value=None)) + socket_patcher = patch( + 'asyncio.open_unix_connection', + return_value=future((fake_reader, fake_writer)) + ) + socket_patcher.start() + yield socket_patcher + socket_patcher.stop() + + @staticmethod + @pytest.mark.asyncio + async def test_taskqueue_instances(): + taskqueue = await proxy_stats.get_service_instances('mocked', 'TaskQueue') + assert taskqueue == [ '10.10.7.86:17447', '10.10.7.86:17448', '10.10.7.86:17449', '10.10.7.86:17450' - ]) + ] - def test_datastore_instances(self): - datastore = proxy_stats.get_service_instances( + @staticmethod + @pytest.mark.asyncio + async def test_datastore_instances(): + datastore = await proxy_stats.get_service_instances( 'mocked', 'appscale-datastore_server' ) - self.assertEqual(datastore, [ + assert datastore == [ '10.10.7.86:4000', '10.10.7.86:4001', '10.10.7.86:4002', '10.10.7.86:4003' - ]) + ] - def test_dashboard_instances(self): - dashboard = proxy_stats.get_service_instances( + @staticmethod + @pytest.mark.asyncio + async def test_dashboard_instances(): + dashboard = await proxy_stats.get_service_instances( 'mocked', 'gae_appscaledashboard' ) - self.assertEqual(dashboard, [ + assert dashboard == [ '10.10.9.111:20000', '10.10.9.111:20001', '10.10.9.111:20002' - ]) + ] - def test_unknown_proxy(self): - unknown = proxy_stats.get_service_instances('mocked', 'gae_not_running') - self.assertEqual(unknown, []) + @staticmethod + @pytest.mark.asyncio + async def test_unknown_proxy(): + unknown = await proxy_stats.get_service_instances( + 'mocked', 'gae_not_running' + ) + assert unknown == [] diff --git a/Hermes/appscale/hermes/producers/tests/test_taskqueue.py b/Hermes/appscale/hermes/producers/tests/test_taskqueue.py index 999098e52f..d851a67382 100644 --- a/Hermes/appscale/hermes/producers/tests/test_taskqueue.py +++ b/Hermes/appscale/hermes/producers/tests/test_taskqueue.py @@ -1,154 +1,180 @@ +import asyncio import json import os -import socket -from mock import patch, mock -from tornado import testing, gen, httpclient +import aiohttp +import pytest +from mock import patch, MagicMock -from appscale.hermes.producers import taskqueue_stats, proxy_stats +from appscale.hermes.producers import taskqueue_stats CUR_DIR = os.path.dirname(os.path.realpath(__file__)) TEST_DATA_DIR = os.path.join(CUR_DIR, 'test-data') -class TestTaskqueueStatsSource(testing.AsyncTestCase): - - @patch.object(proxy_stats, 'get_service_instances') - @patch.object(taskqueue_stats.httpclient.AsyncHTTPClient, 'fetch') - @testing.gen_test - def test_taskqueue_stats(self, mock_fetch, mock_get_instances): - # Read test data from json file - test_data_path = os.path.join(TEST_DATA_DIR, 'taskqueue-stats.json') - with open(test_data_path) as json_file: - tq_stats = json.load(json_file) - - # Tell that we have 2 taskqueue servers - tq_responses = { - '10.10.7.86:17447': mock.MagicMock( - code=200, reason='OK', body=json.dumps(tq_stats['10.10.7.86:17447']) - ), - '10.10.7.86:17448': mock.MagicMock( - code=200, reason='OK', body=json.dumps(tq_stats['10.10.7.86:17448']) - ), - '10.10.7.86:17449': httpclient.HTTPError( - 504, - "Gateway Timeout", - mock.MagicMock(code=504, reason='Gateway Timeout', body=None) - ), - '10.10.7.86:17450': socket.error("Connection refused") - } - mock_get_instances.return_value = 
list(tq_responses.keys()) - - # Mock taskqueue service stats API - def fetch(request, **kwargs): - ip_port = request.url.split('://')[1].split('/')[0] - result = tq_responses[ip_port] - future_response = gen.Future() - if isinstance(result, Exception): - future_response.set_exception(result) - else: - future_response.set_result(result) - return future_response - - mock_fetch.side_effect = fetch - - # Environment is mocked, so we can do a test. - # Call method under test - stats_source = taskqueue_stats.TaskqueueStatsSource() - stats_snapshot = yield stats_source.get_current() - - self.assertIsInstance(stats_snapshot.utc_timestamp, int) - self.assertEqual(stats_snapshot.current_requests, 4) - - # Check summarised cumulative stats - self.assertEqual(stats_snapshot.cumulative.total, 40) - self.assertEqual(stats_snapshot.cumulative.failed, 9) - self.assertEqual(stats_snapshot.cumulative.pb_reqs, 22) - self.assertEqual(stats_snapshot.cumulative.rest_reqs, 18) - - # Check summarised recent stats - self.assertEqual(stats_snapshot.recent.total, 33) - self.assertEqual(stats_snapshot.recent.failed, 8) - self.assertEqual(stats_snapshot.recent.avg_latency, 83) - self.assertEqual(stats_snapshot.recent.pb_reqs, 19) - self.assertEqual(stats_snapshot.recent.rest_reqs, 14) - self.assertEqual(stats_snapshot.recent.by_pb_method, { - "BulkAdd": 4, "PauseQueue": 4, "FetchTask": 11 - }) - self.assertEqual(stats_snapshot.recent.by_rest_method, { - "get_tasks": 2, "post_tasks": 6, "patch_task": 6 - }) - self.assertEqual(stats_snapshot.recent.by_pb_status, { - "OK": 16, "UNKNOWN_TASK": 2, "UNKNOWN_QUEUE": 1 - }) - self.assertEqual(stats_snapshot.recent.by_rest_status, { - "200": 9, "404": 3, "500": 2 - }) - - # Check instances - self.assertEqual(len(stats_snapshot.instances), 2) - tq_17447 = next(instance for instance in stats_snapshot.instances - if instance.ip_port == '10.10.7.86:17447') - tq_17448 = next(instance for instance in stats_snapshot.instances - if instance.ip_port == '10.10.7.86:17448') - - # TaskQueue on port 17447 - self.assertEqual(tq_17447.start_timestamp_ms, 1494240000000) - self.assertEqual(tq_17447.current_requests, 3) - self.assertEqual(tq_17447.cumulative.total, 15) - self.assertEqual(tq_17447.cumulative.failed, 5) - self.assertEqual(tq_17447.cumulative.pb_reqs, 6) - self.assertEqual(tq_17447.cumulative.rest_reqs, 9) - self.assertEqual(tq_17447.recent.total, 13) - self.assertEqual(tq_17447.recent.failed, 5) - self.assertEqual(tq_17447.recent.avg_latency, 64) - self.assertEqual(tq_17447.recent.pb_reqs, 6) - self.assertEqual(tq_17447.recent.rest_reqs, 7) - self.assertEqual(tq_17447.recent.by_pb_method, { - "BulkAdd": 4, "PauseQueue": 2 - }) - self.assertEqual(tq_17447.recent.by_rest_method, { - "get_tasks": 2, "post_tasks": 5 - }) - self.assertEqual(tq_17447.recent.by_pb_status, { - "OK": 4, "UNKNOWN_TASK": 2 - }) - self.assertEqual(tq_17447.recent.by_rest_status, { - "200": 4, "404": 3 - }) - - # TaskQueue on port 17448 - self.assertEqual(tq_17448.start_timestamp_ms, 1494240000250) - self.assertEqual(tq_17448.current_requests, 1) - self.assertEqual(tq_17448.cumulative.total, 25) - self.assertEqual(tq_17448.cumulative.failed, 4) - self.assertEqual(tq_17448.cumulative.pb_reqs, 16) - self.assertEqual(tq_17448.cumulative.rest_reqs, 9) - self.assertEqual(tq_17448.recent.total, 20) - self.assertEqual(tq_17448.recent.failed, 3) - self.assertEqual(tq_17448.recent.avg_latency, 96) - self.assertEqual(tq_17448.recent.pb_reqs, 13) - self.assertEqual(tq_17448.recent.rest_reqs, 7) - 
self.assertEqual(tq_17448.recent.by_pb_method, { - "PauseQueue": 2, "FetchTask": 11 - }) - self.assertEqual(tq_17448.recent.by_rest_method, { - "post_tasks": 1, "patch_task": 6 - }) - self.assertEqual(tq_17448.recent.by_pb_status, { - "OK": 12, "UNKNOWN_QUEUE": 1 - }) - self.assertEqual(tq_17448.recent.by_rest_status, { - "200": 5, "500": 2 - }) - - self.assertEqual(stats_snapshot.instances_count, 2) - - # Check Failures - self.assertEqual(len(stats_snapshot.failures), 2) - tq_17449 = next(instance for instance in stats_snapshot.failures - if instance.ip_port == '10.10.7.86:17449') - tq_17450 = next(instance for instance in stats_snapshot.failures - if instance.ip_port == '10.10.7.86:17450') - self.assertEqual(tq_17449.error, 'HTTP 504: Gateway Timeout') - self.assertEqual(tq_17450.error, 'Connection refused') +def future(value=None): + future_obj = asyncio.Future() + future_obj.set_result(value) + return future_obj + + +class AsyncContextMock(MagicMock): + async def __aenter__(self): + return self.aenter + + async def __aexit__(self, exc_type, exc_val, exc_tb): + return None + + +@pytest.mark.asyncio +async def test_taskqueue_stats(): + # Read test data from json file + test_data_path = os.path.join(TEST_DATA_DIR, 'taskqueue-stats.json') + with open(test_data_path) as json_file: + tq_stats = json.load(json_file) + + def fake_get(url, *args, **kwargs): + ip_port = url.split('://')[1].split('/')[0] + if ip_port == '10.10.7.86:17447': + json_method = MagicMock(return_value=future(tq_stats['10.10.7.86:17447'])) + response = MagicMock(json=json_method) + return AsyncContextMock(aenter=response) + if ip_port == '10.10.7.86:17448': + json_method = MagicMock(return_value=future(tq_stats['10.10.7.86:17448'])) + response = MagicMock(json=json_method) + return AsyncContextMock(aenter=response) + if ip_port == '10.10.7.86:17449': + error = aiohttp.ClientError('HTTP 504: Gateway Timeout') + response = MagicMock(raise_for_status=MagicMock(side_effect=error)) + return AsyncContextMock(aenter=response) + if ip_port == '10.10.7.86:17450': + raise aiohttp.ClientError('Connection refused') + + get_patcher = patch( + 'aiohttp.ClientSession.get', + side_effect=fake_get + ) + # Tell that we have 4 taskqueue servers + get_instances_patcher = patch( + 'appscale.hermes.producers.proxy_stats.get_service_instances', + return_value=future([ + '10.10.7.86:17447', + '10.10.7.86:17448', + '10.10.7.86:17449', + '10.10.7.86:17450' + ]) + ) + with get_patcher: + with get_instances_patcher: + # Environment is mocked, so we can do a test. 
+ # Call method under test + stats_source = taskqueue_stats.TaskqueueStatsSource() + stats_snapshot = await stats_source.get_current() + + assert isinstance(stats_snapshot.utc_timestamp, int) + assert stats_snapshot.current_requests == 4 + + # Check summarised cumulative stats + assert stats_snapshot.cumulative.total == 40 + assert stats_snapshot.cumulative.failed == 9 + assert stats_snapshot.cumulative.pb_reqs == 22 + assert stats_snapshot.cumulative.rest_reqs == 18 + + # Check summarised recent stats + assert stats_snapshot.recent.total == 33 + assert stats_snapshot.recent.failed == 8 + assert stats_snapshot.recent.avg_latency == 83 + assert stats_snapshot.recent.pb_reqs == 19 + assert stats_snapshot.recent.rest_reqs == 14 + assert ( + stats_snapshot.recent.by_pb_method == + {"BulkAdd": 4, "PauseQueue": 4, "FetchTask": 11} + ) + assert ( + stats_snapshot.recent.by_rest_method == + {"get_tasks": 2, "post_tasks": 6, "patch_task": 6} + ) + assert ( + stats_snapshot.recent.by_pb_status == + {"OK": 16, "UNKNOWN_TASK": 2, "UNKNOWN_QUEUE": 1} + ) + assert ( + stats_snapshot.recent.by_rest_status == + {"200": 9, "404": 3, "500": 2} + ) + + # Check instances + assert len(stats_snapshot.instances) == 2 + tq_17447 = next(instance for instance in stats_snapshot.instances + if instance.ip_port == '10.10.7.86:17447') + tq_17448 = next(instance for instance in stats_snapshot.instances + if instance.ip_port == '10.10.7.86:17448') + + # TaskQueue on port 17447 + assert tq_17447.start_timestamp_ms == 1494240000000 + assert tq_17447.current_requests == 3 + assert tq_17447.cumulative.total == 15 + assert tq_17447.cumulative.failed == 5 + assert tq_17447.cumulative.pb_reqs == 6 + assert tq_17447.cumulative.rest_reqs == 9 + assert tq_17447.recent.total == 13 + assert tq_17447.recent.failed == 5 + assert tq_17447.recent.avg_latency == 64 + assert tq_17447.recent.pb_reqs == 6 + assert tq_17447.recent.rest_reqs == 7 + assert ( + tq_17447.recent.by_pb_method == + {"BulkAdd": 4, "PauseQueue": 2} + ) + assert ( + tq_17447.recent.by_rest_method == + {"get_tasks": 2, "post_tasks": 5} + ) + assert ( + tq_17447.recent.by_pb_status == + {"OK": 4, "UNKNOWN_TASK": 2} + ) + assert ( + tq_17447.recent.by_rest_status == + {"200": 4, "404": 3} + ) + + # TaskQueue on port 17448 + assert tq_17448.start_timestamp_ms == 1494240000250 + assert tq_17448.current_requests == 1 + assert tq_17448.cumulative.total == 25 + assert tq_17448.cumulative.failed == 4 + assert tq_17448.cumulative.pb_reqs == 16 + assert tq_17448.cumulative.rest_reqs == 9 + assert tq_17448.recent.total == 20 + assert tq_17448.recent.failed == 3 + assert tq_17448.recent.avg_latency == 96 + assert tq_17448.recent.pb_reqs == 13 + assert tq_17448.recent.rest_reqs == 7 + assert ( + tq_17448.recent.by_pb_method == + {"PauseQueue": 2, "FetchTask": 11} + ) + assert ( + tq_17448.recent.by_rest_method == + {"post_tasks": 1, "patch_task": 6} + ) + assert ( + tq_17448.recent.by_pb_status == + {"OK": 12, "UNKNOWN_QUEUE": 1} + ) + assert ( + tq_17448.recent.by_rest_status == + {"200": 5, "500": 2} + ) + + assert stats_snapshot.instances_count == 2 + + # Check Failures + assert len(stats_snapshot.failures) == 2 + tq_17449 = next(instance for instance in stats_snapshot.failures + if instance.ip_port == '10.10.7.86:17449') + tq_17450 = next(instance for instance in stats_snapshot.failures + if instance.ip_port == '10.10.7.86:17450') + assert tq_17449.error == 'HTTP 504: Gateway Timeout' + assert tq_17450.error == 'Connection refused' diff --git 
a/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py b/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py index 71b1e85c80..6aac471658 100644 --- a/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py +++ b/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py @@ -1,11 +1,9 @@ -import unittest - from appscale.hermes.unified_service_names import ( ServicesEnum, find_service_by_monit_name, find_service_by_pxname, Service ) -class TestMonitNames(unittest.TestCase): +class TestMonitNames: def test_search_for_known_service(self): monit_name_to_expectation = { 'uaserver': ServicesEnum.UASERVER, @@ -31,42 +29,42 @@ def test_search_for_known_service(self): 'appmanagerserver': ServicesEnum.APPMANAGER, } for monit_name, expected in monit_name_to_expectation.items(): - self.assertEqual(find_service_by_monit_name(monit_name), expected) + assert find_service_by_monit_name(monit_name) == expected def test_search_for_unknown_service(self): service = find_service_by_monit_name('irrelevant-monit-process') - self.assertEqual(service.name, 'irrelevant-monit-process') + assert service.name == 'irrelevant-monit-process' def test_parsing_application_id(self): # Celery service celery = ServicesEnum.CELERY app = celery.get_application_id_by_monit_name('celery-app-ppa-9999') - self.assertEqual(app, 'app-ppa') + assert app == 'app-ppa' # Application service application = ServicesEnum.APPLICATION app = application.get_application_id_by_monit_name('app___appppa-20008') - self.assertEqual(app, 'appppa') + assert app == 'appppa' def test_parsing_port(self): # Celery service celery = ServicesEnum.CELERY port = celery.get_port_by_monit_name('celery-app-ppa-9999') - self.assertEqual(port, 9999) + assert port == 9999 # Application service application = ServicesEnum.APPLICATION port = application.get_port_by_monit_name('app___appppa-20008') - self.assertEqual(port, 20008) + assert port == 20008 # Taskqueue service taskqueue = ServicesEnum.TASKQUEUE port = taskqueue.get_port_by_monit_name('taskqueue-17448') - self.assertEqual(port, 17448) + assert port == 17448 # Datastore service datastore = ServicesEnum.DATASTORE port = datastore.get_port_by_monit_name('datastore_server-4002') - self.assertEqual(port, 4002) + assert port == 4002 -class TestHAProxyNames(unittest.TestCase): +class TestHAProxyNames: def test_search_for_known_service(self): proxy_name_to_expectation = { 'UserAppServer': ServicesEnum.UASERVER, @@ -76,62 +74,62 @@ def test_search_for_known_service(self): 'gae_app3-3': ServicesEnum.APPLICATION, } for proxy_name, expected in proxy_name_to_expectation.items(): - self.assertEqual(find_service_by_pxname(proxy_name), expected) + assert find_service_by_pxname(proxy_name) == expected def test_search_for_unknown_service(self): service = find_service_by_pxname('irrelevant-haproxy-proxy') - self.assertEqual(service.name, 'irrelevant-haproxy-proxy') + assert service.name == 'irrelevant-haproxy-proxy' def test_parsing_application_id(self): app = ServicesEnum.APPLICATION.get_application_id_by_pxname('gae_app3-3') - self.assertEqual(app, 'app3-3') + assert app == 'app3-3' def test_parsing_ip_port(self): # IP/Port for uaserver ip, port = ServicesEnum.UASERVER.get_ip_port_by_svname( 'UserAppServer-10.10.8.9:4342') - self.assertEqual(ip, '10.10.8.9') - self.assertEqual(port, 4342) + assert ip == '10.10.8.9' + assert port == 4342 # IP/Port for taskqueue ip, port = ServicesEnum.TASKQUEUE.get_ip_port_by_svname( 'TaskQueue-10.10.8.9:17448') - self.assertEqual(ip, 
'10.10.8.9') - self.assertEqual(port, 17448) + assert ip == '10.10.8.9' + assert port == 17448 # IP/Port for datastore ip, port = ServicesEnum.DATASTORE.get_ip_port_by_svname( 'appscale-datastore_server-10.10.8.9:4002') - self.assertEqual(ip, '10.10.8.9') - self.assertEqual(port, 4002) + assert ip == '10.10.8.9' + assert port == 4002 # IP/Port for blobstore ip, port = ServicesEnum.BLOBSTORE.get_ip_port_by_svname( 'as_blob_server-10.10.8.9:6107') - self.assertEqual(ip, '10.10.8.9') - self.assertEqual(port, 6107) + assert ip == '10.10.8.9' + assert port == 6107 # IP/Port for application ip, port = ServicesEnum.APPLICATION.get_ip_port_by_svname( 'gae_app3-3-10.10.8.9:20008') - self.assertEqual(ip, '10.10.8.9') - self.assertEqual(port, 20008) + assert ip == '10.10.8.9' + assert port == 20008 -class TestUnknownService(unittest.TestCase): +class TestUnknownService: def test_unknown_service(self): service = Service(name='smth-out-of-stats-28') - self.assertEqual(service.name, 'smth-out-of-stats-28') + assert service.name == 'smth-out-of-stats-28' # Application ID by unknown monit name app = service.get_application_id_by_monit_name('smth-out-of-stats-28') - self.assertIsNone(app) + assert app is None # Application ID by unknown haproxy name app = service.get_application_id_by_pxname('smth-out-of-stats-1.1.1.1:2') - self.assertIsNone(app) + assert app is None # Port by unknown monit name port = service.get_port_by_monit_name('smth-out-of-stats-28') - self.assertIsNone(port) + assert port is None # IP/Port by unknown haproxy ip, port = service.get_ip_port_by_svname('smth-out-of-stats-1.1.1.1:2') - self.assertIsNone(ip) - self.assertIsNone(port) + assert ip is None + assert port is None From e3cc8a6843ee2767fad8716b7173222794fc1cb8 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 10 Apr 2019 17:34:57 +0300 Subject: [PATCH 011/221] Move hermes test files --- Hermes/appscale/hermes/producers/tests/__init__.py | 0 .../hermes/producers => }/tests/test-data/haproxy-stats-v1.4.csv | 0 .../hermes/producers => }/tests/test-data/haproxy-stats-v1.5.csv | 0 .../hermes/producers => }/tests/test-data/node-stats.json | 0 .../hermes/producers => }/tests/test-data/processes-stats.json | 0 .../hermes/producers => }/tests/test-data/proxies-stats.json | 0 .../hermes/producers => }/tests/test-data/taskqueue-stats.json | 0 Hermes/{appscale/hermes/producers => }/tests/test_cassandra.py | 0 .../{appscale/hermes/producers => }/tests/test_cluster_stats.py | 0 Hermes/{appscale/hermes/producers => }/tests/test_node.py | 0 Hermes/{appscale/hermes/producers => }/tests/test_process.py | 0 Hermes/{appscale/hermes/producers => }/tests/test_proxy.py | 0 Hermes/{appscale/hermes/producers => }/tests/test_taskqueue.py | 0 .../hermes/producers => }/tests/test_unified_service_names.py | 0 14 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 Hermes/appscale/hermes/producers/tests/__init__.py rename Hermes/{appscale/hermes/producers => }/tests/test-data/haproxy-stats-v1.4.csv (100%) rename Hermes/{appscale/hermes/producers => }/tests/test-data/haproxy-stats-v1.5.csv (100%) rename Hermes/{appscale/hermes/producers => }/tests/test-data/node-stats.json (100%) rename Hermes/{appscale/hermes/producers => }/tests/test-data/processes-stats.json (100%) rename Hermes/{appscale/hermes/producers => }/tests/test-data/proxies-stats.json (100%) rename Hermes/{appscale/hermes/producers => }/tests/test-data/taskqueue-stats.json (100%) rename Hermes/{appscale/hermes/producers => }/tests/test_cassandra.py (100%) rename 
Hermes/{appscale/hermes/producers => }/tests/test_cluster_stats.py (100%) rename Hermes/{appscale/hermes/producers => }/tests/test_node.py (100%) rename Hermes/{appscale/hermes/producers => }/tests/test_process.py (100%) rename Hermes/{appscale/hermes/producers => }/tests/test_proxy.py (100%) rename Hermes/{appscale/hermes/producers => }/tests/test_taskqueue.py (100%) rename Hermes/{appscale/hermes/producers => }/tests/test_unified_service_names.py (100%) diff --git a/Hermes/appscale/hermes/producers/tests/__init__.py b/Hermes/appscale/hermes/producers/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/Hermes/appscale/hermes/producers/tests/test-data/haproxy-stats-v1.4.csv b/Hermes/tests/test-data/haproxy-stats-v1.4.csv similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test-data/haproxy-stats-v1.4.csv rename to Hermes/tests/test-data/haproxy-stats-v1.4.csv diff --git a/Hermes/appscale/hermes/producers/tests/test-data/haproxy-stats-v1.5.csv b/Hermes/tests/test-data/haproxy-stats-v1.5.csv similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test-data/haproxy-stats-v1.5.csv rename to Hermes/tests/test-data/haproxy-stats-v1.5.csv diff --git a/Hermes/appscale/hermes/producers/tests/test-data/node-stats.json b/Hermes/tests/test-data/node-stats.json similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test-data/node-stats.json rename to Hermes/tests/test-data/node-stats.json diff --git a/Hermes/appscale/hermes/producers/tests/test-data/processes-stats.json b/Hermes/tests/test-data/processes-stats.json similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test-data/processes-stats.json rename to Hermes/tests/test-data/processes-stats.json diff --git a/Hermes/appscale/hermes/producers/tests/test-data/proxies-stats.json b/Hermes/tests/test-data/proxies-stats.json similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test-data/proxies-stats.json rename to Hermes/tests/test-data/proxies-stats.json diff --git a/Hermes/appscale/hermes/producers/tests/test-data/taskqueue-stats.json b/Hermes/tests/test-data/taskqueue-stats.json similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test-data/taskqueue-stats.json rename to Hermes/tests/test-data/taskqueue-stats.json diff --git a/Hermes/appscale/hermes/producers/tests/test_cassandra.py b/Hermes/tests/test_cassandra.py similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test_cassandra.py rename to Hermes/tests/test_cassandra.py diff --git a/Hermes/appscale/hermes/producers/tests/test_cluster_stats.py b/Hermes/tests/test_cluster_stats.py similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test_cluster_stats.py rename to Hermes/tests/test_cluster_stats.py diff --git a/Hermes/appscale/hermes/producers/tests/test_node.py b/Hermes/tests/test_node.py similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test_node.py rename to Hermes/tests/test_node.py diff --git a/Hermes/appscale/hermes/producers/tests/test_process.py b/Hermes/tests/test_process.py similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test_process.py rename to Hermes/tests/test_process.py diff --git a/Hermes/appscale/hermes/producers/tests/test_proxy.py b/Hermes/tests/test_proxy.py similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test_proxy.py rename to Hermes/tests/test_proxy.py diff --git 
a/Hermes/appscale/hermes/producers/tests/test_taskqueue.py b/Hermes/tests/test_taskqueue.py similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test_taskqueue.py rename to Hermes/tests/test_taskqueue.py diff --git a/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py b/Hermes/tests/test_unified_service_names.py similarity index 100% rename from Hermes/appscale/hermes/producers/tests/test_unified_service_names.py rename to Hermes/tests/test_unified_service_names.py From c7ba4fefb6c51cc38dccb56048a9102b5192633b Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 5 Apr 2019 12:57:06 +0300 Subject: [PATCH 012/221] Update Hermes installer and test runner --- AppController/djinn.rb | 8 ++++---- Hermes/setup.py | 20 +++++++++----------- Rakefile | 3 ++- debian/appscale_install_functions.sh | 13 +++++++++++-- debian/control.jessie | 2 ++ debian/control.stretch | 2 ++ debian/control.trusty | 2 ++ debian/control.xenial | 2 ++ 8 files changed, 34 insertions(+), 18 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 812b8b1333..f7da7ea725 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3434,8 +3434,7 @@ def start_app_manager_server def start_hermes @state = "Starting Hermes" Djinn.log_info("Starting Hermes service.") - script = `which appscale-hermes`.chomp - start_cmd = "/usr/bin/python2 #{script}" + start_cmd = "/opt/appscale_hermes/bin/appscale-hermes" start_cmd << ' --verbose' if @options['verbose'].downcase == 'true' MonitInterface.start(:hermes, start_cmd) if my_node.is_shadow? @@ -3700,12 +3699,13 @@ def build_java_appserver def build_hermes Djinn.log_info('Building uncommitted Hermes changes') - unless system('pip install --upgrade --no-deps ' + + unless system('/opt/appscale_hermes/bin/pip install --upgrade --no-deps ' + "#{APPSCALE_HOME}/Hermes > /dev/null 2>&1") Djinn.log_error('Unable to build Hermes (install failed).') return end - unless system("pip install #{APPSCALE_HOME}/Hermes > /dev/null 2>&1") + unless system('/opt/appscale_hermes/bin/pip install ' + + "#{APPSCALE_HOME}/Hermes > /dev/null 2>&1") Djinn.log_error('Unable to build Hermes (install dependencies failed).') return end diff --git a/Hermes/setup.py b/Hermes/setup.py index d65f85b707..99b76865de 100644 --- a/Hermes/setup.py +++ b/Hermes/setup.py @@ -2,9 +2,8 @@ setup( name='appscale-hermes', - version='0.1.2', - description='AppScale module which takes care of periodical backup and ' - 'restore tasks and provides statistics API.', + version='0.2.0', + description='AppScale module which provides statistics API.', author='AppScale Systems, Inc.', url='https://github.com/AppScale/appscale', license='Apache License 2.0', @@ -12,18 +11,17 @@ platforms='Posix', install_requires=[ 'appscale-common', - 'kazoo', - 'tornado', - 'psutil==5.1.3', - 'attrs>=18.1.0', - 'mock', + 'appscale-admin', + 'psutil==5.6.1', + 'attrs==19.1.0', + 'mock==2.0.0', + 'aiohttp==2.3.9' ], - test_suite='appscale.hermes', classifiers=[ - 'Development Status :: 3 - Alpha', + 'Development Status :: 3 - Beta', 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 2.7' + 'Programming Language :: Python :: 3.5' ], namespace_packages=['appscale'], packages=['appscale', diff --git a/Rakefile b/Rakefile index 0e0ed42114..7fe276c8a4 100644 --- a/Rakefile +++ b/Rakefile @@ -64,7 +64,8 @@ end namespace :hermes do task :test do - sh 'python -m unittest discover -b -v -s Hermes/appscale/hermes' + sh 
'/opt/appscale_hermes/bin/pip install pytest pytest-asyncio' + sh '/opt/appscale_hermes/bin/pytest Hermes/tests' end end diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index c2b7e2f3d3..73e8d184e2 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -608,8 +608,17 @@ installadminserver() installhermes() { - pip install --upgrade --no-deps ${APPSCALE_HOME}/Hermes - pip install ${APPSCALE_HOME}/Hermes + # Create virtual environment based on Python 3 + rm -rf /opt/appscale_hermes + python3 -m venv /opt/appscale_hermes/ + # Install Hermes and its dependencies in it + HERMES_PIP=/opt/appscale_hermes/bin/pip + ${HERMES_PIP} install --upgrade --no-deps ${APPSCALE_HOME}/common + ${HERMES_PIP} install ${APPSCALE_HOME}/common + ${HERMES_PIP} install --upgrade --no-deps ${APPSCALE_HOME}/AdminServer + ${HERMES_PIP} install ${APPSCALE_HOME}/AdminServer + ${HERMES_PIP} install --upgrade --no-deps ${APPSCALE_HOME}/Hermes + ${HERMES_PIP} install ${APPSCALE_HOME}/Hermes } installinfrastructuremanager() diff --git a/debian/control.jessie b/debian/control.jessie index 375635bf3f..b02a1284a4 100644 --- a/debian/control.jessie +++ b/debian/control.jessie @@ -83,6 +83,8 @@ Depends: appscale-tools, python-twisted, python-xmpp, python-yaml, + python3-venv, + python3.5-dev, rabbitmq-server, rsync, rsyslog, diff --git a/debian/control.stretch b/debian/control.stretch index f6c404a1d4..0efda0f983 100644 --- a/debian/control.stretch +++ b/debian/control.stretch @@ -88,6 +88,8 @@ Depends: appscale-tools, python-virtualenv, python-xmpp, python-yaml, + python3-venv, + python3.5-dev, rabbitmq-server, rsync, rsyslog, diff --git a/debian/control.trusty b/debian/control.trusty index aa295fd948..6d7ad2ac99 100644 --- a/debian/control.trusty +++ b/debian/control.trusty @@ -86,6 +86,8 @@ Depends: appscale-tools, python-virtualenv, python-xmpp, python-yaml, + python3-venv, + python3.5-dev, rabbitmq-server, rsync, rsyslog, diff --git a/debian/control.xenial b/debian/control.xenial index a9f0f172b0..b1f16259fa 100644 --- a/debian/control.xenial +++ b/debian/control.xenial @@ -88,6 +88,8 @@ Depends: appscale-tools, python-twisted, python-virtualenv, python-xmpp, + python3-venv, + python3.5-dev, rabbitmq-server, rsync, rsyslog, From 79fdbc8714dba50415499866bd53a2883da35379 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 5 Apr 2019 13:28:57 +0300 Subject: [PATCH 013/221] Remove profiling log properties from djinn --- AppController/djinn.rb | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index f7da7ea725..4d711b2b95 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -477,14 +477,6 @@ class Djinn 'use_spot_instances' => [TrueClass, nil, false], 'user_commands' => [String, nil, true], 'verbose' => [TrueClass, 'False', true], - 'write_nodes_stats_log' => [TrueClass, 'False', true], - 'nodes_stats_log_interval' => [Fixnum, '15', true], - 'write_processes_stats_log' => [TrueClass, 'False', true], - 'processes_stats_log_interval' => [Fixnum, '65', true], - 'write_proxies_stats_log' => [TrueClass, 'False', true], - 'proxies_stats_log_interval' => [Fixnum, '35', true], - 'write_detailed_processes_stats_log' => [TrueClass, 'False', true], - 'write_detailed_proxies_stats_log' => [TrueClass, 'False', true], 'zone' => [String, nil, true] }.freeze @@ -1368,27 +1360,6 @@ def set_property(property_name, property_value, secret) end @options[key] = val - - if 
key.include? 'stats_log' - if key.include? 'nodes' - ZKInterface.update_hermes_nodes_profiling_conf( - @options['write_nodes_stats_log'].downcase == 'true', - @options['nodes_stats_log_interval'].to_i - ) - elsif key.include? 'processes' - ZKInterface.update_hermes_processes_profiling_conf( - @options['write_processes_stats_log'].downcase == 'true', - @options['processes_stats_log_interval'].to_i, - @options['write_detailed_processes_stats_log'].downcase == 'true' - ) - elsif key.include? 'proxies' - ZKInterface.update_hermes_proxies_profiling_conf( - @options['write_proxies_stats_log'].downcase == 'true', - @options['proxies_stats_log_interval'].to_i, - @options['write_detailed_proxies_stats_log'].downcase == 'true' - ) - end - end Djinn.log_info("Successfully set #{key} to #{val}.") } # Act upon changes. From be47a6efdf11cd34d49776b0bfc19129ffe351ae Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 5 Apr 2019 13:51:36 +0300 Subject: [PATCH 014/221] Fix TaskQueue test after changes in appscale_info --- .../test/unit/test_taskqueue_server.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/AppTaskQueue/test/unit/test_taskqueue_server.py b/AppTaskQueue/test/unit/test_taskqueue_server.py index 4a03acc5d3..026674e489 100644 --- a/AppTaskQueue/test/unit/test_taskqueue_server.py +++ b/AppTaskQueue/test/unit/test_taskqueue_server.py @@ -11,19 +11,22 @@ class TestDistributedTaskQueue(unittest.TestCase): """ A set of test cases for the distributed taskqueue module """ - def setUp(self): - self._read_patcher = patch.object( - file_io, 'read', return_value='192.168.0.1') - self.read_mock = self._read_patcher.start() - - def tearDown(self): - self._read_patcher.stop() @staticmethod def test_distributed_tq_initialization(): db_access = MagicMock() zk_client = MagicMock() - distributed_tq.DistributedTaskQueue(db_access, zk_client) + lb_ips_patcher = patch( + 'appscale.common.appscale_info.get_load_balancer_ips', + return_value=['192.168.0.1'] + ) + db_proxy_patcher = patch( + 'appscale.common.appscale_info.get_db_proxy', + return_value=['192.168.0.1'] + ) + with lb_ips_patcher: + with db_proxy_patcher: + distributed_tq.DistributedTaskQueue(db_access, zk_client) # TODO: # def test_fetch_queue_stats(self): From d76beedecb1c2e15882349b944527ff7f0a75c32 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 5 Apr 2019 15:54:10 +0300 Subject: [PATCH 015/221] Fixes in AdminServer. - Don't import anything from Hermes (define own HERMES_PORT). - Reffer to admin_server module in setup.py. --- AdminServer/appscale/admin/instance_manager/constants.py | 3 +++ AdminServer/appscale/admin/instance_manager/routing_client.py | 4 ++-- AdminServer/setup.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/AdminServer/appscale/admin/instance_manager/constants.py b/AdminServer/appscale/admin/instance_manager/constants.py index cc17ae2fc1..ef371582df 100644 --- a/AdminServer/appscale/admin/instance_manager/constants.py +++ b/AdminServer/appscale/admin/instance_manager/constants.py @@ -133,3 +133,6 @@ def http_response(self, request, response): # The ZooKeeper node that keeps track of running AppServers by version. VERSION_REGISTRATION_NODE = '/appscale/instances_by_version' + +# The port Hermes listens on. 
+HERMES_PORT = 4378 diff --git a/AdminServer/appscale/admin/instance_manager/routing_client.py b/AdminServer/appscale/admin/instance_manager/routing_client.py index b2f3d88f87..3db8764e41 100644 --- a/AdminServer/appscale/admin/instance_manager/routing_client.py +++ b/AdminServer/appscale/admin/instance_manager/routing_client.py @@ -7,11 +7,11 @@ from tornado import gen from tornado.httpclient import AsyncHTTPClient -from appscale.admin.instance_manager.constants import VERSION_REGISTRATION_NODE +from appscale.admin.instance_manager.constants import VERSION_REGISTRATION_NODE, \ + HERMES_PORT from appscale.admin.instance_manager.instance import Instance from appscale.common import appscale_info from appscale.common.constants import GAE_PREFIX, VERSION_PATH_SEPARATOR -from appscale.hermes.constants import HERMES_PORT logger = logging.getLogger(__name__) diff --git a/AdminServer/setup.py b/AdminServer/setup.py index a8ed8f40bc..9f587d9b15 100644 --- a/AdminServer/setup.py +++ b/AdminServer/setup.py @@ -45,7 +45,7 @@ 'appscale.admin.routing': ['templates/*']}, include_package_data=True, entry_points={'console_scripts': [ - 'appscale-admin=appscale.admin:main', + 'appscale-admin=appscale.admin.admin_server:main', 'appscale-instance-manager=appscale.admin.instance_manager.server:main', 'appscale-stop-instance=appscale.admin.instance_manager.stop_instance:main', 'appscale-stop-services=appscale.admin.stop_services:main', From e94d0c412ca979d7d8a7983d6a54497889a1f17a Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 21 May 2019 19:24:13 +0300 Subject: [PATCH 016/221] Update Hermes package version --- Hermes/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Hermes/setup.py b/Hermes/setup.py index 99b76865de..ffde8fc226 100644 --- a/Hermes/setup.py +++ b/Hermes/setup.py @@ -2,7 +2,7 @@ setup( name='appscale-hermes', - version='0.2.0', + version='0.4.0', description='AppScale module which provides statistics API.', author='AppScale Systems, Inc.', url='https://github.com/AppScale/appscale', From b3070d84afebb88afdebfe0c3673757726dde0ae Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 23 May 2019 16:51:26 +0300 Subject: [PATCH 017/221] Better log format in Hermes, venv in /opt/appscale_venvs --- AppController/djinn.rb | 9 +++++++-- Hermes/appscale/hermes/hermes_server.py | 3 ++- Rakefile | 4 ++-- debian/appscale_install_functions.sh | 7 ++++--- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 04f45208b4..8c38a1d63c 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3450,7 +3450,7 @@ def start_app_manager_server def start_hermes @state = "Starting Hermes" Djinn.log_info("Starting Hermes service.") - start_cmd = "/opt/appscale_hermes/bin/appscale-hermes" + start_cmd = "/opt/appscale_venvs/hermes/bin/appscale-hermes" start_cmd << ' --verbose' if @options['verbose'].downcase == 'true' MonitInterface.start(:hermes, start_cmd) if my_node.is_shadow? 
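The start command above points at the console script that setuptools generates inside the Hermes virtualenv when the package is installed there. A minimal sketch of how such a script is declared, in the same style as the AdminServer console_scripts shown earlier in this series; the exact module:function target for appscale-hermes is an assumption for illustration:

    from setuptools import setup

    setup(
        name='appscale-hermes',
        # ... other arguments as in Hermes/setup.py ...
        entry_points={
            'console_scripts': [
                # Installing this into /opt/appscale_venvs/hermes creates
                # /opt/appscale_venvs/hermes/bin/appscale-hermes, the binary
                # that start_hermes launches under monit.
                # The target module and function here are illustrative.
                'appscale-hermes=appscale.hermes.hermes_server:main',
            ],
        },
    )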
@@ -3655,12 +3655,16 @@ def build_uncommitted_changes update_python_package("#{APPSCALE_HOME}/common") update_python_package("#{APPSCALE_HOME}/common", '/opt/appscale_venvs/api_server/bin/pip') + update_python_package("#{APPSCALE_HOME}/common", + '/opt/appscale_venvs/hermes/bin/pip') end if status.include?('AppControllerClient') update_python_package("#{APPSCALE_HOME}/AppControllerClient") end if status.include?('AdminServer') update_python_package("#{APPSCALE_HOME}/AdminServer") + update_python_package("#{APPSCALE_HOME}/AdminServer", + '/opt/appscale_venvs/hermes/bin/pip') end if status.include?('AppTaskQueue') extras = TaskQueue::OPTIONAL_FEATURES.join(',') @@ -3673,7 +3677,8 @@ def build_uncommitted_changes update_python_package("#{APPSCALE_HOME}/InfrastructureManager") end if status.include?('Hermes') - update_python_package("#{APPSCALE_HOME}/Hermes") + update_python_package("#{APPSCALE_HOME}/Hermes", + '/opt/appscale_venvs/hermes/bin/pip') end if status.include?('APIServer') build_api_server diff --git a/Hermes/appscale/hermes/hermes_server.py b/Hermes/appscale/hermes/hermes_server.py index 0c3565a035..25add17214 100644 --- a/Hermes/appscale/hermes/hermes_server.py +++ b/Hermes/appscale/hermes/hermes_server.py @@ -150,4 +150,5 @@ def main(): app.router.add_get(route, handler) logger.info("Starting Hermes on port: {}.".format(args.port)) - web.run_app(app, port=args.port, access_log=logger) + web.run_app(app, port=args.port, access_log=logger, + access_log_format='%a "%r" %s %bB %Tfs "%{User-Agent}i"') diff --git a/Rakefile b/Rakefile index 7fe276c8a4..c342a5f158 100644 --- a/Rakefile +++ b/Rakefile @@ -64,8 +64,8 @@ end namespace :hermes do task :test do - sh '/opt/appscale_hermes/bin/pip install pytest pytest-asyncio' - sh '/opt/appscale_hermes/bin/pytest Hermes/tests' + sh '/opt/appscale_venvs/hermes/bin/pip install pytest pytest-asyncio' + sh '/opt/appscale_venvs/hermes/bin/pytest Hermes/tests' end end diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index e1c386be64..18f0ea8806 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -618,10 +618,11 @@ installadminserver() installhermes() { # Create virtual environment based on Python 3 - rm -rf /opt/appscale_hermes - python3 -m venv /opt/appscale_hermes/ + mkdir -p /opt/appscale_venvs + rm -rf /opt/appscale_venvs/hermes + python3 -m venv /opt/appscale_venvs/hermes/ # Install Hermes and its dependencies in it - HERMES_PIP=/opt/appscale_hermes/bin/pip + HERMES_PIP=/opt/appscale_venvs/hermes/bin/pip ${HERMES_PIP} install --upgrade --no-deps ${APPSCALE_HOME}/common ${HERMES_PIP} install ${APPSCALE_HOME}/common ${HERMES_PIP} install --upgrade --no-deps ${APPSCALE_HOME}/AdminServer From 0c5b5f580972bf6810a8688944972d1b4d8c82ef Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Fri, 21 Jun 2019 16:19:32 -0700 Subject: [PATCH 018/221] Addressed comments. Thanks! --- AppController/djinn.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 94ee9d3191..860dcb5f5d 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -2465,7 +2465,7 @@ def get_all_compute_nodes # This method checks that nodes above index are compute only and thus # can be easily terminated. 
- def can_we_scale_down?(index) + def can_we_scale_down?(min_machines) @state_change_lock.synchronize { nodes_to_check = @nodes.drop(min_machines) } From 73b7dfc137935e9ca55b86d1038579e431e2d3b7 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 21 Feb 2019 18:22:05 +0200 Subject: [PATCH 019/221] Sending app logs to logstash --- .../api/logservice/logservice_stub.py | 177 +++++++++++++++++- scripts/init-filebeat.sh | 49 +++++ 2 files changed, 217 insertions(+), 9 deletions(-) create mode 100644 scripts/init-filebeat.sh diff --git a/AppServer/google/appengine/api/logservice/logservice_stub.py b/AppServer/google/appengine/api/logservice/logservice_stub.py index 765e3aa279..be28c72d7f 100644 --- a/AppServer/google/appengine/api/logservice/logservice_stub.py +++ b/AppServer/google/appengine/api/logservice/logservice_stub.py @@ -20,23 +20,119 @@ import base64 import capnp # pylint: disable=unused-import import logging + +import json import logging_capnp import socket import struct import time - from collections import defaultdict + +import os + +from datetime import datetime + +import multiprocessing + +import threading from google.appengine.api import apiproxy_stub from google.appengine.api.logservice import log_service_pb +from google.appengine.api.modules import ( + get_current_module_name, get_current_version_name +) from google.appengine.runtime import apiproxy_errors -from Queue import Queue, Empty +from Queue import Queue, Empty, Full # Add path to import file_io from appscale.common import file_io + _I_SIZE = struct.calcsize('I') +LEVELS = { + 0: 'DEBUG', + 1: 'INFO', + 2: 'WARNING', + 3: 'ERROR', + 4: 'CRITICAL', +} + + +class RequestsLogger(threading.Thread): + + FILENAME_TEMPLATE = ( + '/opt/appscale/logserver/requests-{app}-{service}-{version}-{port}.log' + ) + QUEUE_SIZE = 1024 * 16 + + def __init__(self): + super(RequestsLogger, self).__init__() + self.setDaemon(True) + self._logs_queue = multiprocessing.Queue(self.QUEUE_SIZE) + self._log_file = None + self._shutting_down = False + + def _open_log_file(self, request_info): + # Init logger lazily when application info is available + app_id = request_info['appId'] + service_id = request_info['serviceName'] + version_id = request_info['versionName'] + port = request_info['port'] + # Prepare filename + filename = self.FILENAME_TEMPLATE.format( + app=app_id, service=service_id, version=version_id, port=port) + # Open log file + self._log_file = open(filename, 'a') + + def run(self): + request_info = None + while True: + try: + try: + if not request_info: + # Get new info from the queue if previous has been saved + request_info = self._logs_queue.get() + if not request_info and self._shutting_down: + return + if not self._log_file: + self._open_log_file(request_info) + json.dump(request_info, self._log_file) + self._log_file.write('\n') + self._log_file.flush() + request_info = None + + except (OSError, IOError): + # Close file to reopen it again later + logging.exception( + 'Failed to write request_info to log file\n Request info: {}' + .format(request_info or "-")) + log_file = self._log_file + self._log_file = None + log_file.close() + time.sleep(5) + + except Exception: + logging.exception( + 'Failed to write request_info to log file\n Request info: {}' + .format(request_info or "-")) + time.sleep(5) + + except Exception: + # There were cases where exception was thrown at writing error + pass + + def stop(self): + self._shutting_down = True + self._logs_queue.put(None) + + def write(self, requests_info): + try: + # Put an item 
on the queue if a free slot is immediately available + self._logs_queue.put(requests_info, block=False) + except Full: + logging.error('Request logs queue is crowded') + def _cleanup_logserver_connection(connection): try: @@ -80,6 +176,7 @@ def _fill_request_log(requestLog, log, include_app_logs): line.set_level(appLog.level) line.set_log_message(appLog.message) + class LogServiceStub(apiproxy_stub.APIProxyStub): """Python stub for Log Service service.""" @@ -112,6 +209,15 @@ def __init__(self, persist=False, logs_path=None, request_data=None): #get head node_private ip from /etc/appscale/head_node_private_ip self._log_server_ip = file_io.read("/etc/appscale/head_node_private_ip").rstrip() + self._requests_logger = RequestsLogger() + self._requests_logger.start() + + def stop_requests_logger(self): + self._requests_logger.stop() + + def is_requests_logger_alive(self): + return self._requests_logger.is_alive() + def _get_log_server(self, app_id, blocking): key = (blocking, app_id) queue = self._log_server[key] @@ -148,7 +254,7 @@ def _query_log_server(self, app_id, packet): key, log_server = self._get_log_server(app_id, True) if not log_server: raise apiproxy_errors.ApplicationError( - log_service_pb.LogServiceError.STORAGE_ERROR) + log_service_pb.LogServiceError.STORAGE_ERROR) try: log_server.send(packet) fh = log_server.makefile('rb') @@ -238,6 +344,59 @@ def end_request(self, request_id, status, response_size, end_time=None): rl.responseSize = response_size rl.endTime = end_time self._pending_requests_applogs[request_id].finish() + #rl.finished = 1 + #rl.appLogs = self._pending_requests_applogs[request_id] + start_time = rl.startTime + start_time_ms = float(start_time) / 1000 + end_time_ms = float(end_time) / 1000 + + # Render app logs: + try: + app_logs_str = u'\n'.join([ + u'{} {} {}'.format( + LEVELS[log.level], + datetime.utcfromtimestamp(log.time/1000000) + .strftime('%Y-%m-%d %H:%M:%S'), + log.message + ) + for log in rl.appLogs + ]) + except UnicodeError: + app_logs_str = u'\n'.join([ + u'{} {} {}'.format( + LEVELS[log.level], + datetime.utcfromtimestamp(log.time/1000000) + .strftime('%Y-%m-%d %H:%M:%S'), + unicode(log.message, 'ascii', 'ignore') + ) + for log in rl.appLogs + ]) + + request_info = { + 'generated_id': '{}-{}'.format(start_time, request_id), + 'serviceName': get_current_module_name(), + 'versionName': get_current_version_name(), + 'startTime': start_time_ms, + 'endTime': end_time_ms, + 'latency': int(end_time_ms - start_time_ms), + 'level': max(0, 0, *[ + log.level for log in rl.appLogs + ]), + 'appId': rl.appId, + 'appscale-host': os.environ['MY_IP_ADDRESS'], + 'port': int(os.environ['MY_PORT']), + 'ip': rl.ip, + 'method': rl.method, + 'requestId': request_id, + 'resource': rl.resource, + 'responseSize': rl.responseSize, + 'status': rl.status, + 'userAgent': rl.userAgent, + 'appLogs': app_logs_str + } + + self._requests_logger.write(request_info) + buf = rl.to_bytes() packet = 'l%s%s' % (struct.pack('I', len(buf)), buf) self._send_to_logserver(rl.appId, packet) @@ -268,17 +427,17 @@ def _Dynamic_Read(self, request, response, request_id): if request.module_version_size() > 0 and request.version_id_size() > 0: raise apiproxy_errors.ApplicationError( - log_service_pb.LogServiceError.INVALID_REQUEST) + log_service_pb.LogServiceError.INVALID_REQUEST) if (request.request_id_size() and - (request.has_start_time() or request.has_end_time() or - request.has_offset())): + (request.has_start_time() or request.has_end_time() or + request.has_offset())): raise 
apiproxy_errors.ApplicationError( - log_service_pb.LogServiceError.INVALID_REQUEST) + log_service_pb.LogServiceError.INVALID_REQUEST) rl = self._pending_requests.get(request_id, None) if rl is None: raise apiproxy_errors.ApplicationError( - log_service_pb.LogServiceError.INVALID_REQUEST) + log_service_pb.LogServiceError.INVALID_REQUEST) query = logging_capnp.Query.new_message() if request.module_version(0).has_module_id(): @@ -321,7 +480,7 @@ def _Dynamic_Read(self, request, response, request_id): except: logging.exception("Failed to retrieve logs") raise apiproxy_errors.ApplicationError( - log_service_pb.LogServiceError.INVALID_REQUEST) + log_service_pb.LogServiceError.INVALID_REQUEST) def _Dynamic_SetStatus(self, unused_request, unused_response, unused_request_id): diff --git a/scripts/init-filebeat.sh b/scripts/init-filebeat.sh new file mode 100644 index 0000000000..3e29a4a8dc --- /dev/null +++ b/scripts/init-filebeat.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +set -e +set -u + +usage() +{ + echo "usage: init-filebeat.sh --logstash IP:PORT" +} + + +if [[ $# == 2 && $1 == '--logstash' ]]; then + LOGSTASH_LOCATION=$2 +else + usage + exit 1 +fi + +while fuser /var/cache/apt/archives/lock /var/lib/apt/lists/lock /var/lib/dpkg/lock ; do + echo "Waiting for apt lock" + sleep 60 +done + +if ! systemctl | grep -q filebeat; then + echo "Installing Filebeat..." + curl -L -O https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-5.6.4-amd64.deb + sudo dpkg -i filebeat-5.6.4-amd64.deb +else + echo "Filebeat has been already installed" +fi + + +echo "Configuring Filebeat..." +cat > /etc/filebeat/filebeat.yml << FILEBEAT_YML + +filebeat.prospectors: +- input_type: log + paths: ["/opt/appscale/logserver/requests-*"] + json.keys_under_root: true + +output.logstash: + hosts: ["${LOGSTASH_LOCATION}"] + +FILEBEAT_YML + + +echo "Starting Filebeat service..." +systemctl enable filebeat.service +systemctl start filebeat.service From 2f7525f051bca62fed206ce06ba611058c04cd78 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 21 Feb 2019 20:26:40 +0200 Subject: [PATCH 020/221] Make request logger optional --- .../api/logservice/logservice_stub.py | 116 +++++++++--------- .../tools/devappserver2/dispatcher.py | 8 ++ scripts/init-filebeat.sh | 3 + 3 files changed, 71 insertions(+), 56 deletions(-) diff --git a/AppServer/google/appengine/api/logservice/logservice_stub.py b/AppServer/google/appengine/api/logservice/logservice_stub.py index be28c72d7f..b5cc08c426 100644 --- a/AppServer/google/appengine/api/logservice/logservice_stub.py +++ b/AppServer/google/appengine/api/logservice/logservice_stub.py @@ -140,6 +140,7 @@ def _cleanup_logserver_connection(connection): except socket.error: pass + def _fill_request_log(requestLog, log, include_app_logs): log.set_request_id(requestLog.requestId) log.set_app_id(requestLog.appId) @@ -186,10 +187,8 @@ class LogServiceStub(apiproxy_stub.APIProxyStub): _ACCEPTS_REQUEST_ID = True - _DEFAULT_READ_COUNT = 20 - def __init__(self, persist=False, logs_path=None, request_data=None): """Initializer. 
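The filebeat.yml written above decodes each harvested line as a JSON object and, with json.keys_under_root, lifts its keys to the top level of the event. The request log therefore has to contain exactly one JSON object per line, which is the shape end_request() writes into /opt/appscale/logserver/requests-<app>-<service>-<version>-<port>.log. A minimal sketch of one such record; the field names are a subset of the request_info dict above and the values are invented:

    import json

    record = {
        'appId': 'guestbook',
        'serviceName': 'default',
        'versionName': 'v1',
        'requestId': 'abc123',
        'method': 'GET',
        'resource': '/',
        'status': 200,
        'responseSize': 1234,
        'startTime': 1494240000000.0,   # milliseconds
        'endTime': 1494240000083.0,
        'latency': 83,
        'appLogs': 'INFO 2017-05-08 10:40:00 handled request',
    }

    # One JSON object per line is what Filebeat forwards to Logstash,
    # so the logger writes json.dumps(record) followed by a newline.
    print(json.dumps(record))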
@@ -206,11 +205,16 @@ def __init__(self, persist=False, logs_path=None, request_data=None): self._pending_requests = defaultdict(logging_capnp.RequestLog.new_message) self._pending_requests_applogs = dict() self._log_server = defaultdict(Queue) - #get head node_private ip from /etc/appscale/head_node_private_ip + # get head node_private ip from /etc/appscale/head_node_private_ip self._log_server_ip = file_io.read("/etc/appscale/head_node_private_ip").rstrip() - self._requests_logger = RequestsLogger() - self._requests_logger.start() + if os.path.exists('/etc/appscale/elk-enabled'): + self._requests_logger = RequestsLogger() + self._requests_logger.start() + self.is_elk_enabled = True + else: + self._requests_logger = None + self.is_elk_enabled = False def stop_requests_logger(self): self._requests_logger.stop() @@ -344,58 +348,58 @@ def end_request(self, request_id, status, response_size, end_time=None): rl.responseSize = response_size rl.endTime = end_time self._pending_requests_applogs[request_id].finish() - #rl.finished = 1 - #rl.appLogs = self._pending_requests_applogs[request_id] - start_time = rl.startTime - start_time_ms = float(start_time) / 1000 - end_time_ms = float(end_time) / 1000 - # Render app logs: - try: - app_logs_str = u'\n'.join([ - u'{} {} {}'.format( - LEVELS[log.level], - datetime.utcfromtimestamp(log.time/1000000) - .strftime('%Y-%m-%d %H:%M:%S'), - log.message - ) - for log in rl.appLogs - ]) - except UnicodeError: - app_logs_str = u'\n'.join([ - u'{} {} {}'.format( - LEVELS[log.level], - datetime.utcfromtimestamp(log.time/1000000) - .strftime('%Y-%m-%d %H:%M:%S'), - unicode(log.message, 'ascii', 'ignore') - ) - for log in rl.appLogs - ]) - - request_info = { - 'generated_id': '{}-{}'.format(start_time, request_id), - 'serviceName': get_current_module_name(), - 'versionName': get_current_version_name(), - 'startTime': start_time_ms, - 'endTime': end_time_ms, - 'latency': int(end_time_ms - start_time_ms), - 'level': max(0, 0, *[ - log.level for log in rl.appLogs - ]), - 'appId': rl.appId, - 'appscale-host': os.environ['MY_IP_ADDRESS'], - 'port': int(os.environ['MY_PORT']), - 'ip': rl.ip, - 'method': rl.method, - 'requestId': request_id, - 'resource': rl.resource, - 'responseSize': rl.responseSize, - 'status': rl.status, - 'userAgent': rl.userAgent, - 'appLogs': app_logs_str - } - - self._requests_logger.write(request_info) + if self.is_elk_enabled: + start_time = rl.startTime + start_time_ms = float(start_time) / 1000 + end_time_ms = float(end_time) / 1000 + + # Render app logs: + try: + app_logs_str = u'\n'.join([ + u'{} {} {}'.format( + LEVELS[log.level], + datetime.utcfromtimestamp(log.time/1000000) + .strftime('%Y-%m-%d %H:%M:%S'), + log.message + ) + for log in rl.appLogs + ]) + except UnicodeError: + app_logs_str = u'\n'.join([ + u'{} {} {}'.format( + LEVELS[log.level], + datetime.utcfromtimestamp(log.time/1000000) + .strftime('%Y-%m-%d %H:%M:%S'), + unicode(log.message, 'ascii', 'ignore') + ) + for log in rl.appLogs + ]) + + request_info = { + 'generated_id': '{}-{}'.format(start_time, request_id), + 'serviceName': get_current_module_name(), + 'versionName': get_current_version_name(), + 'startTime': start_time_ms, + 'endTime': end_time_ms, + 'latency': int(end_time_ms - start_time_ms), + 'level': max(0, 0, *[ + log.level for log in rl.appLogs + ]), + 'appId': rl.appId, + 'appscale-host': os.environ['MY_IP_ADDRESS'], + 'port': int(os.environ['MY_PORT']), + 'ip': rl.ip, + 'method': rl.method, + 'requestId': request_id, + 'resource': rl.resource, + 
'responseSize': rl.responseSize, + 'status': rl.status, + 'userAgent': rl.userAgent, + 'appLogs': app_logs_str + } + + self._requests_logger.write(request_info) buf = rl.to_bytes() packet = 'l%s%s' % (struct.pack('I', len(buf)), buf) diff --git a/AppServer/google/appengine/tools/devappserver2/dispatcher.py b/AppServer/google/appengine/tools/devappserver2/dispatcher.py index c41daf2e6e..62b45f940b 100644 --- a/AppServer/google/appengine/tools/devappserver2/dispatcher.py +++ b/AppServer/google/appengine/tools/devappserver2/dispatcher.py @@ -24,6 +24,7 @@ import urlparse import wsgiref.headers +from google.appengine.api import apiproxy_stub_map from google.appengine.api import request_info from google.appengine.tools.devappserver2 import constants from google.appengine.tools.devappserver2 import instance @@ -225,6 +226,13 @@ def quit(self): requests_in_progress = True if not requests_in_progress: + logservice = apiproxy_stub_map.apiproxy.GetStub('logservice') + if logservice.is_elk_enabled: + logging.info('Waiting for Request Logger to finish.') + logservice.stop_requests_logger() + while logservice.is_requests_logger_alive(): + time.sleep(.5) + logging.info('Request Logger has finished.') break time.sleep(.5) diff --git a/scripts/init-filebeat.sh b/scripts/init-filebeat.sh index 3e29a4a8dc..8b4b6e3f69 100644 --- a/scripts/init-filebeat.sh +++ b/scripts/init-filebeat.sh @@ -44,6 +44,9 @@ output.logstash: FILEBEAT_YML +# It's just a flag used in AppServer/../logservice_stub +touch /etc/appscale/elk-enabled + echo "Starting Filebeat service..." systemctl enable filebeat.service systemctl start filebeat.service From 5c4c20e6104b9e34295f2d367ce29fe26ae5573e Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Mon, 27 May 2019 17:25:44 +0300 Subject: [PATCH 021/221] Use newer filebeat, config logrotate for JSON log --- .../appscale/admin/instance_manager/utils.py | 12 +++++++++++- .../appengine/api/logservice/logservice_stub.py | 2 +- scripts/init-filebeat.sh | 13 +++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/AdminServer/appscale/admin/instance_manager/utils.py b/AdminServer/appscale/admin/instance_manager/utils.py index 84951d2e8a..1f5fc30b46 100644 --- a/AdminServer/appscale/admin/instance_manager/utils.py +++ b/AdminServer/appscale/admin/instance_manager/utils.py @@ -147,7 +147,17 @@ def setup_logrotate(app_name, log_size): notifempty copytruncate }} -""".format(log_prefix=log_prefix, size=log_size) + +/opt/appscale/logserver/requests-{app_name}*.log {{ + size {size} + missingok + rotate 3 + compress + delaycompress + notifempty + copytruncate +}} +""".format(log_prefix=log_prefix, app_name=app_name, size=log_size) logger.debug("Logrotate file: {} - Contents:\n{}". 
format(app_logrotate_script, contents)) diff --git a/AppServer/google/appengine/api/logservice/logservice_stub.py b/AppServer/google/appengine/api/logservice/logservice_stub.py index b5cc08c426..9ab0275f01 100644 --- a/AppServer/google/appengine/api/logservice/logservice_stub.py +++ b/AppServer/google/appengine/api/logservice/logservice_stub.py @@ -97,7 +97,7 @@ def run(self): return if not self._log_file: self._open_log_file(request_info) - json.dump(request_info, self._log_file) + self._log_file.write(json.dumps(request_info)) self._log_file.write('\n') self._log_file.flush() request_info = None diff --git a/scripts/init-filebeat.sh b/scripts/init-filebeat.sh index 8b4b6e3f69..f174b7ad13 100644 --- a/scripts/init-filebeat.sh +++ b/scripts/init-filebeat.sh @@ -18,13 +18,13 @@ fi while fuser /var/cache/apt/archives/lock /var/lib/apt/lists/lock /var/lib/dpkg/lock ; do echo "Waiting for apt lock" - sleep 60 + sleep 20 done -if ! systemctl | grep -q filebeat; then +if ! apt-cache policy filebeat | grep Installed | grep -q ' 6.8'; then echo "Installing Filebeat..." - curl -L -O https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-5.6.4-amd64.deb - sudo dpkg -i filebeat-5.6.4-amd64.deb + curl -L -O https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-6.8.0-amd64.deb + sudo dpkg -i filebeat-6.8.0-amd64.deb else echo "Filebeat has been already installed" fi @@ -33,8 +33,9 @@ fi echo "Configuring Filebeat..." cat > /etc/filebeat/filebeat.yml << FILEBEAT_YML -filebeat.prospectors: -- input_type: log +filebeat.inputs: +- type: log + enabled: true paths: ["/opt/appscale/logserver/requests-*"] json.keys_under_root: true From 8e4242e1ca8bd42384ae3d27c4ead9d80e46ccd7 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 10 Jul 2019 18:01:46 +0300 Subject: [PATCH 022/221] Addressing PR comments: - don't touch logs encoding. - use logging module for writing JSON log. 
--- .../api/logservice/logservice_stub.py | 151 ++++++++---------- 1 file changed, 67 insertions(+), 84 deletions(-) diff --git a/AppServer/google/appengine/api/logservice/logservice_stub.py b/AppServer/google/appengine/api/logservice/logservice_stub.py index 9ab0275f01..4cc9cd8d8b 100644 --- a/AppServer/google/appengine/api/logservice/logservice_stub.py +++ b/AppServer/google/appengine/api/logservice/logservice_stub.py @@ -18,6 +18,8 @@ import base64 +from logging.handlers import WatchedFileHandler + import capnp # pylint: disable=unused-import import logging @@ -33,8 +35,6 @@ from datetime import datetime -import multiprocessing - import threading from google.appengine.api import apiproxy_stub from google.appengine.api.logservice import log_service_pb @@ -69,11 +69,11 @@ class RequestsLogger(threading.Thread): def __init__(self): super(RequestsLogger, self).__init__() self.setDaemon(True) - self._logs_queue = multiprocessing.Queue(self.QUEUE_SIZE) - self._log_file = None + self._logs_queue = Queue(self.QUEUE_SIZE) + self._logger = None self._shutting_down = False - def _open_log_file(self, request_info): + def _init_logger(self, request_info): # Init logger lazily when application info is available app_id = request_info['appId'] service_id = request_info['serviceName'] @@ -82,45 +82,36 @@ def _open_log_file(self, request_info): # Prepare filename filename = self.FILENAME_TEMPLATE.format( app=app_id, service=service_id, version=version_id, port=port) - # Open log file - self._log_file = open(filename, 'a') + # Initialize logger + formatter = logging.Formatter('%(message)s') + file_handler = WatchedFileHandler(filename) + file_handler.setFormatter(formatter) + file_handler.setLevel(logging.INFO) + self._logger = logging.Logger('request-logger', logging.INFO) + self._logger.addHandler(file_handler) def run(self): request_info = None while True: + if not request_info: + # Get new info from the queue if previous has been saved + request_info = self._logs_queue.get() + if not request_info and self._shutting_down: + return try: + if not self._logger: + self._init_logger(request_info) + self._logger.info(json.dumps(request_info)) + request_info = None + except Exception: try: - if not request_info: - # Get new info from the queue if previous has been saved - request_info = self._logs_queue.get() - if not request_info and self._shutting_down: - return - if not self._log_file: - self._open_log_file(request_info) - self._log_file.write(json.dumps(request_info)) - self._log_file.write('\n') - self._log_file.flush() - request_info = None - - except (OSError, IOError): - # Close file to reopen it again later logging.exception( 'Failed to write request_info to log file\n Request info: {}' .format(request_info or "-")) - log_file = self._log_file - self._log_file = None - log_file.close() - time.sleep(5) - except Exception: - logging.exception( - 'Failed to write request_info to log file\n Request info: {}' - .format(request_info or "-")) - time.sleep(5) - - except Exception: - # There were cases where exception was thrown at writing error - pass + # There were cases where exception was thrown at writing error + pass + time.sleep(5) def stop(self): self._shutting_down = True @@ -222,6 +213,47 @@ def stop_requests_logger(self): def is_requests_logger_alive(self): return self._requests_logger.is_alive() + def save_to_file_for_elk(self, request_id, end_time, request_log): + start_time = request_log.startTime + start_time_ms = float(start_time) / 1000 + end_time_ms = float(end_time) / 1000 + + # Render app 
logs: + app_logs_str = '\n'.join([ + '{} {} {}'.format( + LEVELS[log.level], + datetime.utcfromtimestamp(log.time/1000000) + .strftime('%Y-%m-%d %H:%M:%S'), + log.message + ) + for log in request_log.appLogs + ]) + + request_info = { + 'generated_id': '{}-{}'.format(start_time, request_id), + 'serviceName': get_current_module_name(), + 'versionName': get_current_version_name(), + 'startTime': start_time_ms, + 'endTime': end_time_ms, + 'latency': int(end_time_ms - start_time_ms), + 'level': max(0, 0, *[ + log.level for log in request_log.appLogs + ]), + 'appId': request_log.appId, + 'appscale-host': os.environ['MY_IP_ADDRESS'], + 'port': int(os.environ['MY_PORT']), + 'ip': request_log.ip, + 'method': request_log.method, + 'requestId': request_id, + 'resource': request_log.resource, + 'responseSize': request_log.responseSize, + 'status': request_log.status, + 'userAgent': request_log.userAgent, + 'appLogs': app_logs_str + } + + self._requests_logger.write(request_info) + def _get_log_server(self, app_id, blocking): key = (blocking, app_id) queue = self._log_server[key] @@ -350,56 +382,7 @@ def end_request(self, request_id, status, response_size, end_time=None): self._pending_requests_applogs[request_id].finish() if self.is_elk_enabled: - start_time = rl.startTime - start_time_ms = float(start_time) / 1000 - end_time_ms = float(end_time) / 1000 - - # Render app logs: - try: - app_logs_str = u'\n'.join([ - u'{} {} {}'.format( - LEVELS[log.level], - datetime.utcfromtimestamp(log.time/1000000) - .strftime('%Y-%m-%d %H:%M:%S'), - log.message - ) - for log in rl.appLogs - ]) - except UnicodeError: - app_logs_str = u'\n'.join([ - u'{} {} {}'.format( - LEVELS[log.level], - datetime.utcfromtimestamp(log.time/1000000) - .strftime('%Y-%m-%d %H:%M:%S'), - unicode(log.message, 'ascii', 'ignore') - ) - for log in rl.appLogs - ]) - - request_info = { - 'generated_id': '{}-{}'.format(start_time, request_id), - 'serviceName': get_current_module_name(), - 'versionName': get_current_version_name(), - 'startTime': start_time_ms, - 'endTime': end_time_ms, - 'latency': int(end_time_ms - start_time_ms), - 'level': max(0, 0, *[ - log.level for log in rl.appLogs - ]), - 'appId': rl.appId, - 'appscale-host': os.environ['MY_IP_ADDRESS'], - 'port': int(os.environ['MY_PORT']), - 'ip': rl.ip, - 'method': rl.method, - 'requestId': request_id, - 'resource': rl.resource, - 'responseSize': rl.responseSize, - 'status': rl.status, - 'userAgent': rl.userAgent, - 'appLogs': app_logs_str - } - - self._requests_logger.write(request_info) + self.save_to_file_for_elk(request_id, end_time, rl) buf = rl.to_bytes() packet = 'l%s%s' % (struct.pack('I', len(buf)), buf) From c1ce37deabf40736ef6bcc9e4e3bfc3755933a50 Mon Sep 17 00:00:00 2001 From: Tanvi Marballi Date: Tue, 16 Jul 2019 15:08:54 -0700 Subject: [PATCH 023/221] Build only directory specified in the `appscale up --update {dir}` --- AppController/djinn.rb | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 4d4f1b8e4c..898c2d4d41 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -485,7 +485,8 @@ class Djinn 'proxies_stats_log_interval' => [Fixnum, '35', true], 'write_detailed_processes_stats_log' => [TrueClass, 'False', true], 'write_detailed_proxies_stats_log' => [TrueClass, 'False', true], - 'zone' => [String, nil, true] + 'zone' => [String, nil, true], + 'update' => [String, nil, false] }.freeze # Template used for rsyslog configuration files. 
@@ -3682,39 +3683,43 @@ def build_api_server # Run a build on modified directories so that changes will take effect. def build_uncommitted_changes - status = `git -C #{APPSCALE_HOME} status` + if @options['update'].nil? + return + end + + update_dir = @options['update'] # Update Python packages across corresponding virtual environments - if status.include?('common') + if update_dir.include?('common') update_python_package("#{APPSCALE_HOME}/common") update_python_package("#{APPSCALE_HOME}/common", '/opt/appscale_venvs/api_server/bin/pip') end - if status.include?('AppControllerClient') + if update_dir.include?('app_controller') update_python_package("#{APPSCALE_HOME}/AppControllerClient") end - if status.include?('AdminServer') + if update_dir.include?('admin_server') update_python_package("#{APPSCALE_HOME}/AdminServer") end - if status.include?('AppTaskQueue') + if update_dir.include?('taskqueue') extras = TaskQueue::OPTIONAL_FEATURES.join(',') update_python_package("#{APPSCALE_HOME}/AppTaskQueue[#{extras}]") end - if status.include?('AppDB') + if update_dir.include?('app_db') update_python_package("#{APPSCALE_HOME}/AppDB") end - if status.include?('InfrastructureManager') + if update_dir.include?('iaas_manager') update_python_package("#{APPSCALE_HOME}/InfrastructureManager") end - if status.include?('Hermes') + if update_dir.include?('hermes') update_python_package("#{APPSCALE_HOME}/Hermes") end - if status.include?('APIServer') + if update_dir.include?('api_server') build_api_server end # Update Java AppServer - build_java_appserver if status.include?('AppServer_Java') + build_java_appserver if update_dir.include?('appserver_java') end def configure_ejabberd_cert From d62ba3eaa0c2dfb9a81d665f2ec65c3cef52da2e Mon Sep 17 00:00:00 2001 From: Tanvi Marballi Date: Thu, 18 Jul 2019 17:00:26 -0700 Subject: [PATCH 024/221] Add a way to update and build all directories. --- AppController/djinn.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 84d42eeeec..311ff410dc 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -131,6 +131,10 @@ # The exit code that indicates the data layout version is unexpected. INVALID_VERSION_EXIT_CODE = 64 +# The allowed list of code directories to specify for updating the code and building it. +ALLOWED_DIR_UPDATES = ["common", "app_controller", "admin_server", "taskqueue", "app_db", + "iaas_manager", "hermes", "api_server", "appserver_java"] + # Djinn (interchangeably known as 'the AppController') automatically # configures and deploys all services for a single node. It relies on other # Djinns or the AppScale Tools to tell it what services (roles) it should @@ -3713,6 +3717,10 @@ def build_uncommitted_changes update_dir = @options['update'] + if update_dir == "all" + update_dir = ALLOWED_DIR_UPDATES.join(',') + end + # Update Python packages across corresponding virtual environments if update_dir.include?('common') update_python_package("#{APPSCALE_HOME}/common") From 64ea66bef7f8c66c86740d85209f9f593d9c7cf6 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Thu, 6 Jun 2019 17:51:03 +0300 Subject: [PATCH 025/221] Move constants from queue.py module to constants.py module. 
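For illustration, a minimal sketch of the queue-name validation that moves into constants.py; the pattern is copied from the diff below, while the 'guestbook' project name is only a made-up example:

    import re

    MAX_QUEUE_NAME_LENGTH = 100
    FULL_QUEUE_NAME_RE = re.compile(
        r'^(projects/[a-zA-Z0-9-]+/taskqueues/)?'
        r'[a-zA-Z0-9-]{1,%s}$' % MAX_QUEUE_NAME_LENGTH)

    # Both short and fully qualified queue names are accepted:
    assert FULL_QUEUE_NAME_RE.match('default')
    assert FULL_QUEUE_NAME_RE.match('projects/guestbook/taskqueues/default')
    # Characters outside [a-zA-Z0-9-] (e.g. underscores) are rejected:
    assert not FULL_QUEUE_NAME_RE.match('bad_queue!')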
--- AppTaskQueue/appscale/taskqueue/constants.py | 9 +++++++++ AppTaskQueue/appscale/taskqueue/queue.py | 10 ---------- AppTaskQueue/appscale/taskqueue/task.py | 4 ++-- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/constants.py b/AppTaskQueue/appscale/taskqueue/constants.py index e15c27b6f3..9d745c68b8 100644 --- a/AppTaskQueue/appscale/taskqueue/constants.py +++ b/AppTaskQueue/appscale/taskqueue/constants.py @@ -37,3 +37,12 @@ class TaskNotFound(Exception): TARGET_REGEX = re.compile(r'^([a-zA-Z0-9\-]+[\.]?[a-zA-Z0-9\-]*)$') SHUTTING_DOWN_TIMEOUT = 10 # Limit time for finishing request + +MAX_QUEUE_NAME_LENGTH = 100 + +# A regex rule for validating queue names. +FULL_QUEUE_NAME_PATTERN = r'^(projects/[a-zA-Z0-9-]+/taskqueues/)?' \ + r'[a-zA-Z0-9-]{1,%s}$' % MAX_QUEUE_NAME_LENGTH + +# A compiled regex rule for validating queue names. +FULL_QUEUE_NAME_RE = re.compile(FULL_QUEUE_NAME_PATTERN) diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 3e6db93008..630b1ca664 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -2,7 +2,6 @@ import base64 import json -import re import sys import time import uuid @@ -97,8 +96,6 @@ def on_unavailable(self, query, consistency, required_replicas, # A policy that does not retry statements. NO_RETRIES = FallthroughRetryPolicy() -MAX_QUEUE_NAME_LENGTH = 100 - TRANSIENT_CASSANDRA_ERRORS = ( cassandra.Unavailable, cassandra.Timeout, cassandra.CoordinationFailure, cassandra.OperationTimedOut, cassandra.cluster.NoHostAvailable) @@ -110,13 +107,6 @@ def on_unavailable(self, query, consistency, required_replicas, # part of a leased task. This is to mimic a GCP oddity/bug. LONG_QUEUE_FORM = 'projects/{app}/taskqueues/{queue}' -# A regex rule for validating queue names. -FULL_QUEUE_NAME_PATTERN = r'^(projects/[a-zA-Z0-9-]+/taskqueues/)?' \ - r'[a-zA-Z0-9-]{1,%s}$' % MAX_QUEUE_NAME_LENGTH - -# A compiled regex rule for validating queue names. -FULL_QUEUE_NAME_RE = re.compile(FULL_QUEUE_NAME_PATTERN) - # All possible fields to include in a queue's JSON representation. QUEUE_FIELDS = ( 'kind', 'id', 'maxLeases', diff --git a/AppTaskQueue/appscale/taskqueue/task.py b/AppTaskQueue/appscale/taskqueue/task.py index afcc40e073..fcb71ac2b3 100644 --- a/AppTaskQueue/appscale/taskqueue/task.py +++ b/AppTaskQueue/appscale/taskqueue/task.py @@ -5,8 +5,8 @@ import string import sys -import queue from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from .constants import FULL_QUEUE_NAME_RE from .protocols import taskqueue_service_pb2 sys.path.append(APPSCALE_PYTHON_APPSERVER) @@ -24,7 +24,7 @@ # Validation rules for queue parameters. QUEUE_ATTRIBUTE_RULES = { 'id': lambda name: TASK_NAME_RE.match(name), - 'queueName': lambda name: queue.FULL_QUEUE_NAME_RE.match(name), + 'queueName': lambda name: FULL_QUEUE_NAME_RE.match(name), 'tag': lambda tag: tag is None or len(tag) <= MAX_TAG_LENGTH } From 283296ee81655b23f6e490c092bd380b60c6c4a0 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Thu, 6 Jun 2019 18:09:58 +0300 Subject: [PATCH 026/221] Change import to compatible with py3 and move dict's iteritems() method to items(). 
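In miniature, the dict-iteration part of this change looks like the following sketch (the sample mapping is arbitrary):

    # Python 2 only:
    #   for attr, val in attributes.iteritems():
    # Python 2 and 3:
    attributes = {'rate': '5/s', 'mode': 'push'}
    for attr, val in attributes.items():
        print('{}={}'.format(attr, val))

    # Likewise, implicit relative imports such as 'import tq_lib' become
    # explicit, e.g. 'from .tq_lib import TASK_STATES'.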
--- .../appscale/taskqueue/distributed_tq.py | 27 ++++++++++--------- AppTaskQueue/appscale/taskqueue/queue.py | 6 ++--- .../common/service_stats/stats_manager.py | 7 +++-- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/distributed_tq.py b/AppTaskQueue/appscale/taskqueue/distributed_tq.py index ab521c9374..efa58d361f 100644 --- a/AppTaskQueue/appscale/taskqueue/distributed_tq.py +++ b/AppTaskQueue/appscale/taskqueue/distributed_tq.py @@ -11,7 +11,6 @@ import socket import sys import time -import tq_lib from appscale.common import appscale_info from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER @@ -28,6 +27,10 @@ DatastorePermanentError, DatastoreTransientError, Entity) +from .protocols import taskqueue_service_pb2 +from .protocols.taskqueue_service_pb2 import ( + TaskQueueServiceError +) from .queue import ( InvalidLeaseRequest, PostgresPullQueue, @@ -35,19 +38,19 @@ PushQueue, TransientError ) +from .queue_manager import GlobalQueueManager +from .service_manager import GlobalServiceManager from .task import Task -from .tq_lib import TASK_STATES +from .tq_lib import ( + choose_task_name, + TASK_STATES, + verify_task_queue_add_request +) from .utils import ( get_celery_queue_name, get_queue_function_name, logger ) -from .queue_manager import GlobalQueueManager -from .service_manager import GlobalServiceManager -from .protocols import taskqueue_service_pb2 -from .protocols.taskqueue_service_pb2 import ( - TaskQueueServiceError -) sys.path.append(APPSCALE_PYTHON_APPSERVER) @@ -158,7 +161,7 @@ def fetch_queue_stats(self, app_id, http_data): oldest_eta = queue.oldest_eta() else: num_tasks = self.datastore_client.query_count(app_id, - [("state =", tq_lib.TASK_STATES.QUEUED), + [("state =", TASK_STATES.QUEUED), ("queue =", queue_name), ("app_id =", app_id)]) # This is not supported for push queues yet. @@ -368,7 +371,7 @@ def __bulk_add(self, source_info, request, response): task_result.chosen_task_name = new_task.id continue - result = tq_lib.verify_task_queue_add_request(add_request.app_id, + result = verify_task_queue_add_request(add_request.app_id, add_request, now) # Tasks go from SKIPPED to OK once they're run. If there are # any failures from other tasks then we pass this request @@ -378,7 +381,7 @@ def __bulk_add(self, source_info, request, response): if add_request.HasField("task_name"): task_name = add_request.task_name - namespaced_name = tq_lib.choose_task_name(add_request.app_id, + namespaced_name = choose_task_name(add_request.app_id, add_request.queue_name, user_chosen=task_name) add_request.task_name = namespaced_name @@ -457,7 +460,7 @@ def __create_task_name(self, project_id, queue_name, task_name, retries=3): task_name: A string specifying the task name key. retries: An integer specifying how many times to retry the create. """ - entity = Entity(key_name=task_name, state=tq_lib.TASK_STATES.QUEUED, + entity = Entity(key_name=task_name, state=TASK_STATES.QUEUED, queue=queue_name, app_id=project_id) try: self.datastore_client.put(project_id, entity) diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 630b1ca664..90825bfc26 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -199,7 +199,7 @@ def validate_config(self): Raises: InvalidQueueConfiguration if there is an invalid attribute. 
""" - for attribute, rule in QUEUE_ATTRIBUTE_RULES.iteritems(): + for attribute, rule in QUEUE_ATTRIBUTE_RULES.items(): try: value = getattr(self, attribute) except AttributeError: @@ -300,7 +300,7 @@ def __repr__(self): attributes[attribute] = getattr(self, attribute) attr_str = ', '.join('{}={}'.format(attr, val) - for attr, val in attributes.iteritems()) + for attr, val in attributes.items()) return ''.format(self.name, attr_str) @@ -1646,7 +1646,7 @@ def _lease_batch(self, indexes, new_eta): futures[result_num] = (future, not success) index_update_futures = [] - for result_num, (future, lease_timed_out) in futures.iteritems(): + for result_num, (future, lease_timed_out) in futures.items(): index = indexes[result_num] try: read_result = future.result()[0] diff --git a/common/appscale/common/service_stats/stats_manager.py b/common/appscale/common/service_stats/stats_manager.py index d48b9daf2f..354ada6609 100644 --- a/common/appscale/common/service_stats/stats_manager.py +++ b/common/appscale/common/service_stats/stats_manager.py @@ -3,7 +3,6 @@ import time import copy -from future.utils import iteritems from appscale.common.service_stats import samples @@ -387,11 +386,11 @@ def _render_recent(self, metrics_config, requests): if nested_config is None: # Compute single metric for each category - for category, requests_group in iteritems(grouped_by_category): + for category, requests_group in grouped_by_category.items(): categories_stats[category] = metric(requests_group) else: # Render nested stats for each category - for category, requests_group in iteritems(grouped_by_category): + for category, requests_group in grouped_by_category.items(): categories_stats[category] = self._render_recent( nested_config, requests_group ) @@ -497,7 +496,7 @@ def _convert_config_dict(init_dict): (name, categorizer, summarizer/metric, nested_config) """ result = [] - for key, value in iteritems(init_dict): + for key, value in init_dict.items(): if isinstance(key, str): # if key is string => value is summarizer or metric result.append((key, None, value, None)) From d35399f1ede556c986024b54ee3b9f9e2dc2d2b5 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Thu, 6 Jun 2019 18:27:52 +0300 Subject: [PATCH 027/221] Add postprocessing to make py3-compatible imports. --- AppController/djinn.rb | 1 + .../protocols/compile_and_prepare.sh | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100755 AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh diff --git a/AppController/djinn.rb b/AppController/djinn.rb index f261611a4b..7aeafd8f6a 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3691,6 +3691,7 @@ def build_taskqueue Djinn.log_error('Unable to compile AppTaskQueue proto files') return end + system("sed -i -E 's/^import.*_pb2/from . \0/' #{src}*.py") extras = TaskQueue::OPTIONAL_FEATURES.join(',') update_python_package("#{APPSCALE_HOME}/AppTaskQueue[#{extras}]", TaskQueue::TASKQUEUE_PIP) diff --git a/AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh b/AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh new file mode 100755 index 0000000000..11af748a14 --- /dev/null +++ b/AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Compiles protocols and prepares venv for tq + +set -e +set -x + +PROTOCOLS_DIR="$( realpath --strip "$( dirname "${BASH_SOURCE[0]}" )" )" +TQ_DIR="$( dirname "$( dirname "$( dirname "${PROTOCOLS_DIR}" )" )" )" +COMMON_DIR="$( dirname "${TQ_DIR}" )"/common + +if ! 
protoc --version | grep -E ' (3\.)|(2\.)' ; then + echo "Couldn't compile *.proto files because protoc version 3 was not found." + exit 1 +fi + +echo "Compiling Protocol buffer *.proto files.." +(cd "${TQ_DIR}"/appscale/taskqueue/protocols && \ + protoc --python_out=./ *.proto && \ + sed -i -E 's/^import.*_pb2/from . \0/' *.py) + +if [ "$1" == "" ]; then + echo "Positional parameter 1 is required and should contain pip executable to use." + exit 1 +fi + +PIP="$1" + +echo "Upgrading appscale-common.." +"${PIP}" install --upgrade --no-deps "${COMMON_DIR}" +echo "Installing appscale-common dependencies if any missing.." +"${PIP}" install "${COMMON_DIR}" +echo "Upgrading appscale-taskqueue.." +"${PIP}" install --upgrade --no-deps "${TQ_DIR}[celery_gui]" +echo "Installing appscale-taskqueue dependencies if any missing.." +"${PIP}" install "${TQ_DIR}[celery_gui]" + +echo "appscale-taskqueue has been successfully installed." From 9ad187d3923ecae1c914deaffeb061a7dc850462 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Wed, 3 Jul 2019 17:55:22 +0300 Subject: [PATCH 028/221] Make creation of py3 venv correctly. --- AppController/djinn.rb | 1 - .../protocols/compile_and_prepare.sh | 38 ------------------- .../taskqueue/protocols/compile_protocols.sh | 9 +++-- .../test/helpers/restart-taskqueue.sh | 20 +++++++++- debian/appscale_install_functions.sh | 16 +++++++- 5 files changed, 38 insertions(+), 46 deletions(-) delete mode 100755 AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 7aeafd8f6a..f261611a4b 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3691,7 +3691,6 @@ def build_taskqueue Djinn.log_error('Unable to compile AppTaskQueue proto files') return end - system("sed -i -E 's/^import.*_pb2/from . \0/' #{src}*.py") extras = TaskQueue::OPTIONAL_FEATURES.join(',') update_python_package("#{APPSCALE_HOME}/AppTaskQueue[#{extras}]", TaskQueue::TASKQUEUE_PIP) diff --git a/AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh b/AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh deleted file mode 100755 index 11af748a14..0000000000 --- a/AppTaskQueue/appscale/taskqueue/protocols/compile_and_prepare.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -# Compiles protocols and prepares venv for tq - -set -e -set -x - -PROTOCOLS_DIR="$( realpath --strip "$( dirname "${BASH_SOURCE[0]}" )" )" -TQ_DIR="$( dirname "$( dirname "$( dirname "${PROTOCOLS_DIR}" )" )" )" -COMMON_DIR="$( dirname "${TQ_DIR}" )"/common - -if ! protoc --version | grep -E ' (3\.)|(2\.)' ; then - echo "Couldn't compile *.proto files because protoc version 3 was not found." - exit 1 -fi - -echo "Compiling Protocol buffer *.proto files.." -(cd "${TQ_DIR}"/appscale/taskqueue/protocols && \ - protoc --python_out=./ *.proto && \ - sed -i -E 's/^import.*_pb2/from . \0/' *.py) - -if [ "$1" == "" ]; then - echo "Positional parameter 1 is required and should contain pip executable to use." - exit 1 -fi - -PIP="$1" - -echo "Upgrading appscale-common.." -"${PIP}" install --upgrade --no-deps "${COMMON_DIR}" -echo "Installing appscale-common dependencies if any missing.." -"${PIP}" install "${COMMON_DIR}" -echo "Upgrading appscale-taskqueue.." -"${PIP}" install --upgrade --no-deps "${TQ_DIR}[celery_gui]" -echo "Installing appscale-taskqueue dependencies if any missing.." -"${PIP}" install "${TQ_DIR}[celery_gui]" - -echo "appscale-taskqueue has been successfully installed." 
diff --git a/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh b/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh index 335450eb4c..d35bce75a4 100755 --- a/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh +++ b/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh @@ -3,7 +3,6 @@ # Compiles protocols and prepares venv for tq set -e -set -x PROTOCOLS_DIR="$( realpath --strip "$( dirname "${BASH_SOURCE[0]}" )" )" @@ -13,7 +12,9 @@ if ! protoc --version | grep -E ' (3\.)|(2\.)' ; then fi echo "Compiling Protocol buffer *.proto files.." -#(cd "${TQ_DIR}"/appscale/taskqueue/protocols && protoc --python_out=./ *.proto) -protoc --proto_path=$PROTOCOLS_DIR --python_out=$PROTOCOLS_DIR \ - $PROTOCOLS_DIR/*.proto + +(cd "${PROTOCOLS_DIR}" && \ + protoc --python_out=./ *.proto && \ + sed -i -E 's/^import.*_pb2/from . \0/' *.py) + echo "Protocols have been successfully compiled." diff --git a/AppTaskQueue/test/helpers/restart-taskqueue.sh b/AppTaskQueue/test/helpers/restart-taskqueue.sh index 9a1e1be24e..f84580a31e 100755 --- a/AppTaskQueue/test/helpers/restart-taskqueue.sh +++ b/AppTaskQueue/test/helpers/restart-taskqueue.sh @@ -84,14 +84,30 @@ if [ -z ${PORTS} ] || [ -z ${DB_IP} ] || [ -z ${ZK_IP} ] || [ -z ${LB_IP} ]; the fi if [ ! -z ${TQ_SOURCE_DIR} ]; then - log "Installing TaskQueue from specified sources" + echo "Installing python3-venv" + attempt=1 + while ! (yes | apt-get install python3-venv) + do + if (( attempt > 15 )); then + log "Failed to install python3-venv after ${attempt} attempts" "ERROR" + exit 1 + fi + log "Failed to install python3-venv. Retrying." "WARNING" + ((attempt++)) + sleep ${attempt} + done + rm -rf /opt/appscale_venvs/appscale_taskqueue/ - python -m virtualenv /opt/appscale_venvs/appscale_taskqueue/ + python3 -m venv /opt/appscale_venvs/appscale_taskqueue/ TASKQUEUE_PIP=/opt/appscale_venvs/appscale_taskqueue/bin/pip + "${TASKQUEUE_PIP}" install wheel "${TQ_SOURCE_DIR}/appscale/taskqueue/protocols/compile_protocols.sh" COMMON_SOURCE_DIR="$( dirname "${TQ_SOURCE_DIR}" )"/common + + echo "Installing TaskQueue from specified sources" + echo "Upgrading appscale-common.." "${TASKQUEUE_PIP}" install --upgrade --no-deps "${COMMON_SOURCE_DIR}" echo "Installing appscale-common dependencies if any missing.." diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 9e67cd00ce..5a36a26ff6 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -624,10 +624,24 @@ installinfrastructuremanager() installtaskqueue() { + echo "Installing python3-venv" + attempt=1 + while ! (yes | apt-get install python3-venv) + do + if (( attempt > 15 )); then + echo "Failed to install python3-venv after ${attempt} attempts" "ERROR" + exit 1 + fi + echo "Failed to install python3-venv. Retrying." "WARNING" + ((attempt++)) + sleep ${attempt} + done + rm -rf /opt/appscale_venvs/appscale_taskqueue/ - python -m virtualenv /opt/appscale_venvs/appscale_taskqueue/ + python3 -m venv /opt/appscale_venvs/appscale_taskqueue/ TASKQUEUE_PIP=/opt/appscale_venvs/appscale_taskqueue/bin/pip + "${TASKQUEUE_PIP}" install wheel "${APPSCALE_HOME}/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh" From 4049f5985df48e4f327cebad3f9292d2b69c5cf3 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Wed, 3 Jul 2019 17:57:03 +0300 Subject: [PATCH 029/221] Add string representation of Entity object. 
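A hypothetical use of the new representation (the field values below are invented, and the plain 'queued' string stands in for whatever TASK_STATES constant a real caller would pass):

    import logging

    from appscale.taskqueue.datastore_client import Entity

    entity = Entity(key_name='task-001', queue='default',
                    state='queued', app_id='guestbook')
    logging.warning('Could not store task entity:\n%s', entity)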
--- AppTaskQueue/appscale/taskqueue/datastore_client.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/AppTaskQueue/appscale/taskqueue/datastore_client.py b/AppTaskQueue/appscale/taskqueue/datastore_client.py index 7df8ef40e1..dadf52f3b2 100644 --- a/AppTaskQueue/appscale/taskqueue/datastore_client.py +++ b/AppTaskQueue/appscale/taskqueue/datastore_client.py @@ -76,6 +76,14 @@ def __init__(self, key_name, queue, state, app_id, timestamp=None, endtime=None) else: self.timestamp = int(time.time()) + def __str__(self): + return 'Key name: ' + self.key_name + \ + '.\nQueue: ' + self.queue + \ + '.\nState: ' + self.state + \ + '\nApplication ID: ' + self.app_id + \ + '.\nTimestamp: ' + str(self.timestamp) + \ + '.\nEnd time: ' + str(self.endtime) + '.\n' + def toPb(self, project_id, kind): """ Converts Entity object to datastore_v3_pb2.EntityProto. @@ -300,7 +308,7 @@ def _make_request(self, project_id, method, body): remote_api_pb2.Response object. Raises: DatastoreTransientError if some retry-able error occurred. - DatastorePermanentError if some permanent error occured. + DatastorePermanentError if some permanent error occurred. """ request = remote_api_pb2.Request() request.service_name = self.SERVICE_NAME From a038bec9e456b70a6cc1e4ff5d5d977a8f7f8e3c Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Fri, 19 Jul 2019 19:15:38 +0300 Subject: [PATCH 030/221] Make tq py3 compatible. --- .../appscale/taskqueue/distributed_tq.py | 87 ++++++++++--------- .../appscale/taskqueue/push_worker.py | 21 ++++- AppTaskQueue/appscale/taskqueue/queue.py | 9 +- .../appscale/taskqueue/queue_manager.py | 4 +- .../appscale/taskqueue/service_manager.py | 2 +- AppTaskQueue/appscale/taskqueue/task.py | 6 +- AppTaskQueue/appscale/taskqueue/utils.py | 2 +- 7 files changed, 80 insertions(+), 51 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/distributed_tq.py b/AppTaskQueue/appscale/taskqueue/distributed_tq.py index efa58d361f..0478fdcbda 100644 --- a/AppTaskQueue/appscale/taskqueue/distributed_tq.py +++ b/AppTaskQueue/appscale/taskqueue/distributed_tq.py @@ -91,7 +91,7 @@ def __init__(self, zk_client): """ setup_env() - self.load_balancers = appscale_info.get_load_balancer_ips() + self.load_balancers = list(appscale_info.get_load_balancer_ips()) self.queue_manager = GlobalQueueManager(zk_client) self.service_manager = GlobalServiceManager(zk_client) self.datastore_client = DatastoreClient() @@ -150,9 +150,9 @@ def fetch_queue_stats(self, app_id, http_data): for queue_name in request.queue_name: try: - queue = self.get_queue(app_id, queue_name) + queue = self.get_queue(app_id, queue_name.decode('utf-8')) except QueueNotFound as error: - return '', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) + return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) stats_response = response.queuestats.add() @@ -190,9 +190,9 @@ def purge_queue(self, app_id, http_data): response = taskqueue_service_pb2.TaskQueuePurgeQueueResponse() try: - queue = self.get_queue(app_id, request.queue_name) + queue = self.get_queue(app_id, request.queue_name.decode('utf-8')) except QueueNotFound as error: - return '', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) + return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) queue.purge() return (response.SerializeToString(), 0, "") @@ -211,12 +211,12 @@ def delete(self, app_id, http_data): response = taskqueue_service_pb2.TaskQueueDeleteResponse() try: - queue = self.get_queue(app_id, request.queue_name) + queue = self.get_queue(app_id, 
request.queue_name.decode('utf-8')) except QueueNotFound as error: - return '', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) + return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) for task_name in request.task_name: - queue.delete_task(Task({'id': task_name})) + queue.delete_task(Task({'id': task_name.decode('utf-8')})) response.result.append(TaskQueueServiceError.OK) @@ -236,14 +236,13 @@ def query_and_own_tasks(self, app_id, http_data): response = taskqueue_service_pb2.TaskQueueQueryAndOwnTasksResponse() try: - queue = self.get_queue(app_id, request.queue_name) + queue = self.get_queue(app_id, request.queue_name.decode('utf-8')) except QueueNotFound as error: - return '', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) + return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) tag = None if request.HasField("tag"): - tag = request.tag - + tag = request.tag.decode('utf-8') try: tasks = queue.lease_tasks(request.max_tasks, request.lease_seconds, group_by_tag=request.group_by_tag, tag=tag) @@ -270,7 +269,7 @@ def add(self, source_info, http_data): # Just call bulk add with one task. request = taskqueue_service_pb2.TaskQueueAddRequest() request.ParseFromString(http_data) - request.app_id = source_info['app_id'] + request.app_id = source_info['app_id'].encode('utf-8') response = taskqueue_service_pb2.TaskQueueAddResponse() bulk_request = taskqueue_service_pb2.TaskQueueBulkAddRequest() bulk_response = taskqueue_service_pb2.TaskQueueBulkAddResponse() @@ -279,13 +278,13 @@ def add(self, source_info, http_data): try: self.__bulk_add(source_info, bulk_request, bulk_response) except TransientError as error: - return '', TaskQueueServiceError.TRANSIENT_ERROR, str(error) + return b'', TaskQueueServiceError.TRANSIENT_ERROR, str(error) except QueueNotFound as error: - return '', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) + return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) except DatastorePermanentError as error: - return '', TaskQueueServiceError.INTERNAL_ERROR, str(error) + return b'', TaskQueueServiceError.INTERNAL_ERROR, str(error) except BadFilterConfiguration as error: - return '', TaskQueueServiceError.INTERNAL_ERROR, str(error) + return b'', TaskQueueServiceError.INTERNAL_ERROR, str(error) if len(bulk_response.taskresult) == 1: result = bulk_response.taskresult[0].result @@ -317,13 +316,13 @@ def bulk_add(self, source_info, http_data): try: self.__bulk_add(source_info, request, response) except QueueNotFound as error: - return '', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) + return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) except TransientError as error: - return '', TaskQueueServiceError.TRANSIENT_ERROR, str(error) + return b'', TaskQueueServiceError.TRANSIENT_ERROR, str(error) except DatastorePermanentError as error: - return '', TaskQueueServiceError.INTERNAL_ERROR, str(error) + return b'', TaskQueueServiceError.INTERNAL_ERROR, str(error) except BadFilterConfiguration as error: - return '', TaskQueueServiceError.INTERNAL_ERROR, str(error) + return b'', TaskQueueServiceError.INTERNAL_ERROR, str(error) return (response.SerializeToString(), 0, "") @@ -351,24 +350,25 @@ def __bulk_add(self, source_info, request, response): if (add_request.HasField("mode") and add_request.mode == taskqueue_service_pb2.TaskQueueMode.PULL): - queue = self.get_queue(add_request.app_id, add_request.queue_name) + queue = self.get_queue(add_request.app_id.decode('utf-8'), + add_request.queue_name.decode('utf-8')) if not isinstance(queue, (PullQueue, PostgresPullQueue)): 
task_result.result = TaskQueueServiceError.INVALID_QUEUE_MODE error_found = True continue encoded_payload = base64.urlsafe_b64encode(add_request.body) - task_info = {'payloadBase64': encoded_payload, + task_info = {'payloadBase64': encoded_payload.decode('utf-8'), 'leaseTimestamp': add_request.eta_usec} if add_request.HasField("task_name"): - task_info['id'] = add_request.task_name + task_info['id'] = add_request.task_name.decode('utf-8') if add_request.HasField("tag"): - task_info['tag'] = add_request.tag + task_info['tag'] = add_request.tag.decode('utf-8') new_task = Task(task_info) queue.add_task(new_task) task_result.result = TaskQueueServiceError.OK - task_result.chosen_task_name = new_task.id + task_result.chosen_task_name = new_task.id.encode('utf-8') continue result = verify_task_queue_add_request(add_request.app_id, @@ -384,7 +384,7 @@ def __bulk_add(self, source_info, request, response): namespaced_name = choose_task_name(add_request.app_id, add_request.queue_name, user_chosen=task_name) - add_request.task_name = namespaced_name + add_request.task_name = namespaced_name.encode('utf-8') # Wait until the result is known before setting the chosen task name on # the response. @@ -411,7 +411,8 @@ def __bulk_add(self, source_info, request, response): else: task_result.result = TaskQueueServiceError.OK if namespaced_names[index] is not None: - task_result.chosen_task_name = namespaced_names[index] + task_result.chosen_task_name = \ + namespaced_names[index].encode('utf-8') def __method_mapping(self, method): """ Maps an int index to a string. @@ -489,7 +490,8 @@ def __check_and_store_task_names(self, request): Raises: An ApplicationError of TASK_ALREADY_EXISTS. """ - task_name = request.task_name + task_name = request.task_name.decode('utf-8') + task_name = task_name.replace("b'", "").replace("'", "") try: item = self.__get_task_name(request.app_id, task_name) @@ -534,9 +536,11 @@ def __enqueue_push_task(self, source_info, request): countdown = int(headers['X-AppEngine-TaskETA']) - \ int(datetime.datetime.now().strftime("%s")) - push_queue = self.get_queue(request.app_id, request.queue_name) + push_queue = self.get_queue(request.app_id.decode('utf-8'), + request.queue_name.decode('utf-8')) task_func = get_queue_function_name(push_queue.name) - celery_queue = get_celery_queue_name(request.app_id, push_queue.name) + celery_queue = get_celery_queue_name( + request.app_id.decode('utf-8'), push_queue.name) push_queue.celery.send_task( task_func, @@ -576,11 +580,11 @@ def get_task_args(self, source_info, headers, request): args['max_doublings'] = self.DEFAULT_MAX_DOUBLINGS # Load queue info into cache. - app_id = self.__cleanse(request.app_id) + app_id = self.__cleanse(request.app_id.decode('utf-8')) queue_name = request.queue_name # Use queue defaults. 
- queue = self.get_queue(app_id, queue_name) + queue = self.get_queue(app_id, queue_name.decode('utf-8')) if queue is not None: if not isinstance(queue, PushQueue): raise Exception('Only push queues are implemented') @@ -623,7 +627,8 @@ def get_task_args(self, source_info, headers, request): target_url = self.get_target_url(app_id, source_info, host) - args['url'] = "{target}{url}".format(target=target_url, url=request.url) + args['url'] = "{target}{url}".format(target=target_url, + url=request.url.decode('utf-8')) return args def get_target_url(self, app_id, source_info, target): @@ -688,8 +693,8 @@ def get_task_headers(self, request): # This header is how we authenticate that it's an internal request secret = appscale_info.get_secret() - secret_hash = hashlib.sha1(request.app_id + '/' + \ - secret).hexdigest() + secret_hash = hashlib.sha1(request.app_id + '/'.encode('utf-8') + \ + secret.encode('utf-8')).hexdigest() headers['X-AppEngine-Fake-Is-Admin'] = secret_hash headers['X-AppEngine-QueueName'] = request.queue_name headers['X-AppEngine-TaskName'] = request.task_name @@ -766,20 +771,20 @@ def modify_task_lease(self, app_id, http_data): request.ParseFromString(http_data) try: - queue = self.get_queue(app_id, request.queue_name) + queue = self.get_queue(app_id, request.queue_name.decode('utf-8')) except QueueNotFound as error: - return '', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) + return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) - task_info = {'id': request.task_name, + task_info = {'id': request.task_name.decode('utf-8'), 'leaseTimestamp': request.eta_usec} try: # The Python AppServer sets eta_usec with a resolution of 1 second, # so update_lease can't be used. It checks with millisecond precision. task = queue.update_task(Task(task_info), request.lease_seconds) except InvalidLeaseRequest as lease_error: - return '', TaskQueueServiceError.TASK_LEASE_EXPIRED, str(lease_error) + return b'', TaskQueueServiceError.TASK_LEASE_EXPIRED, str(lease_error) except TaskNotFound as error: - return '', TaskQueueServiceError.TASK_LEASE_EXPIRED, str(error) + return b'', TaskQueueServiceError.TASK_LEASE_EXPIRED, str(error) epoch = datetime.datetime.utcfromtimestamp(0) updated_usec = int((task.leaseTimestamp - epoch).total_seconds() * 1000000) diff --git a/AppTaskQueue/appscale/taskqueue/push_worker.py b/AppTaskQueue/appscale/taskqueue/push_worker.py index 70fdeed0bd..18427db34e 100644 --- a/AppTaskQueue/appscale/taskqueue/push_worker.py +++ b/AppTaskQueue/appscale/taskqueue/push_worker.py @@ -12,7 +12,7 @@ from eventlet.green.httplib import BadStatusLine from eventlet.timeout import Timeout as EventletTimeout from socket import error as SocketError -from urlparse import urlparse +from urllib.parse import urlparse from .datastore_client import DatastoreClient, DatastoreTransientError from .tq_lib import TASK_STATES from .utils import ( @@ -39,6 +39,10 @@ logger.setLevel(logging.INFO) +def clean_task_name(task_name): + return task_name.decode('utf-8').replace("b'", "").replace("'", "") + + def get_wait_time(retries, args): """ Calculates how long we should wait to execute a failed task, based on how many times it's failed in the past. 
@@ -111,6 +115,9 @@ def execute_task(task, headers, args): """ start_time = datetime.datetime.utcnow() + args['task_name'] = clean_task_name(args['task_name']) + headers['X-AppEngine-TaskName'] = clean_task_name(headers['X-AppEngine-TaskName']) + content_length = len(args['body']) loggable_args = {key: args[key] for key in args @@ -207,6 +214,9 @@ def execute_task(task, headers, args): logger.warning( '{task} failed before receiving response. It will retry in {wait} ' 'seconds.'.format(task=args['task_name'], wait=wait_time)) + args['task_name'] = args['task_name'].encode('utf-8') + headers['X-AppEngine-TaskName'] = \ + headers['X-AppEngine-TaskName'].encode('utf-8') raise task.retry(countdown=wait_time) if 200 <= response.status < 300: @@ -226,6 +236,9 @@ def execute_task(task, headers, args): args['task_name'], redirect_url)) url = urlparse(redirect_url) if redirects_left == 0: + args['task_name'] = args['task_name'].encode('utf-8') + headers['X-AppEngine-TaskName'] = \ + headers['X-AppEngine-TaskName'].encode('utf-8') raise task.retry(countdown=wait_time) redirects_left -= 1 else: @@ -234,12 +247,18 @@ def execute_task(task, headers, args): task=args['task_name'], wait=wait_time)) logger.warning(message) + args['task_name'] = args['task_name'].encode('utf-8') + headers['X-AppEngine-TaskName'] = \ + headers['X-AppEngine-TaskName'].encode('utf-8') raise task.retry(countdown=wait_time) except EventletTimeout as thrown_timeout: if thrown_timeout != timeout: raise logger.exception('Task {} timed out. Retrying.'.format(args['task_name'])) + args['task_name'] = args['task_name'].encode('utf-8') + headers['X-AppEngine-TaskName'] = \ + headers['X-AppEngine-TaskName'].encode('utf-8') # This could probably be calculated, but for now, just retry immediately. raise task.retry(countdown=0) finally: diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 90825bfc26..5b11683fdf 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -820,7 +820,8 @@ def _task_from_row(self, columns, row, **other_attrs): # TODO: remove it when task.payloadBase64 is replaced with task.payload if 'payload' in columns: payload = task_info.pop('payload') - task_info['payloadBase64'] = base64.urlsafe_b64encode(payload) + task_info['payloadBase64'] = \ + base64.urlsafe_b64encode(payload).decode('utf-8') return Task(task_info) @@ -1388,6 +1389,7 @@ def _insert_task(self, parameters, retries): IF NOT EXISTS """, retry_policy=NO_RETRIES) try: + parameters['payload'] = parameters['payload'].decode('utf-8') result = self.session.execute(insert_statement, parameters) except TRANSIENT_CASSANDRA_ERRORS as error: retries_left = retries - 1 @@ -1406,8 +1408,9 @@ def _insert_task(self, parameters, retries): raise TransientError('Unable to insert task') if not success: - raise InvalidTaskInfo( - 'Task name already taken: {}'.format(parameters['id'])) + error = InvalidTaskInfo() + error.message = 'Task name already taken: {}'.format(parameters['id']) + raise error def _update_lease(self, parameters, retries, check_lease=True): """ Update lease expiration on a task entry. 
diff --git a/AppTaskQueue/appscale/taskqueue/queue_manager.py b/AppTaskQueue/appscale/taskqueue/queue_manager.py index bbdf39edc0..2b425d775a 100644 --- a/AppTaskQueue/appscale/taskqueue/queue_manager.py +++ b/AppTaskQueue/appscale/taskqueue/queue_manager.py @@ -38,7 +38,7 @@ def __init__(self, zk_client, project_id): # TODO: PostgresConnectionWrapper may need an update when # TaskQueue becomes concurrent self.pg_connection_wrapper = ( - pg_connection_wrapper.PostgresConnectionWrapper(dsn=pg_dsn[0]) + pg_connection_wrapper.PostgresConnectionWrapper(dsn=pg_dsn[0].decode('utf-8')) ) self._configure_periodical_flush() except NoNodeError: @@ -62,7 +62,7 @@ def update_queues(self, queue_config): if not queue_config: new_queue_config = {'default': {'rate': '5/s'}} else: - new_queue_config = json.loads(queue_config)['queue'] + new_queue_config = json.loads(queue_config.decode('utf-8'))['queue'] # Clean up obsolete queues. to_stop = [queue for queue in self if queue not in new_queue_config] diff --git a/AppTaskQueue/appscale/taskqueue/service_manager.py b/AppTaskQueue/appscale/taskqueue/service_manager.py index 0914b7a34d..adbddbc0f0 100644 --- a/AppTaskQueue/appscale/taskqueue/service_manager.py +++ b/AppTaskQueue/appscale/taskqueue/service_manager.py @@ -35,7 +35,7 @@ def update_version_ports(self, new_versions_list): for version_id in new_versions_list: if version_id not in self: version_info = json.loads(self.zk_client.get("{0}/{1}".format( - self.versions_node, version_id))[0]) + self.versions_node, version_id))[0].decode('utf-8')) self[version_id] = version_info['appscaleExtensions']['haproxyPort'] def _update_versions_watch(self, new_versions): diff --git a/AppTaskQueue/appscale/taskqueue/task.py b/AppTaskQueue/appscale/taskqueue/task.py index fcb71ac2b3..8bcec82646 100644 --- a/AppTaskQueue/appscale/taskqueue/task.py +++ b/AppTaskQueue/appscale/taskqueue/task.py @@ -112,7 +112,7 @@ def validate_info(self): Raises: InvalidTaskInfo if one of the attribute fails validation. """ - for attribute, rule in QUEUE_ATTRIBUTE_RULES.iteritems(): + for attribute, rule in QUEUE_ATTRIBUTE_RULES.items(): try: value = getattr(self, attribute) except AttributeError: @@ -197,8 +197,10 @@ def json_safe_dict(self, fields=TASK_FIELDS): # All numbers are represented as strings in the GCP ecosystem for # Javascript compatibility reasons. We convert to string so that # the response can be successfully parsed by Google API clients. - value = str(long((value - epoch).total_seconds() * 1000000)) + value = str(int((value - epoch).total_seconds() * 1000000)) task[attribute] = value + if attribute == 'payloadBase64': + task[attribute] = task[attribute].decode('utf-8') return task diff --git a/AppTaskQueue/appscale/taskqueue/utils.py b/AppTaskQueue/appscale/taskqueue/utils.py index b28b957cbf..a8c1b7c9b3 100644 --- a/AppTaskQueue/appscale/taskqueue/utils.py +++ b/AppTaskQueue/appscale/taskqueue/utils.py @@ -100,7 +100,7 @@ def create_celery_for_app(app, rates): kombu_queues = [] annotations = [] - for queue_name, rate in rates.iteritems(): + for queue_name, rate in rates.items(): celery_name = get_celery_queue_name(app, queue_name) kombu_queue = KombuQueue(celery_name, Exchange(app), routing_key=celery_name) From 1a494131d14f4d580b34e290b62e398947a17494 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Fri, 19 Jul 2019 19:16:26 +0300 Subject: [PATCH 031/221] Fix e2e and unit tests for py3. 
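The recurring pattern in these test fixes, shown in miniature (the JSON payload is a stand-in for a real /service-stats response):

    import json
    # Python 3 ships mock in the standard library, so the external package goes away:
    from unittest.mock import MagicMock, patch

    # Response bodies arrive as bytes, hence the explicit decode before json.loads:
    raw_stats = b'{"cumulative_counters": {"all": 3}}'
    stats = json.loads(raw_stats.decode('utf-8'))
    assert stats['cumulative_counters']['all'] == 3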
--- AppTaskQueue/setup.py | 3 +-- AppTaskQueue/test/helpers/restart-taskqueue.sh | 2 ++ AppTaskQueue/test/unit/test_service_stats.py | 18 +++++++----------- .../test/unit/test_taskqueue_server.py | 2 +- debian/appscale_install_functions.sh | 2 ++ 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/AppTaskQueue/setup.py b/AppTaskQueue/setup.py index fdbbb47a1a..36a023bd87 100644 --- a/AppTaskQueue/setup.py +++ b/AppTaskQueue/setup.py @@ -16,7 +16,6 @@ 'celery>=3.1,<4.0.0', 'eventlet==0.22', 'kazoo', - 'mock', 'protobuf', 'psycopg2-binary', 'PyYaml>=4.2b1', @@ -29,7 +28,7 @@ 'Environment :: Console', 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', ], namespace_packages=['appscale'], packages=[ diff --git a/AppTaskQueue/test/helpers/restart-taskqueue.sh b/AppTaskQueue/test/helpers/restart-taskqueue.sh index f84580a31e..5ef03c43d5 100755 --- a/AppTaskQueue/test/helpers/restart-taskqueue.sh +++ b/AppTaskQueue/test/helpers/restart-taskqueue.sh @@ -102,6 +102,8 @@ if [ ! -z ${TQ_SOURCE_DIR} ]; then TASKQUEUE_PIP=/opt/appscale_venvs/appscale_taskqueue/bin/pip "${TASKQUEUE_PIP}" install wheel + "${TASKQUEUE_PIP}" install --upgrade pip + "${TASKQUEUE_PIP}" install --upgrade setuptools "${TQ_SOURCE_DIR}/appscale/taskqueue/protocols/compile_protocols.sh" COMMON_SOURCE_DIR="$( dirname "${TQ_SOURCE_DIR}" )"/common diff --git a/AppTaskQueue/test/unit/test_service_stats.py b/AppTaskQueue/test/unit/test_service_stats.py index 79114676f1..a5da6b9b6d 100644 --- a/AppTaskQueue/test/unit/test_service_stats.py +++ b/AppTaskQueue/test/unit/test_service_stats.py @@ -1,17 +1,13 @@ -import sys - +from importlib import reload import json +from unittest import mock +from unittest.mock import patch from appscale.common.service_stats import stats_manager -from mock import mock, patch from tornado.testing import AsyncHTTPTestCase -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER - from appscale.taskqueue import appscale_taskqueue, rest_api, statistics -sys.path.append(APPSCALE_PYTHON_APPSERVER) - class TestServiceStatistics(AsyncHTTPTestCase): @@ -130,7 +126,7 @@ def test_stats(self): # Fetch statistics raw_stats = self.fetch('/service-stats').body - stats = json.loads(raw_stats) + stats = json.loads(raw_stats.decode('utf-8')) # Pop and check time-related fields self.assertGreater(stats['cumulative_counters'].pop('from'), 0) @@ -192,7 +188,7 @@ def test_scroll_stats(self): self.time_mock.return_value = 99999 # current time doesn't matter # for scrolling raw_stats = self.fetch('/service-stats?cursor=1500000').body - stats = json.loads(raw_stats) + stats = json.loads(raw_stats.decode('utf-8')) self.assertEqual(stats['cumulative_counters']['all'], 3) self.assertEqual(stats['recent_stats']['all'], 2) @@ -211,7 +207,7 @@ def test_recent_stats(self): # Fetch statistics as if it was in the future self.time_mock.return_value = 99999 # current time does matter for recent raw_stats = self.fetch('/service-stats?last_milliseconds=2000000').body - stats = json.loads(raw_stats) + stats = json.loads(raw_stats.decode('utf-8')) self.assertEqual(stats['cumulative_counters']['all'], 3) self.assertEqual(stats['recent_stats']['all'], 0) # 0 for last 2 seconds @@ -219,7 +215,7 @@ def test_recent_stats(self): # Fetch statistics as if it was just after requests self.time_mock.return_value = 3500 # current time does matter for recent raw_stats = 
self.fetch('/service-stats?last_milliseconds=2000000').body - stats = json.loads(raw_stats) + stats = json.loads(raw_stats.decode('utf-8')) self.assertEqual(stats['cumulative_counters']['all'], 3) self.assertEqual(stats['recent_stats']['all'], 2) # 2 for last 2 seconds diff --git a/AppTaskQueue/test/unit/test_taskqueue_server.py b/AppTaskQueue/test/unit/test_taskqueue_server.py index 47eb98a70e..74d4bbe986 100644 --- a/AppTaskQueue/test/unit/test_taskqueue_server.py +++ b/AppTaskQueue/test/unit/test_taskqueue_server.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import unittest +from unittest.mock import MagicMock, patch -from mock import MagicMock, patch from appscale.common import file_io from appscale.taskqueue import distributed_tq diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 5a36a26ff6..553d51937d 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -642,6 +642,8 @@ installtaskqueue() TASKQUEUE_PIP=/opt/appscale_venvs/appscale_taskqueue/bin/pip "${TASKQUEUE_PIP}" install wheel + "${TASKQUEUE_PIP}" install --upgrade pip + "${TASKQUEUE_PIP}" install --upgrade setuptools "${APPSCALE_HOME}/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh" From 42859aa62e5233cc024489859d0f30b898972e2c Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 26 Jul 2019 18:31:09 +0300 Subject: [PATCH 032/221] Add administration scripts and ListIndexes method - Basic implementation of ListIndexes method. - List collections script was implemented. - Delete collection script was implemented. - Reindex collection (index) screip was implemented. --- SearchService2/appscale/search/api_methods.py | 19 +- SearchService2/appscale/search/models.py | 8 + SearchService2/appscale/search/scripts.py | 184 ++++++++++++++++++ .../appscale/search/search_server.py | 6 +- .../appscale/search/solr_adapter.py | 20 +- SearchService2/setup.py | 3 + 6 files changed, 230 insertions(+), 10 deletions(-) create mode 100644 SearchService2/appscale/search/scripts.py diff --git a/SearchService2/appscale/search/api_methods.py b/SearchService2/appscale/search/api_methods.py index 2640d1cc46..47395987ad 100644 --- a/SearchService2/appscale/search/api_methods.py +++ b/SearchService2/appscale/search/api_methods.py @@ -14,11 +14,11 @@ from appscale.search import solr_adapter from appscale.search.constants import ( - InvalidRequest, UnknownFieldTypeException, - UnknownFacetTypeException + InvalidRequest, UnknownFieldTypeException, UnknownFacetTypeException, + SolrClientError, SolrServerError) +from appscale.search.models import ( + Field, Document, Facet, FacetRefinement, FacetRequest ) -from appscale.search.models import Field, Document, Facet, FacetRefinement, \ - FacetRequest from appscale.search.protocols import search_pb2 logger = logging.getLogger(__name__) @@ -99,8 +99,15 @@ async def list_indexes(self, list_indexes_request, list_indexes_response): list_indexes_request: A search_pb2.ListIndexesRequest. list_indexes_response: A search_pb2.ListIndexesResponse. 
""" - raise InvalidRequest("List indexes method is not implemented " - "in AppScale SearchService2 yet") + app_id = list_indexes_request.app_id.decode('utf-8') + # TODO list indexes params should be proceeded + # params = list_indexes_request.params + indexes_metadata = await self.solr_adapter.list_indexes(app_id) + list_indexes_response.status.code = search_pb2.SearchServiceError.OK + for metadata in indexes_metadata: + metadata_pb = list_indexes_response.index_metadata.add() + metadata_pb.index_spec.namespace = metadata.namespace + metadata_pb.index_spec.name = metadata.index_name async def list_documents(self, list_documents_request, list_documents_response): """ List all documents within an index. diff --git a/SearchService2/appscale/search/models.py b/SearchService2/appscale/search/models.py index 558cddfc55..14fb715aca 100644 --- a/SearchService2/appscale/search/models.py +++ b/SearchService2/appscale/search/models.py @@ -103,6 +103,14 @@ class SearchResult(object): facet_results = attr.ib() +@attr.s(hash=False, slots=True, frozen=True) +class IndexMetadata(object): + app_id = attr.ib() + namespace = attr.ib() + index_name = attr.ib() + # TODO we may need to add more metadata fields. + + # ====================================== # HELPER MODELS FOR SOLR ADAPTER # -------------------------------------- diff --git a/SearchService2/appscale/search/scripts.py b/SearchService2/appscale/search/scripts.py new file mode 100644 index 0000000000..35ed8c4f12 --- /dev/null +++ b/SearchService2/appscale/search/scripts.py @@ -0,0 +1,184 @@ +""" +This module provides helper scripts for managing Solr collections. +""" +import argparse +import logging + +import sys + +from appscale.search.constants import SolrServerError, SolrClientError +from kazoo.client import KazooClient +from appscale.common.constants import LOG_FORMAT, ZK_PERSISTENT_RECONNECTS +from tornado import ioloop, gen + +from appscale.search import solr_adapter + +logger = logging.getLogger(__name__) + + +def list_solr_collections(): + """ Lists all Solr collections. + """ + logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) + + parser = argparse.ArgumentParser() + parser.add_argument( + '-v', '--verbose', action='store_true', + help='Output debug-level logging') + parser.add_argument( + '--zk-locations', required=True, nargs='+', help='ZooKeeper location(s)') + args = parser.parse_args() + + if args.verbose: + logging.getLogger('appscale').setLevel(logging.DEBUG) + + zk_client = KazooClient( + hosts=','.join(args.zk_locations), + connection_retry=ZK_PERSISTENT_RECONNECTS + ) + zk_client.start() + + async def list_collections(): + """ Asynchronously fetches a list of Solr collections + from SolrCloud and prints it to stdout. + """ + adapter = solr_adapter.SolrAdapter(zk_client) + try: + collections, broken = await adapter.solr.list_collections() + logger.info('Collections:\n {}'.format('\n '.join(collections))) + if broken: + logger.warning('Broken collections:\n {}'.format('\n '.join(broken))) + except (SolrServerError, SolrClientError) as err: + logger.error('Failed to list Solr collections ({}).'.format(err)) + io_loop.stop() + io_loop.close() + + io_loop = ioloop.IOLoop.current() + io_loop.spawn_callback(list_collections) + io_loop.start() + + +def delete_solr_collection(): + """ Deletes specific Solr collection. 
+  """
+  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+    '-v', '--verbose', action='store_true',
+    help='Output debug-level logging')
+  parser.add_argument(
+    '--no-prompt', action='store_true',
+    help='Delete the collection without asking for confirmation')
+  parser.add_argument(
+    '--zk-locations', required=True, nargs='+', help='ZooKeeper location(s)')
+  parser.add_argument(
+    '--collection', required=True, help='Name of the Solr collection to delete')
+  args = parser.parse_args()
+
+  if args.verbose:
+    logging.getLogger('appscale').setLevel(logging.DEBUG)
+
+  zk_client = KazooClient(
+    hosts=','.join(args.zk_locations),
+    connection_retry=ZK_PERSISTENT_RECONNECTS
+  )
+  zk_client.start()
+
+  async def delete_collection():
+    """ Asynchronously deletes the Solr collection.
+    """
+    adapter = solr_adapter.SolrAdapter(zk_client)
+    try:
+      await adapter.solr.delete_collection(args.collection)
+    except (SolrServerError, SolrClientError) as err:
+      logger.error('Failed to delete Solr collection ({}).'.format(err))
+    io_loop.stop()
+    io_loop.close()
+
+  io_loop = ioloop.IOLoop.current()
+
+  if args.no_prompt:
+    io_loop.spawn_callback(delete_collection)
+
+  else:
+    answer = input('Type collection name to confirm you want to delete it: '
+                   .format(args.collection))
+    if answer.strip() != args.collection:
+      logger.error('Collection deletion was not confirmed')
+      sys.exit(1)
+    io_loop.spawn_callback(delete_collection)
+
+  io_loop.start()
+
+
+def reindex():
+  """ Reindexes all documents in the specified collection.
+  This command is supposed to be used when the Solr schema is changed.
+  """
+  logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
+
+  parser = argparse.ArgumentParser()
+  parser.add_argument(
+    '-v', '--verbose', action='store_true',
+    help='Output debug-level logging')
+  parser.add_argument(
+    '--project', required=True, help='The name of GAE project')
+  parser.add_argument(
+    '--namespace', default='', help='The name of GAE namespace')
+  parser.add_argument(
+    '--index', required=True, help='The name of GAE Search index')
+  parser.add_argument(
+    '--zk-locations', required=True, nargs='+', help='ZooKeeper location(s)')
+  args = parser.parse_args()
+
+  if args.verbose:
+    logging.getLogger('appscale').setLevel(logging.DEBUG)
+
+  zk_client = KazooClient(
+    hosts=','.join(args.zk_locations),
+    connection_retry=ZK_PERSISTENT_RECONNECTS
+  )
+  zk_client.start()
+
+  async def reindex_documents():
+    """ Loops through all documents in the index and
+    puts them into the index again in order to apply any schema changes.
+ """ + logger.info('Reindexing documents from {}|{}|{}' + .format(args.project, args.namespace, args.index)) + adapter = solr_adapter.SolrAdapter(zk_client) + has_more = True + start_doc_id = None + total = 0 + while has_more: + try: + documents = await adapter.list_documents( + args.project, args.namespace, args.index, start_doc_id=start_doc_id, + include_start_doc=False, limit=100, keys_only=False + ) + logger.info('Retrieved {} documents starting from doc_id "{}"' + .format(len(documents), start_doc_id)) + if documents: + await adapter.index_documents( + args.project, args.namespace, args.index, documents + ) + total += len(documents) + logger.info('Indexed {} documents starting from doc_id "{}"' + .format(len(documents), start_doc_id)) + start_doc_id = documents[-1].doc_id + else: + has_more = False + except SolrServerError as err: + logger.exception(err) + logger.info("Retrying in 1 second") + await gen.sleep(1) + continue + + logger.info('Successfully reindexed {} documents'.format(total)) + io_loop.stop() + io_loop.close() + + io_loop = ioloop.IOLoop.current() + io_loop.spawn_callback(reindex_documents) + io_loop.start() diff --git a/SearchService2/appscale/search/search_server.py b/SearchService2/appscale/search/search_server.py index 7f4ecedbb9..6059fe1fb3 100644 --- a/SearchService2/appscale/search/search_server.py +++ b/SearchService2/appscale/search/search_server.py @@ -207,11 +207,11 @@ def main(): '-v', '--verbose', action='store_true', help='Output debug-level logging') parser.add_argument( - '--host', help='The host to listen on') + '--host', required=True, help='The host to listen on') parser.add_argument( - '--port', type=int, help='The port to listen on') + '--port', required=True, type=int, help='The port to listen on') parser.add_argument( - '--zk-locations', nargs='+', help='ZooKeeper location(s)') + '--zk-locations', required=True, nargs='+', help='ZooKeeper location(s)') args = parser.parse_args() if args.verbose: diff --git a/SearchService2/appscale/search/solr_adapter.py b/SearchService2/appscale/search/solr_adapter.py index 72b4912f84..638114b253 100644 --- a/SearchService2/appscale/search/solr_adapter.py +++ b/SearchService2/appscale/search/solr_adapter.py @@ -17,7 +17,7 @@ UnknownFacetTypeException, InvalidRequest) from appscale.search.models import ( Field, ScoredDocument, SearchResult, SolrIndexSchemaInfo, SolrSchemaFieldInfo, - Facet + Facet, IndexMetadata ) from appscale.search.settings import SearchServiceSettings from appscale.search.solr_api import SolrAPI @@ -46,6 +46,24 @@ def __init__(self, zk_client): self._settings = SearchServiceSettings(zk_client) self.solr = SolrAPI(zk_client, SOLR_ZK_ROOT, self._settings) + async def list_indexes(self, app_id): + """ Retrieves basic indexes metadata. + + Args: + app_id: a str representing Application ID. + Return (asynchronously): + A list of models.IndexMetadata. + """ + solr_collections, broken = await self.solr.list_collections() + indexes_metadata = [] + for collection in solr_collections: + _, app, namespace, index = collection.split('_') + if app != app_id: + continue + metadata = IndexMetadata(app, namespace, index) + indexes_metadata.append(metadata) + return indexes_metadata + async def index_documents(self, app_id, namespace, index_name, documents): """ Puts specified documents into the index (asynchronously). 
diff --git a/SearchService2/setup.py b/SearchService2/setup.py index 393060cfe4..e628d36a2a 100644 --- a/SearchService2/setup.py +++ b/SearchService2/setup.py @@ -34,5 +34,8 @@ include_package_data=True, entry_points={'console_scripts': [ 'appscale-search2=appscale.search.search_server:main', + 'appscale-search2-reindex=appscale.search.scripts:reindex', + 'list-solr-collections=appscale.search.scripts:list_solr_collections', + 'delete-solr-collection=appscale.search.scripts:delete_solr_collection', ]} ) From 7a90837870273c84aba335409267e1d7d5a04127 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Mon, 29 Jul 2019 17:26:13 +0300 Subject: [PATCH 033/221] Use asyncio run_until_complete in scripts --- SearchService2/appscale/search/api_methods.py | 2 +- SearchService2/appscale/search/scripts.py | 23 ++++--------------- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/SearchService2/appscale/search/api_methods.py b/SearchService2/appscale/search/api_methods.py index 47395987ad..bb2ff8b0a0 100644 --- a/SearchService2/appscale/search/api_methods.py +++ b/SearchService2/appscale/search/api_methods.py @@ -100,7 +100,7 @@ async def list_indexes(self, list_indexes_request, list_indexes_response): list_indexes_response: A search_pb2.ListIndexesResponse. """ app_id = list_indexes_request.app_id.decode('utf-8') - # TODO list indexes params should be proceeded + # TODO list indexes params should be processed # params = list_indexes_request.params indexes_metadata = await self.solr_adapter.list_indexes(app_id) list_indexes_response.status.code = search_pb2.SearchServiceError.OK diff --git a/SearchService2/appscale/search/scripts.py b/SearchService2/appscale/search/scripts.py index 35ed8c4f12..b326bf5f41 100644 --- a/SearchService2/appscale/search/scripts.py +++ b/SearchService2/appscale/search/scripts.py @@ -2,6 +2,7 @@ This module provides helper scripts for managing Solr collections. 
""" import argparse +import asyncio import logging import sys @@ -50,12 +51,8 @@ async def list_collections(): logger.warning('Broken collections:\n {}'.format('\n '.join(broken))) except (SolrServerError, SolrClientError) as err: logger.error('Failed to list Solr collections ({}).'.format(err)) - io_loop.stop() - io_loop.close() - io_loop = ioloop.IOLoop.current() - io_loop.spawn_callback(list_collections) - io_loop.start() + asyncio.get_event_loop().run_until_complete(list_collections()) def delete_solr_collection(): @@ -93,13 +90,9 @@ async def delete_collection(): await adapter.solr.delete_collection(args.collection) except (SolrServerError, SolrClientError) as err: logger.error('Failed to delete Solr collection ({}).'.format(err)) - io_loop.stop() - io_loop.close() - - io_loop = ioloop.IOLoop.current() if args.no_prompt: - io_loop.spawn_callback(delete_collection) + asyncio.get_event_loop().run_until_complete(delete_collection()) else: answer = input('Type collection name to confirm you want to delete it: ' @@ -107,9 +100,7 @@ async def delete_collection(): if answer.strip() != args.collection: logger.error('Collection deletion was not confirmed') sys.exit(1) - io_loop.spawn_callback(delete_collection) - - io_loop.start() + asyncio.get_event_loop().run_until_complete(delete_collection()) def reindex(): @@ -176,9 +167,5 @@ async def reindex_documents(): continue logger.info('Successfully reindexed {} documents'.format(total)) - io_loop.stop() - io_loop.close() - io_loop = ioloop.IOLoop.current() - io_loop.spawn_callback(reindex_documents) - io_loop.start() + asyncio.get_event_loop().run_until_complete(reindex_documents()) From a77cc9670960d0d658db813d92c319b9842a54f4 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 31 Jul 2019 16:25:57 -0700 Subject: [PATCH 034/221] AppController firewall configuration disabled via APPSCALE_FIREWALL env --- AppController/djinn.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index b0ee6a2f74..ea9606f0f7 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -290,7 +290,7 @@ class Djinn # A boolean that indicates whether or not we should turn the firewall on, # and continuously keep it on. Should definitely be on for releases, and # on whenever possible. - FIREWALL_IS_ON = true + FIREWALL_IS_ON = 'true' == (ENV['APPSCALE_FIREWALL'] || 'true') # The location on the local filesystem where AppScale-related configuration # files are written to. From 5ecda419243dfad028f54ff1987b106e48b477e0 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 12 Mar 2019 23:12:45 -0700 Subject: [PATCH 035/221] Deployment open port check should not use login ip --- AdminServer/appscale/admin/__init__.py | 38 ++++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/AdminServer/appscale/admin/__init__.py b/AdminServer/appscale/admin/__init__.py index 2b9f43244e..011e428886 100644 --- a/AdminServer/appscale/admin/__init__.py +++ b/AdminServer/appscale/admin/__init__.py @@ -98,30 +98,32 @@ def wait_for_port_to_open(http_port, operation_id, deadline): except KeyError: raise OperationTimeout('Operation no longer in cache') + all_lbs = set(appscale_info.get_load_balancer_ips()) + passed_lbs = set() while True: - if time.time() > deadline: - message = 'Deploy operation took too long.' 
- operation.set_error(message) - raise OperationTimeout(message) - - if utils.port_is_open(options.login_ip, http_port): - break + for load_balancer in all_lbs: + if load_balancer in passed_lbs or time.time() > deadline: + continue - yield gen.sleep(1) + if utils.port_is_open(load_balancer, http_port): + passed_lbs.add(load_balancer) - for load_balancer in appscale_info.get_load_balancer_ips(): - while True: - if time.time() > deadline: - # The version is reachable from the login IP, but it's not reachable - # from every registered load balancer. It makes more sense to mark the - # operation as a success than a failure because the lagging load - # balancers should eventually reflect the registered instances. - break + if len(passed_lbs) == len(all_lbs): + break - if utils.port_is_open(load_balancer, http_port): + if time.time() > deadline: + # If the version is reachable, but it's not reachable from every + # registered load balancer. It makes more sense to mark the + # operation as a success than a failure because the lagging load + # balancers should eventually reflect the registered instances. + if not passed_lbs: + message = 'Deploy operation took too long.' + operation.set_error(message) + raise OperationTimeout(message) + else: break - yield gen.sleep(1) + yield gen.sleep(1) @gen.coroutine From 1e630828010a11fdbb26700f2ffa1ea102fa9012 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 5 Aug 2019 10:05:26 -0700 Subject: [PATCH 036/221] Remove incorrect argument from get_entry call --- AppDB/appscale/datastore/fdb/indexes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index c515d48db8..9dca5567d5 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -1358,7 +1358,7 @@ def update_composite_index(self, project_id, index_pb, cursor=(None, None)): while True: results, more_results = yield result_iterator.next_page() index_entries = [kind_index.decode(result) for result in results] - version_entries = yield [self._data_manager.get_entry(self, tr, entry) + version_entries = yield [self._data_manager.get_entry(tr, entry) for entry in index_entries] for index_entry, version_entry in zip(index_entries, version_entries): new_keys = composite_index.encode_keys( From 483a0e843e57fd098e2383ea539157a921c4b287 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 5 Aug 2019 10:30:23 -0700 Subject: [PATCH 037/221] Add string representation of VersionEntry --- AppDB/appscale/datastore/fdb/data.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/AppDB/appscale/datastore/fdb/data.py b/AppDB/appscale/datastore/fdb/data.py index 933d77c6f2..670f013304 100644 --- a/AppDB/appscale/datastore/fdb/data.py +++ b/AppDB/appscale/datastore/fdb/data.py @@ -43,6 +43,17 @@ def __init__(self, project_id, namespace, path, commit_versionstamp=None, self._encoded_entity = encoded_entity self._decoded_entity = None + def __repr__(self): + # Since the encoded entity can be large, it probably does not make sense to + # include it in the string representation. 
+ blob_repr = self._encoded_entity + if blob_repr is not None: + blob_repr = u''.format(len(blob_repr)) + + return u'VersionEntry({!r}, {!r}, {!r}, {!r}, {!r}, {})'.format( + self.project_id, self.namespace, self.path, self.commit_versionstamp, + self.version, blob_repr) + @property def present(self): return self.commit_versionstamp is not None From 39a0268eefeac53ca6da051c07f0ce358ad3dedd Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Thu, 8 Aug 2019 14:16:23 +0300 Subject: [PATCH 038/221] Code refactoring --- .../appscale/taskqueue/distributed_tq.py | 9 ++++---- .../taskqueue/protocols/compile_protocols.sh | 6 +++--- .../protocols/taskqueue_service.proto | 2 +- .../appscale/taskqueue/push_worker.py | 21 ++++--------------- .../test/helpers/restart-taskqueue.sh | 13 ------------ debian/appscale_install_functions.sh | 13 ------------ debian/control.bionic | 1 + debian/control.jessie | 1 + debian/control.stretch | 1 + debian/control.xenial | 1 + 10 files changed, 17 insertions(+), 51 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/distributed_tq.py b/AppTaskQueue/appscale/taskqueue/distributed_tq.py index 0478fdcbda..b39b85b88c 100644 --- a/AppTaskQueue/appscale/taskqueue/distributed_tq.py +++ b/AppTaskQueue/appscale/taskqueue/distributed_tq.py @@ -381,9 +381,11 @@ def __bulk_add(self, source_info, request, response): if add_request.HasField("task_name"): task_name = add_request.task_name - namespaced_name = choose_task_name(add_request.app_id, - add_request.queue_name, - user_chosen=task_name) + namespaced_name = choose_task_name( + add_request.app_id.decode('utf-8'), + add_request.queue_name.decode('utf-8'), + user_chosen=task_name.decode('utf-8')) + add_request.task_name = namespaced_name.encode('utf-8') # Wait until the result is known before setting the chosen task name on @@ -491,7 +493,6 @@ def __check_and_store_task_names(self, request): An ApplicationError of TASK_ALREADY_EXISTS. """ task_name = request.task_name.decode('utf-8') - task_name = task_name.replace("b'", "").replace("'", "") try: item = self.__get_task_name(request.app_id, task_name) diff --git a/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh b/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh index d35bce75a4..c91160d546 100755 --- a/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh +++ b/AppTaskQueue/appscale/taskqueue/protocols/compile_protocols.sh @@ -5,6 +5,7 @@ set -e PROTOCOLS_DIR="$( realpath --strip "$( dirname "${BASH_SOURCE[0]}" )" )" +TASKQUEUE_DIR="$( dirname "$( dirname "$( dirname "${PROTOCOLS_DIR}" )" )" )" if ! protoc --version | grep -E ' (3\.)|(2\.)' ; then echo "Couldn't compile *.proto files because protoc version 3 was not found." @@ -13,8 +14,7 @@ fi echo "Compiling Protocol buffer *.proto files.." -(cd "${PROTOCOLS_DIR}" && \ - protoc --python_out=./ *.proto && \ - sed -i -E 's/^import.*_pb2/from . \0/' *.py) +(cd "${TASKQUEUE_DIR}" && \ + protoc --python_out=. appscale/taskqueue/protocols/*.proto) echo "Protocols have been successfully compiled." 
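Running protoc from the TaskQueue package root, together with the full import path added to taskqueue_service.proto just below, is what makes the old sed rewrite unnecessary: protoc derives the import statements in the generated modules from the path used in each .proto import. Roughly, and treating the exact aliases as an illustration rather than the literal generated code (they vary with the protoc version):

# (a) Old approach: protoc invoked inside protocols/ against
#     import "datastore_v3.proto"; the generated taskqueue_service_pb2.py
#     carried a top-level import, which the sed step rewrote in place:
#
#         import datastore_v3_pb2 as datastore__v3__pb2         # as generated
#         from . import datastore_v3_pb2 as datastore__v3__pb2  # after sed
#
# (b) New approach: protoc invoked from the package root against
#     import "appscale/taskqueue/protocols/datastore_v3.proto"; the generated
#     import already matches the installed package layout:
from appscale.taskqueue.protocols import datastore_v3_pb2

# Quick sanity check that the module resolves through the package path.
print(datastore_v3_pb2.DESCRIPTOR.name)
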
diff --git a/AppTaskQueue/appscale/taskqueue/protocols/taskqueue_service.proto b/AppTaskQueue/appscale/taskqueue/protocols/taskqueue_service.proto index 7dfa18e71a..8917435c64 100644 --- a/AppTaskQueue/appscale/taskqueue/protocols/taskqueue_service.proto +++ b/AppTaskQueue/appscale/taskqueue/protocols/taskqueue_service.proto @@ -1,7 +1,7 @@ syntax = "proto2"; option go_package = "taskqueue"; -import "datastore_v3.proto"; +import "appscale/taskqueue/protocols/datastore_v3.proto"; package appengine; diff --git a/AppTaskQueue/appscale/taskqueue/push_worker.py b/AppTaskQueue/appscale/taskqueue/push_worker.py index 18427db34e..16d88699de 100644 --- a/AppTaskQueue/appscale/taskqueue/push_worker.py +++ b/AppTaskQueue/appscale/taskqueue/push_worker.py @@ -115,8 +115,7 @@ def execute_task(task, headers, args): """ start_time = datetime.datetime.utcnow() - args['task_name'] = clean_task_name(args['task_name']) - headers['X-AppEngine-TaskName'] = clean_task_name(headers['X-AppEngine-TaskName']) + task_name = clean_task_name(args['task_name']) content_length = len(args['body']) @@ -146,7 +145,7 @@ def execute_task(task, headers, args): args['task_name'], task.request.id, args['expires'])) celery.control.revoke(task.request.id) - update_task(args['task_name'], TASK_STATES.EXPIRED) + update_task(task_name, TASK_STATES.EXPIRED) return if (args['max_retries'] != 0 and @@ -156,7 +155,7 @@ def execute_task(task, headers, args): args['max_retries'])) celery.control.revoke(task.request.id) - update_task(args['task_name'], TASK_STATES.FAILED) + update_task(task_name, TASK_STATES.FAILED) return # Targets do not get X-Forwarded-Proto from nginx, they use haproxy port. @@ -214,14 +213,11 @@ def execute_task(task, headers, args): logger.warning( '{task} failed before receiving response. It will retry in {wait} ' 'seconds.'.format(task=args['task_name'], wait=wait_time)) - args['task_name'] = args['task_name'].encode('utf-8') - headers['X-AppEngine-TaskName'] = \ - headers['X-AppEngine-TaskName'].encode('utf-8') raise task.retry(countdown=wait_time) if 200 <= response.status < 300: # Task successful. - update_task(args['task_name'], TASK_STATES.SUCCESS) + update_task(task_name, TASK_STATES.SUCCESS) time_elapsed = datetime.datetime.utcnow() - start_time logger.info( @@ -236,9 +232,6 @@ def execute_task(task, headers, args): args['task_name'], redirect_url)) url = urlparse(redirect_url) if redirects_left == 0: - args['task_name'] = args['task_name'].encode('utf-8') - headers['X-AppEngine-TaskName'] = \ - headers['X-AppEngine-TaskName'].encode('utf-8') raise task.retry(countdown=wait_time) redirects_left -= 1 else: @@ -247,18 +240,12 @@ def execute_task(task, headers, args): task=args['task_name'], wait=wait_time)) logger.warning(message) - args['task_name'] = args['task_name'].encode('utf-8') - headers['X-AppEngine-TaskName'] = \ - headers['X-AppEngine-TaskName'].encode('utf-8') raise task.retry(countdown=wait_time) except EventletTimeout as thrown_timeout: if thrown_timeout != timeout: raise logger.exception('Task {} timed out. Retrying.'.format(args['task_name'])) - args['task_name'] = args['task_name'].encode('utf-8') - headers['X-AppEngine-TaskName'] = \ - headers['X-AppEngine-TaskName'].encode('utf-8') # This could probably be calculated, but for now, just retry immediately. 
raise task.retry(countdown=0) finally: diff --git a/AppTaskQueue/test/helpers/restart-taskqueue.sh b/AppTaskQueue/test/helpers/restart-taskqueue.sh index 5ef03c43d5..4893cbb902 100755 --- a/AppTaskQueue/test/helpers/restart-taskqueue.sh +++ b/AppTaskQueue/test/helpers/restart-taskqueue.sh @@ -84,19 +84,6 @@ if [ -z ${PORTS} ] || [ -z ${DB_IP} ] || [ -z ${ZK_IP} ] || [ -z ${LB_IP} ]; the fi if [ ! -z ${TQ_SOURCE_DIR} ]; then - echo "Installing python3-venv" - attempt=1 - while ! (yes | apt-get install python3-venv) - do - if (( attempt > 15 )); then - log "Failed to install python3-venv after ${attempt} attempts" "ERROR" - exit 1 - fi - log "Failed to install python3-venv. Retrying." "WARNING" - ((attempt++)) - sleep ${attempt} - done - rm -rf /opt/appscale_venvs/appscale_taskqueue/ python3 -m venv /opt/appscale_venvs/appscale_taskqueue/ diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 553d51937d..2f1cf073f0 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -624,19 +624,6 @@ installinfrastructuremanager() installtaskqueue() { - echo "Installing python3-venv" - attempt=1 - while ! (yes | apt-get install python3-venv) - do - if (( attempt > 15 )); then - echo "Failed to install python3-venv after ${attempt} attempts" "ERROR" - exit 1 - fi - echo "Failed to install python3-venv. Retrying." "WARNING" - ((attempt++)) - sleep ${attempt} - done - rm -rf /opt/appscale_venvs/appscale_taskqueue/ python3 -m venv /opt/appscale_venvs/appscale_taskqueue/ diff --git a/debian/control.bionic b/debian/control.bionic index d5ba2cfe0f..4ed1d894fe 100644 --- a/debian/control.bionic +++ b/debian/control.bionic @@ -86,6 +86,7 @@ Depends: appscale-tools, python-virtualenv, python-xmpp, python-yaml, + python3-venv, rabbitmq-server, rsync, rsyslog, diff --git a/debian/control.jessie b/debian/control.jessie index 9da97d46d0..4c7f5f87c2 100644 --- a/debian/control.jessie +++ b/debian/control.jessie @@ -82,6 +82,7 @@ Depends: appscale-tools, python-twisted, python-xmpp, python-yaml, + python3-venv, rabbitmq-server, rsync, rsyslog, diff --git a/debian/control.stretch b/debian/control.stretch index c061c1c0f1..582a23e054 100644 --- a/debian/control.stretch +++ b/debian/control.stretch @@ -87,6 +87,7 @@ Depends: appscale-tools, python-virtualenv, python-xmpp, python-yaml, + python3-venv, rabbitmq-server, rsync, rsyslog, diff --git a/debian/control.xenial b/debian/control.xenial index 0d18cf72e8..057de390ef 100644 --- a/debian/control.xenial +++ b/debian/control.xenial @@ -87,6 +87,7 @@ Depends: appscale-tools, python-twisted, python-virtualenv, python-xmpp, + python3-venv, rabbitmq-server, rsync, rsyslog, From 7727ca5e662192dc7e4b7a689ab15130d19d7bbf Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 8 Aug 2019 17:05:45 +0300 Subject: [PATCH 039/221] Make up help messages for cmdline commands --- SearchService2/appscale/search/scripts.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/SearchService2/appscale/search/scripts.py b/SearchService2/appscale/search/scripts.py index b326bf5f41..1e46820bac 100644 --- a/SearchService2/appscale/search/scripts.py +++ b/SearchService2/appscale/search/scripts.py @@ -10,7 +10,7 @@ from appscale.search.constants import SolrServerError, SolrClientError from kazoo.client import KazooClient from appscale.common.constants import LOG_FORMAT, ZK_PERSISTENT_RECONNECTS -from tornado import ioloop, gen +from tornado import gen from appscale.search import solr_adapter 
@@ -24,8 +24,7 @@ def list_solr_collections(): parser = argparse.ArgumentParser() parser.add_argument( - '-v', '--verbose', action='store_true', - help='Output debug-level logging') + '-v', '--verbose', action='store_true', help='Output debug-level logging') parser.add_argument( '--zk-locations', required=True, nargs='+', help='ZooKeeper location(s)') args = parser.parse_args() @@ -62,15 +61,13 @@ def delete_solr_collection(): parser = argparse.ArgumentParser() parser.add_argument( - '-v', '--verbose', action='store_true', - help='Output debug-level logging') + '-v', '--verbose', action='store_true', help='Output debug-level logging') parser.add_argument( - '--no-prompt', action='store_true', - help='') + '--no-prompt', action='store_true', help='Do not ask for confirmation') parser.add_argument( '--zk-locations', required=True, nargs='+', help='ZooKeeper location(s)') parser.add_argument( - '--collection', required=True, help='ZooKeeper location(s)') + '--collection', required=True, help='Collection name to delete') args = parser.parse_args() if args.verbose: @@ -111,8 +108,7 @@ def reindex(): parser = argparse.ArgumentParser() parser.add_argument( - '-v', '--verbose', action='store_true', - help='Output debug-level logging') + '-v', '--verbose', action='store_true', help='Output debug-level logging') parser.add_argument( '--project', required=True, help='The name of GAE project') parser.add_argument( From 0ef5259bd126f3c1d6f7085cd5820e82e61f361c Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Fri, 9 Aug 2019 15:33:54 +0300 Subject: [PATCH 040/221] Make more accurate work with task names in push_worker module --- AppTaskQueue/appscale/taskqueue/push_worker.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/push_worker.py b/AppTaskQueue/appscale/taskqueue/push_worker.py index 16d88699de..b6ff31d879 100644 --- a/AppTaskQueue/appscale/taskqueue/push_worker.py +++ b/AppTaskQueue/appscale/taskqueue/push_worker.py @@ -39,10 +39,6 @@ logger.setLevel(logging.INFO) -def clean_task_name(task_name): - return task_name.decode('utf-8').replace("b'", "").replace("'", "") - - def get_wait_time(retries, args): """ Calculates how long we should wait to execute a failed task, based on how many times it's failed in the past. @@ -115,7 +111,7 @@ def execute_task(task, headers, args): """ start_time = datetime.datetime.utcnow() - task_name = clean_task_name(args['task_name']) + task_name = args['task_name'].decode('utf-8') content_length = len(args['body']) From 5fa5592829fba78f85b271146b06809afeeb60e0 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 9 Aug 2019 15:49:06 +0300 Subject: [PATCH 041/221] Use consistent scripts naming (appscale-...) 
--- SearchService2/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SearchService2/setup.py b/SearchService2/setup.py index e628d36a2a..217bb97bdf 100644 --- a/SearchService2/setup.py +++ b/SearchService2/setup.py @@ -35,7 +35,7 @@ entry_points={'console_scripts': [ 'appscale-search2=appscale.search.search_server:main', 'appscale-search2-reindex=appscale.search.scripts:reindex', - 'list-solr-collections=appscale.search.scripts:list_solr_collections', - 'delete-solr-collection=appscale.search.scripts:delete_solr_collection', + 'appscale-list-solr-collections=appscale.search.scripts:list_solr_collections', + 'appscale-delete-solr-collection=appscale.search.scripts:delete_solr_collection', ]} ) From a98b42e04c804962c594deeedb75034d4d37fba9 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 13 Aug 2019 17:54:35 +0300 Subject: [PATCH 042/221] Start bootstrap refactoring --- bootstrap-upgrade.sh | 313 ++++++++++++++++++++++++++++++++++ bootstrap.sh | 394 ++++++++++++++----------------------------- 2 files changed, 438 insertions(+), 269 deletions(-) create mode 100755 bootstrap-upgrade.sh diff --git a/bootstrap-upgrade.sh b/bootstrap-upgrade.sh new file mode 100755 index 0000000000..b24c3f2084 --- /dev/null +++ b/bootstrap-upgrade.sh @@ -0,0 +1,313 @@ +#!/bin/bash +# +# Simple script to install AppScale and tools from the master branch +# Author: AppScale Team + +set -e + +APPSCALE_REPO="git://github.com/AppScale/appscale.git" +APPSCALE_TOOLS_REPO="git://github.com/AppScale/appscale-tools.git" +AGENTS_REPO="git://github.com/AppScale/appscale-agents.git" +THIRDPARTIES_REPO="git://github.com/AppScale/appscale-thirdparties.git" +GIT_TAG="last" + +usage() { + echo "Usage: ${0} [--tag ]" + echo + echo "Options:" + echo " --tag Git tag (e.g.: 3.7.2) to upgrade to." + echo " Default: '${GIT_TAG}' (use the latest release)." + exit 1 +} + + +echo -n "Checking to make sure you are root..." +if [ "$(id -u)" != "0" ]; then + echo "Failed" 1>&2 + exit 1 +fi +echo "Success" + +echo -n "Checking to make sure \$HOME is /root..." +if [ "$HOME" != "/root" ]; then + echo "Failed" + exit 1 +fi +echo "Success" + +# Let's get the command line argument. +while [ $# -gt 0 ]; do + if [ "${1}" = "--tag" ]; then + shift; if [ -z "${1}" ]; then usage; fi + GIT_TAG="${1}"; + shift; continue + fi + usage +done + +declare -A REPOS=( + ["appscale"]="${APPSCALE_REPO}" + ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" + ["appscale-agents"]="${AGENTS_REPO}" + ["appscale-thirdparties"]="${THIRDPARTIES_REPO}" +) + +# At this time we expect to be installed in $HOME. +cd $HOME + +echo "Ensuring all appscale repos are cloned" +for repo_name in "${!REPOS[@]}"; do + if [ ! -d "/root/${repo_name}" ]; then + git clone ${REPOS[$repo_name]} ${repo_name} + else + cd ${repo_name} + git remote + cd .. + fi +done + +# TODO if tag is "last", firstly identify what tag is actually latest. + +# Let's pull the github repositories. +echo +echo "Will be using the following github repos:" +echo "Repo: ${APPSCALE_REPO} Tag ${GIT_TAG}" +echo "Repo: ${APPSCALE_TOOLS_REPO} Tag ${GIT_TAG}" +echo "Repo: ${AGENTS_REPO} Tag ${GIT_TAG}" +echo "Repo: ${THIRDPARTIES_REPO} Tag ${GIT_TAG}" +echo "Exit now (ctrl-c) if this is incorrect" +echo + +sleep 5 + +# Wait up to 30 seconds for the package lists lock to become available. 
+lock_wait_start=$(date +%s) +printed_status=false +while fuser /var/lib/apt/lists/lock; do + if [ "${printed_status}" = false ]; then + echo "Waiting for another process to update package lists" + printed_status=true + fi + current_time=$(date +%s) + elapsed_time=$((current_time - lock_wait_start)) + if [ "${elapsed_time}" -gt 30 ]; then break; fi + sleep 1 +done +apt-get update + +# Wait up to 2 min for the dpkg lock to become available. +lock_wait_start=$(date +%s) +printed_status=false +while fuser /var/lib/dpkg/lock; do + if [ "${printed_status}" = false ]; then + echo "Waiting for another process to update packages" + printed_status=true + fi + current_time=$(date +%s) + elapsed_time=$((current_time - lock_wait_start)) + if [ "${elapsed_time}" -gt 120 ]; then break; fi + sleep 1 +done +apt-get install -y git + +# TODO TODO TODO TODO TODO TODO TODO TODO TODO + +if [ ! -d appscale ]; then + # We split the commands, to ensure it fails if branch doesn't + # exists (Precise git will not fail otherwise). + git clone ${APPSCALE_REPO} appscale + git clone ${APPSCALE_TOOLS_REPO} appscale-tools + git clone ${AGENTS_REPO} appscale-agents + git clone ${THIRDPARTIES_REPO} appscale-thirdparties + + if [ "$GIT_TAG" = "last" ]; then + GIT_TAG="$(cd appscale; git tag | tail -n 1)" + fi + (cd appscale; git checkout "$GIT_TAG") + (cd appscale-tools; git checkout "$GIT_TAG") + (cd appscale-agents; git checkout "$GIT_TAG") + (cd appscale-thirdparties; git checkout "$GIT_TAG") +fi + +# Since the last step in appscale_build.sh is to create the certs directory, +# its existence indicates that appscale has already been installed. +if [ -d /etc/appscale/certs ]; then + UPDATE_REPO="Y" + + # For upgrade, we don't switch across branches. + if [ "${TAG_PARAM_SPECIFIED}" = "N" ]; then + echo "Can't use custom branches when upgrading existing installation." + echo "Use for example '--tag last' or '--tag 3.8.0' instead." + exit 1 + fi + + APPSCALE_MAJOR="$(sed -n 's/.*\([0-9]\)\+\.\([0-9]\)\+\.[0-9]/\1/gp' appscale/VERSION)" + APPSCALE_MINOR="$(sed -n 's/.*\([0-9]\)\+\.\([0-9]\)\+\.[0-9]/\2/gp' appscale/VERSION)" + if [ -z "$APPSCALE_MAJOR" -o -z "$APPSCALE_MINOR" ]; then + echo "Cannot determine version of AppScale!" + exit 1 + fi + + # This is an upgrade, so let's make sure we use a tag that has + # been passed, or the last one available. Let's fetch all the + # available tags first. + (cd appscale; git fetch ${APPSCALE_REPO} -t) + (cd appscale-tools; git fetch ${APPSCALE_TOOLS_REPO} -t) + (cd appscale-agents; git fetch ${AGENTS_REPO} -t) + (cd appscale-thirdparties; git fetch ${THIRDPARTIES_REPO} -t) + + if [ "$GIT_TAG" = "last" ]; then + GIT_TAG="$(cd appscale; git tag | tail -n 1)" + else + # Make sure we have this tag in the official repo. + if ! (cd appscale; git ls-remote --tags ${APPSCALE_REPO} | grep -F $GIT_TAG > /dev/null) ; then + echo "\"$GIT_TAG\" not recognized: use --tag to specify tag to upgrade to." + exit 1 + fi + fi + + # We can pull a tag only if we are on the master branch. + CURRENT_BRANCH="$(cd appscale; git branch --no-color | grep '^*' | cut -f 2 -d ' ')" + if [ "${CURRENT_BRANCH}" != "master" ] && \ + (cd appscale; git tag -l | grep $(git describe)) ; then + CURRENT_BRANCH="$(cd appscale; git tag -l | grep $(git describe))" + if [ "${CURRENT_BRANCH}" = "${GIT_TAG}" ]; then + echo "AppScale repository is already at the"\ + "specified release. Building with current code." 
+ UPDATE_REPO="N" + fi + fi + + # If CURRENT_BRANCH is empty, then we are not on master, and we + # are not on a released version: we don't upgrade then. + if [ -z "${CURRENT_BRANCH}" ]; then + echo "Error: git repository is not 'master' or a released version." + exit 1 + fi + + # Make sure AppScale is not running. + MONIT=$(which monit) + if $MONIT summary | grep controller > /dev/null ; then + echo "AppScale is still running: please stop it" + [ "${FORCE_UPGRADE}" = "Y" ] || exit 1 + elif echo $MONIT | grep local > /dev/null ; then + # AppScale is not running but there is a monit + # leftover from the custom install. + $MONIT quit + fi + + # Let's keep a copy of the old config: we need to move it to avoid + # questions from dpkg. + if [ -e /etc/haproxy/haproxy.cfg ]; then + mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.appscale.old + fi + + # Remove outdated appscale-controller and appscale-progenitor. + if [ $APPSCALE_MAJOR -le 2 -a $APPSCALE_MINOR -le 2 ]; then + rm -f /etc/init.d/appscale-controller + rm -f /etc/init.d/appscale-progenitor + update-rc.d -f appscale-progenitor remove || true + fi + + # Remove control files we added before 1.14, and re-add the + # default ones. + if [ $APPSCALE_MAJOR -le 1 -a $APPSCALE_MINOR -le 14 ]; then + rm -f /etc/default/haproxy /etc/init.d/haproxy /etc/default/monit /etc/monitrc + if dpkg-query -l haproxy > /dev/null 2> /dev/null ; then + apt-get -o DPkg::Options::="--force-confmiss" --reinstall install haproxy + fi + if dpkg-query -l monit > /dev/null 2> /dev/null ; then + apt-get -o DPkg::Options::="--force-confmiss" --reinstall install monit + fi + fi + + + if [ "${UPDATE_REPO}" = "Y" ]; then + echo "Found AppScale version $APPSCALE_MAJOR.$APPSCALE_MINOR."\ + "An upgrade to the latest version available will be"\ + "attempted in 5 seconds." + sleep 5 + + # Upgrade the repository. If GIT_TAG is empty, we are on HEAD. + if [ -n "${GIT_TAG}" ]; then + if ! (cd appscale; git checkout "$GIT_TAG"); then + echo "Please stash your local unsaved changes and checkout"\ + "the version of AppScale you are currently using to fix"\ + "this error." + echo "e.g.: git stash; git checkout " + exit 1 + fi + + if ! (cd appscale-tools; git checkout "$GIT_TAG"); then + echo "Please stash your local unsaved changes and checkout"\ + "the version of appscale-tools you are currently using"\ + "to fix this error." + echo "e.g.: git stash; git checkout " + exit 1 + fi + elif [ "${FORCE_UPGRADE}" = "N" ]; then + # TODO is it master? + (cd appscale; git pull) + (cd appscale-tools; git pull) + (cd appscale-agents; git pull) + (cd appscale-thirdparties; git pull) + else + RANDOM_KEY="$(echo $(date), $$|md5sum|head -c 6)-$(date +%s)" + REMOTE_REPO_NAME="appscale-bootstrap-${RANDOM_KEY}" + if ! (cd appscale; + git remote add -t "${APPSCALE_BRANCH}" -f "${REMOTE_REPO_NAME}" "${APPSCALE_REPO}"; + git checkout "${REMOTE_REPO_NAME}"/"${APPSCALE_BRANCH}"); then + echo "Please make sure the repository url is correct, the"\ + "branch exists, and that you have stashed your local"\ + "changes." + echo "e.g.: git stash, git remote add -t {remote_branch} -f"\ + "{repo_name} {repository_url}; git checkout"\ + "{repo_name}/{remote_branch}" + exit 1 + fi + if ! (cd appscale-tools; + git remote add -t "${APPSCALE_TOOLS_BRANCH}" -f "${REMOTE_REPO_NAME}" "${APPSCALE_TOOLS_REPO}"; + git checkout "${REMOTE_REPO_NAME}"/"${APPSCALE_TOOLS_BRANCH}"); then + echo "Please make sure the repository url is correct, the"\ + "branch exists, and that you have stashed your local"\ + "changes." 
+ echo "e.g.: git stash, git remote add -t {remote_branch} -f"\ + "{repo_name} {repository_url}; git checkout"\ + "{repo_name}/{remote_branch}" + exit 1 + fi + fi + fi +fi + +echo -n "Building AppScale..." +if ! (cd appscale/debian; bash appscale_build.sh) ; then + echo "failed!" + exit 1 +fi + +echo -n "Installing AppScale Agents..." +if ! (cd appscale-agents/; make install-no-venv) ; then + echo "Failed to install AppScale Agents" + exit 1 +fi + +echo -n "Building AppScale Tools..." +if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then + echo "failed!" + exit 1 +fi + +echo -n "Downloading Thirdparty artifacts..." +if ! (cd appscale-thirdparties/; bash download_all_artifacts.sh) ; then + echo "failed!" + exit 1 +fi + +# Let's source the profiles so this image can be used right away. +. /etc/profile.d/appscale.sh + +echo "*****************************************" +echo "AppScale and AppScale tools are installed" +echo "*****************************************" +exit 0 diff --git a/bootstrap.sh b/bootstrap.sh index 3fdc62264a..35d6821e48 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -5,28 +5,42 @@ set -e -# Defaults values for repositories and branches. +# Defaults values for script parameters. APPSCALE_REPO="git://github.com/AppScale/appscale.git" APPSCALE_TOOLS_REPO="git://github.com/AppScale/appscale-tools.git" AGENTS_REPO="git://github.com/AppScale/appscale-agents.git" +THIRDPARTIES_REPO="git://github.com/AppScale/appscale-thirdparties.git" APPSCALE_BRANCH="master" APPSCALE_TOOLS_BRANCH="master" AGENTS_BRANCH="master" -FORCE_UPGRADE="N" -UNIT_TEST="n" -GIT_TAG="" +THIRDPARTIES_BRANCH="master" +GIT_TAG="last" + +BRANCH_PARAM_SPECIFIED="N" +TAG_PARAM_SPECIFIED="N" usage() { - echo "Usage: ${0} [--repo ][--tools-repo ][-t]" + echo "Usage: ${0} [--repo ] [--tools-repo ]" + echo " [--tools-repo ] [--tools-branch ]" + echo " [--agents-repo ] [--agents-branch ]" + echo " [--thirdparties-repo ] [--thirdparties-branch ]" + echo " [--tag ]" + echo + echo "Be aware that tag parameter has priority over repo and branch parameters." + echo "So if no tag, repos and branches are specified, tag 'last' will be used." + echo "If you want to bootstrap using master branches of all repos, specify '--tag dev'" echo echo "Options:" - echo " --repo Specify appscale repo (default $APPSCALE_REPO)" - echo " --branch Specify appscale branch (default $APPSCALE_BRANCH)" - echo " --tools-repo Specify appscale-tools repo (default $APPSCALE_TOOLS_REPO" - echo " --tools-branch Specify appscale-tools branch (default $APPSCALE_TOOLS_BRANCH)" - echo " --force-upgrade Force upgrade even if some check fails." 
- echo " --tag Use git tag (ie 2.2.0) or 'last' to use the latest release or 'dev' for HEAD" - echo " -t Run unit tests" + echo " --repo Specify appscale repo (default $APPSCALE_REPO)" + echo " --branch Specify appscale branch (default $APPSCALE_BRANCH)" + echo " --tools-repo Specify appscale-tools repo (default $APPSCALE_TOOLS_REPO" + echo " --tools-branch Specify appscale-tools branch (default $APPSCALE_TOOLS_BRANCH)" + echo " --agents-repo Specify appscale-agents repo (default $AGENTS_REPO" + echo " --agents-branch Specify appscale-agents branch (default $AGENTS_BRANCH)" + echo " --thirdparties-repo Specify appscale-thirdparties repo (default $THIRDPARTIES_REPO" + echo " --thirdparties-branch Specify appscale-thirdparties branch (default $THIRDPARTIES_BRANCH)" + echo " --tag Use git tag (ie 3.7.2) or 'last' to use the latest release" + echo " or 'dev' for HEAD (default ${GIT_TAG})" exit 1 } @@ -45,109 +59,99 @@ if [ "$HOME" != "/root" ]; then fi echo "Success" -# Let's get the command line arguments. +# Let's get the command line arguments. while [ $# -gt 0 ]; do if [ "${1}" = "--repo" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - APPSCALE_REPO="${1}" - shift - continue + shift; if [ -z "${1}" ]; then usage; fi + APPSCALE_REPO="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue fi if [ "${1}" = "--branch" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - APPSCALE_BRANCH="${1}" - shift - continue - fi - if [ "${1}" = "--tag" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - GIT_TAG="${1}" - shift - continue + shift; if [ -z "${1}" ]; then usage; fi + APPSCALE_BRANCH="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue fi if [ "${1}" = "--tools-repo" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - APPSCALE_TOOLS_REPO="${1}" - shift - continue + shift; if [ -z "${1}" ]; then usage; fi + APPSCALE_TOOLS_REPO="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue fi if [ "${1}" = "--tools-branch" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - APPSCALE_TOOLS_BRANCH="${1}" - shift - continue + shift; if [ -z "${1}" ]; then usage; fi + APPSCALE_TOOLS_BRANCH="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue fi if [ "${1}" = "--agents-repo" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - AGENTS_REPO="${1}" - shift - continue + shift; if [ -z "${1}" ]; then usage; fi + AGENTS_REPO="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue fi if [ "${1}" = "--agents-branch" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - AGENTS_BRANCH="${1}" - shift - continue + shift; if [ -z "${1}" ]; then usage; fi + AGENTS_BRANCH="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue fi - if [ "${1}" = "--force-upgrade" ]; then - FORCE_UPGRADE="Y" - shift - continue + if [ "${1}" = "--thirdparties-repo" ]; then + shift; if [ -z "${1}" ]; then usage; fi + THIRDPARTIES_REPO="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue fi - if [ "${1}" = "-t" ]; then - UNIT_TEST="Y" - shift - continue + if [ "${1}" = "--thirdparties-branch" ]; then + shift; if [ -z "${1}" ]; then usage; fi + THIRDPARTIES_BRANCH="${1}"; BRANCH_PARAM_SPECIFIED="Y" + shift; continue + fi + if [ "${1}" = "--tag" ]; then + shift; if [ -z "${1}" ]; then usage; fi + GIT_TAG="${1}"; + if [${GIT_TAG} != "dev" ]; then TAG_PARAM_SPECIFIED="Y"; fi + shift; continue fi usage done + # Empty tag means we use the latest available. -if [ -z "${GIT_TAG}" ]; then - GIT_TAG="last" -else - # We don't use Tag and Branch at the same time. 
- if [ "${FORCE_UPGRADE}" = "N" ] && [ "${APPSCALE_BRANCH}" != "master" ]; then - echo "--branch cannot be specified with --tag" - exit 1 - fi +if [ "${BRANCH_PARAM_SPECIFIED}" = "Y" ] \ + && [ "${TAG_PARAM_SPECIFIED}" = "Y" ] \ + && [ "${GIT_TAG}" != "dev" ]; then + echo "Repo/Branch parameters can't be used if --tag parameter is specified" + exit 1 fi -# A tag of 'dev' means don't use tag. -if [ "${GIT_TAG}" = "dev" ]; then - GIT_TAG="" -fi +declare -A REPOS=( + ["appscale"]="${APPSCALE_REPO}" + ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" + ["appscale-agents"]="${AGENTS_REPO}" + ["appscale-thirdparties"]="${THIRDPARTIES_REPO}" +) +declare -A BRANCHES=( + ["appscale"]="${APPSCALE_BRANCH}" + ["appscale-tools"]="${APPSCALE_TOOLS_BRANCH}" + ["appscale-agents"]="${AGENTS_BRANCH}" + ["appscale-thirdparties"]="${THIRDPARTIES_BRANCH}" +) # At this time we expect to be installed in $HOME. cd $HOME # Let's pull the github repositories. echo -echo "Will be using the following github repo:" -echo "Repo: ${APPSCALE_REPO} Branch: ${APPSCALE_BRANCH}" -echo "Repo: ${APPSCALE_TOOLS_REPO} Branch: ${APPSCALE_TOOLS_BRANCH}" -echo "Exit now (ctrl-c) if this is incorrect" +if [ "${TAG_PARAM_SPECIFIED}" = "Y" ]; then + echo "Will be using the following github repos:" + echo "Repo: ${APPSCALE_REPO} Tag ${GIT_TAG}" + echo "Repo: ${APPSCALE_TOOLS_REPO} Tag ${GIT_TAG}" + echo "Repo: ${AGENTS_REPO} Tag ${GIT_TAG}" + echo "Repo: ${THIRDPARTIES_REPO} Tag ${GIT_TAG}" + echo "Exit now (ctrl-c) if this is incorrect" +else + echo "Will be using the following github repos:" + echo "Repo: ${APPSCALE_REPO} Branch: ${APPSCALE_BRANCH}" + echo "Repo: ${APPSCALE_TOOLS_REPO} Branch: ${APPSCALE_TOOLS_BRANCH}" + echo "Repo: ${AGENTS_REPO} Branch: ${AGENTS_BRANCH}" + echo "Repo: ${THIRDPARTIES_REPO} Branch: ${THIRDPARTIES_BRANCH}" + echo "Exit now (ctrl-c) if this is incorrect" +fi echo sleep 5 @@ -181,181 +185,43 @@ while fuser /var/lib/dpkg/lock; do sleep 1 done apt-get install -y git -if [ ! -d appscale ]; then - # We split the commands, to ensure it fails if branch doesn't - # exists (Precise git will not fail otherwise). - git clone ${APPSCALE_REPO} appscale - (cd appscale; git checkout ${APPSCALE_BRANCH}) - - git clone ${APPSCALE_TOOLS_REPO} appscale-tools - (cd appscale-tools; git checkout ${APPSCALE_TOOLS_BRANCH}) - git clone ${AGENTS_REPO} appscale-agents - (cd appscale-agents; git checkout ${AGENTS_BRANCH}) +APPSCALE_DIRS="\ + /root/appscale /root/appscale-tools /root/appscale-agents /root/appscale-thirdparties \ + /etc/appscale /opt/appscale /var/log/appscale /var/appscale /run/appscale" - - # Use tags if we specified it. - if [ -n "$GIT_TAG" ] && [ "${APPSCALE_BRANCH}" = "master" ]; then - if [ "$GIT_TAG" = "last" ]; then - GIT_TAG="$(cd appscale; git tag | tail -n 1)" - fi - (cd appscale; git checkout "$GIT_TAG") - (cd appscale-tools; git checkout "$GIT_TAG") - (cd appscale-agents; git checkout "$GIT_TAG") - fi -fi - -# Since the last step in appscale_build.sh is to create the certs directory, -# its existence indicates that appscale has already been installed. -if [ -d /etc/appscale/certs ]; then - UPDATE_REPO="Y" - - # For upgrade, we don't switch across branches. 
- if [ "${FORCE_UPGRADE}" = "N" ] && [ "${APPSCALE_BRANCH}" != "master" ]; then - echo "Cannot use --branch when upgrading" - exit 1 - fi - if [ "${FORCE_UPGRADE}" = "N" ] && [ "${APPSCALE_TOOLS_BRANCH}" != "master" ]; then - echo "Cannot use --tools-branch when upgrading" - exit 1 - fi - if [ "${FORCE_UPGRADE}" = "N" ] && [ -z "$GIT_TAG" ]; then - echo "Cannot use --tag dev when upgrading" - exit 1 +for appscale_presence_marker in ${APPSCALE_DIRS}; do + if [ -d ${appscale_presence_marker} ] ; then + echo "${appscale_presence_marker} already exists!" + echo "bootstrap.sh script should be used for initial installation only." + echo "Use bootstrap-upgrade.sh for upgrading existing deployment" + echo "It can be found here: https://raw.githubusercontent.com/AppScale/appscale/master/bootstrap-upgrade.sh." fi +done - APPSCALE_MAJOR="$(sed -n 's/.*\([0-9]\)\+\.\([0-9]\)\+\.[0-9]/\1/gp' appscale/VERSION)" - APPSCALE_MINOR="$(sed -n 's/.*\([0-9]\)\+\.\([0-9]\)\+\.[0-9]/\2/gp' appscale/VERSION)" - if [ -z "$APPSCALE_MAJOR" -o -z "$APPSCALE_MINOR" ]; then - echo "Cannot determine version of AppScale!" - exit 1 - fi - # This is an upgrade, so let's make sure we use a tag that has - # been passed, or the last one available. Let's fetch all the - # available tags first. - (cd appscale; git fetch ${APPSCALE_REPO} -t) - (cd appscale-tools; git fetch ${APPSCALE_TOOLS_REPO} -t) - (cd appscale-agents; git fetch ${AGENTS_REPO} -t) +echo "Cloning appscale repositories" +# We split the commands, to ensure it fails if branch doesn't +# exists (Precise git will not fail otherwise). +git clone ${APPSCALE_REPO} appscale +git clone ${APPSCALE_TOOLS_REPO} appscale-tools +git clone ${AGENTS_REPO} appscale-agents +git clone ${THIRDPARTIES_REPO} appscale-thirdparties +# Use tags if we specified it. +if [ "$TAG_PARAM_SPECIFIED" = "Y" ]; then if [ "$GIT_TAG" = "last" ]; then GIT_TAG="$(cd appscale; git tag | tail -n 1)" - # Make sure we have this tag in the official repo. - if ! git ls-remote --tags ${APPSCALE_REPO} | grep -F $GIT_TAG > /dev/null ; then - echo "\"$GIT_TAG\" not recognized: use --tag to specify tag to upgrade to." - exit 1 - fi - fi - - # We can pull a tag only if we are on the master branch. - CURRENT_BRANCH="$(cd appscale; git branch --no-color | grep '^*' | cut -f 2 -d ' ')" - if [ "${CURRENT_BRANCH}" != "master" ] && \ - (cd appscale; git tag -l | grep $(git describe)) ; then - CURRENT_BRANCH="$(cd appscale; git tag -l | grep $(git describe))" - if [ "${CURRENT_BRANCH}" = "${GIT_TAG}" ]; then - echo "AppScale repository is already at the"\ - "specified release. Building with current code." - UPDATE_REPO="N" - fi - fi - - # If CURRENT_BRANCH is empty, then we are not on master, and we - # are not on a released version: we don't upgrade then. - if [ "${FORCE_UPGRADE}" = "N" ] && [ -z "${CURRENT_BRANCH}" ]; then - echo "Error: git repository is not 'master' or a released version." - exit 1 - fi - - # Make sure AppScale is not running. - MONIT=$(which monit) - if $MONIT summary | grep controller > /dev/null ; then - echo "AppScale is still running: please stop it" - [ "${FORCE_UPGRADE}" = "Y" ] || exit 1 - elif echo $MONIT | grep local > /dev/null ; then - # AppScale is not running but there is a monit - # leftover from the custom install. - $MONIT quit - fi - - # Let's keep a copy of the old config: we need to move it to avoid - # questions from dpkg. 
- if [ -e /etc/haproxy/haproxy.cfg ]; then - mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.appscale.old - fi - - # Remove outdated appscale-controller and appscale-progenitor. - if [ $APPSCALE_MAJOR -le 2 -a $APPSCALE_MINOR -le 2 ]; then - rm -f /etc/init.d/appscale-controller - rm -f /etc/init.d/appscale-progenitor - update-rc.d -f appscale-progenitor remove || true - fi - - # Remove control files we added before 1.14, and re-add the - # default ones. - if [ $APPSCALE_MAJOR -le 1 -a $APPSCALE_MINOR -le 14 ]; then - rm -f /etc/default/haproxy /etc/init.d/haproxy /etc/default/monit /etc/monitrc - if dpkg-query -l haproxy > /dev/null 2> /dev/null ; then - apt-get -o DPkg::Options::="--force-confmiss" --reinstall install haproxy - fi - if dpkg-query -l monit > /dev/null 2> /dev/null ; then - apt-get -o DPkg::Options::="--force-confmiss" --reinstall install monit - fi - fi - - - if [ "${UPDATE_REPO}" = "Y" ]; then - echo "Found AppScale version $APPSCALE_MAJOR.$APPSCALE_MINOR."\ - "An upgrade to the latest version available will be"\ - "attempted in 5 seconds." - sleep 5 - - # Upgrade the repository. If GIT_TAG is empty, we are on HEAD. - if [ -n "${GIT_TAG}" ]; then - if ! (cd appscale; git checkout "$GIT_TAG"); then - echo "Please stash your local unsaved changes and checkout"\ - "the version of AppScale you are currently using to fix"\ - "this error." - echo "e.g.: git stash; git checkout " - exit 1 - fi - - if ! (cd appscale-tools; git checkout "$GIT_TAG"); then - echo "Please stash your local unsaved changes and checkout"\ - "the version of appscale-tools you are currently using"\ - "to fix this error." - echo "e.g.: git stash; git checkout " - exit 1 - fi - elif [ "${FORCE_UPGRADE}" = "N" ]; then - (cd appscale; git pull) - (cd appscale-tools; git pull) - else - RANDOM_KEY="$(echo $(date), $$|md5sum|head -c 6)-$(date +%s)" - REMOTE_REPO_NAME="appscale-bootstrap-${RANDOM_KEY}" - if ! (cd appscale; - git remote add -t "${APPSCALE_BRANCH}" -f "${REMOTE_REPO_NAME}" "${APPSCALE_REPO}"; - git checkout "${REMOTE_REPO_NAME}"/"${APPSCALE_BRANCH}"); then - echo "Please make sure the repository url is correct, the"\ - "branch exists, and that you have stashed your local"\ - "changes." - echo "e.g.: git stash, git remote add -t {remote_branch} -f"\ - "{repo_name} {repository_url}; git checkout"\ - "{repo_name}/{remote_branch}" - exit 1 - fi - if ! (cd appscale-tools; - git remote add -t "${APPSCALE_TOOLS_BRANCH}" -f "${REMOTE_REPO_NAME}" "${APPSCALE_TOOLS_REPO}"; - git checkout "${REMOTE_REPO_NAME}"/"${APPSCALE_TOOLS_BRANCH}"); then - echo "Please make sure the repository url is correct, the"\ - "branch exists, and that you have stashed your local"\ - "changes." - echo "e.g.: git stash, git remote add -t {remote_branch} -f"\ - "{repo_name} {repository_url}; git checkout"\ - "{repo_name}/{remote_branch}" - exit 1 - fi - fi fi + (cd appscale; git checkout "$GIT_TAG") + (cd appscale-tools; git checkout "$GIT_TAG") + (cd appscale-agents; git checkout "$GIT_TAG") + (cd appscale-thirdparties; git checkout "$GIT_TAG") +else + (cd appscale; git checkout ${APPSCALE_BRANCH}) + (cd appscale-tools; git checkout ${APPSCALE_TOOLS_BRANCH}) + (cd appscale-agents; git checkout ${AGENTS_BRANCH}) + (cd appscale-thirdparties; git checkout ${THIRDPARTIES_BRANCH}) fi echo -n "Building AppScale..." @@ -376,23 +242,13 @@ if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then exit 1 fi -# Run unit tests if asked. -if [ "$UNIT_TEST" = "Y" ]; then - echo "Running Unit tests" - (cd appscale; rake) - if [ $? 
-gt 0 ]; then - echo "Unit tests failed for appscale!" - exit 1 - fi - (cd appscale-tools; rake) - if [ $? -gt 0 ]; then - echo "Unit tests failed for appscale-tools!" - exit 1 - fi - echo "Unit tests complete" +echo -n "Downloading Thirdparty artifacts..." +if ! (cd appscale-thirdparties/; bash download_all_artifacts.sh) ; then + echo "failed!" + exit 1 fi -# Let's source the profles so this image can be used right away. +# Let's source the profiles so this image can be used right away. . /etc/profile.d/appscale.sh echo "*****************************************" From 989a0a754b5349c27a9cad0911e0131c4b0b1667 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Fri, 16 Aug 2019 19:14:21 -0700 Subject: [PATCH 043/221] Use log_fatal during log_and_crash Also use log_and_crash instead of abort() directly. --- AppController/lib/helperfunctions.rb | 2 +- AppController/lib/taskqueue.rb | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/AppController/lib/helperfunctions.rb b/AppController/lib/helperfunctions.rb index 7dfdd98fd4..b2f49c7e35 100644 --- a/AppController/lib/helperfunctions.rb +++ b/AppController/lib/helperfunctions.rb @@ -914,7 +914,7 @@ def self.log_and_crash(message, sleep = nil) write_file(APPCONTROLLER_CRASHLOG_LOCATION, Time.new.to_s + ': ' + message) # Try to also log to the normal log file. - Djinn.log_error("FATAL: #{message}") + Djinn.log_fatal("#{message}") # If asked for, wait for a while before crashing. This will help the # tools to collect the status report or crashlog. diff --git a/AppController/lib/taskqueue.rb b/AppController/lib/taskqueue.rb index a4abccf02f..e6c5a6fff8 100755 --- a/AppController/lib/taskqueue.rb +++ b/AppController/lib/taskqueue.rb @@ -216,10 +216,7 @@ def self.start_slave(master_ip, clear_data, verbose) Djinn.log_run("ps ax | grep rabbit | grep -v grep | awk '{print $1}' | xargs kill -9") erase_local_files if clear_data end - if tries_left.zero? - Djinn.log_fatal('CRITICAL ERROR: RabbitMQ slave failed to come up') - abort - end + HelperFunctions.log_and_crash('RabbitMQ slave failed to come up') if tries_left.zero? } end From 65f73d8c245a485e6ec84296528b3c49811f3471 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Fri, 16 Aug 2019 19:23:49 -0700 Subject: [PATCH 044/221] Don't go to monit if service is not running --- AppController/lib/monit_interface.rb | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/AppController/lib/monit_interface.rb b/AppController/lib/monit_interface.rb index bbf6e1ac2b..2d3b4d3458 100644 --- a/AppController/lib/monit_interface.rb +++ b/AppController/lib/monit_interface.rb @@ -120,6 +120,12 @@ def self.restart(watch) # This function unmonitors and optionally stops the service, and removes # the monit configuration file. def self.stop(watch, stop = true) + # No need to do anything if the service is not running. + unless is_running?(watch) + Djinn.log_debug("Asked to stop #{watch} but it is not running.") + return + end + # To make sure the service is stopped, we query monit till the service # is not any longer running. running = true @@ -203,8 +209,9 @@ def self.service_config(process_name, group, start_cmd, env_vars, mem) end def self.is_running?(watch) - output = run_cmd("#{MONIT} summary | grep \"'#{watch}'\" | grep -E "\ - '"(Running|Initializing|OK)"') + script = `which appscale-admin`.chomp + HelperFunctions.log_and_crash("Cannod find appscale-admin!") if script.empty? 
+ output = run_cmd("#{script} summary | grep \"'#{watch}'\"") (output != '') end From 5ccb555a6d97a1909569503e7f85ee8e92cbc03a Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Sat, 17 Aug 2019 14:26:35 -0700 Subject: [PATCH 045/221] Crash if we cannot find appscale-admin --- AppController/djinn.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 678af49980..eb038ca527 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -4492,6 +4492,7 @@ def start_appcontroller(node) def start_admin_server Djinn.log_info('Starting AdminServer') script = `which appscale-admin`.chomp + HelperFunctions.log_and_crash("Cannod find appscale-admin!") if script.empty? nginx_port = 17441 service_port = 17442 start_cmd = "#{script} serve -p #{service_port}" From 767e6ffc53cdb295858e2f80967ff316047c84a6 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Sat, 17 Aug 2019 14:44:21 -0700 Subject: [PATCH 046/221] Autoscaled nodes do not check database layout If we got thus far to start autoscaled nodes, the datastore must be primed. This shaves off almost 2 minutes in start up time. --- AppController/djinn.rb | 50 ++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index eb038ca527..7dea4b2f6c 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3300,28 +3300,40 @@ def start_stop_api_services Djinn.log_info('Waiting for DB services ... ') threads.each { |t| t.join } - Djinn.log_info('Ensuring necessary database tables are present') - sleep(SMALL_WAIT) until system("#{PRIME_SCRIPT} --check > /dev/null 2>&1") - - Djinn.log_info('Ensuring data layout version is correct') - layout_script = `which appscale-data-layout`.chomp - retries = 10 - loop { - output = `#{layout_script} --db-type cassandra 2>&1` - if $?.exitstatus == 0 + # Autoscaled nodes do not need to check if the datastore is primed: if + # we got this far, it must be primed. + am_i_autoscaled = false + get_autoscaled_nodes.each { |node| + if slave.private_ip == my_node.private_ip + am_i_autoscaled = true + Djinn.log_info("Skipping database layout check on scaled node.") break - elsif $?.exitstatus == INVALID_VERSION_EXIT_CODE - HelperFunctions.log_and_crash( - 'Unexpected data layout version. Please run "appscale upgrade".') - elsif retries.zero? - HelperFunctions.log_and_crash( - 'Exceeded retries while trying to check data layout.') - else - Djinn.log_warn("Error while checking data layout:\n#{output}") - sleep(SMALL_WAIT) end - retries -= 1 } + unless am_i_autoscaled + Djinn.log_info('Ensuring necessary database tables are present') + sleep(SMALL_WAIT) until system("#{PRIME_SCRIPT} --check > /dev/null 2>&1") + + Djinn.log_info('Ensuring data layout version is correct') + layout_script = `which appscale-data-layout`.chomp + retries = 10 + loop { + output = `#{layout_script} --db-type cassandra 2>&1` + if $?.exitstatus == 0 + break + elsif $?.exitstatus == INVALID_VERSION_EXIT_CODE + HelperFunctions.log_and_crash( + 'Unexpected data layout version. Please run "appscale upgrade".') + elsif retries.zero? + HelperFunctions.log_and_crash( + 'Exceeded retries while trying to check data layout.') + else + Djinn.log_warn("Error while checking data layout:\n#{output}") + sleep(SMALL_WAIT) + end + retries -= 1 + } + end if my_node.is_db_master? or my_node.is_db_slave? 
@state = "Starting UAServer" From ce04efc8d029294aaf069b0025a8e66b37f34019 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Sat, 17 Aug 2019 18:31:18 -0700 Subject: [PATCH 047/221] Fix typo. Thou shalt not cut and paste mindlessly ... --- AppController/djinn.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 7dea4b2f6c..9281ed6efb 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3304,7 +3304,7 @@ def start_stop_api_services # we got this far, it must be primed. am_i_autoscaled = false get_autoscaled_nodes.each { |node| - if slave.private_ip == my_node.private_ip + if node.private_ip == my_node.private_ip am_i_autoscaled = true Djinn.log_info("Skipping database layout check on scaled node.") break From 2d39e384197e637715641789d4fb514d9c74279e Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 21 Aug 2019 19:25:06 -0700 Subject: [PATCH 048/221] Add preliminary support for metadata queries This allows clients to get a consistent list of kinds in a namespace. Many tools, including the datastore viewer, use this type of query. --- AppDB/appscale/datastore/fdb/data.py | 9 +++++++ AppDB/appscale/datastore/fdb/indexes.py | 32 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/AppDB/appscale/datastore/fdb/data.py b/AppDB/appscale/datastore/fdb/data.py index 933d77c6f2..0c372af0c1 100644 --- a/AppDB/appscale/datastore/fdb/data.py +++ b/AppDB/appscale/datastore/fdb/data.py @@ -419,6 +419,15 @@ def get_entry(self, tr, index_entry, snapshot=False): Returns: A VersionEntry or None. """ + if index_entry.kind == u'__kind__': + entity = entity_pb.EntityProto() + entity.mutable_key().MergeFrom(index_entry.key) + entity.mutable_entity_group().MergeFrom(index_entry.group) + version_entry = VersionEntry( + index_entry.project_id, index_entry.namespace, index_entry.path, + encoded_entity=entity.Encode()) + raise gen.Return(version_entry) + version_entry = yield self.get_version_from_path( tr, index_entry.project_id, index_entry.namespace, index_entry.path, index_entry.commit_versionstamp, snapshot) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index c515d48db8..830dae184d 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -154,6 +154,10 @@ def __init__(self, project_id, namespace, path, commit_versionstamp, self.commit_versionstamp = commit_versionstamp self.deleted_versionstamp = deleted_versionstamp + @property + def kind(self): + return self.path[-2] + @property def key(self): key = entity_pb.Reference() @@ -331,6 +335,30 @@ def _usable(self, entry): return entry.deleted_versionstamp is None +class KindIterator(object): + def __init__(self, tr, project_dir, namespace): + self._tr = tr + self._project_dir = project_dir + self._namespace = namespace + self._done = False + + @gen.coroutine + def next_page(self): + if self._done: + raise gen.Return(([], False)) + + # TODO: This can be made async. + ns_dir = self._project_dir.open( + self._tr, (KindIndex.DIR_NAME, self._namespace)) + kinds = ns_dir.list(self._tr) + results = [IndexEntry(self._project_dir.get_path()[-1], self._namespace, + (u'__kind__', kind), None, None) + for kind in kinds] + + self._done = True + raise gen.Return((results, False)) + + class MergeJoinIterator(object): """ Returns pages of index entry results from multiple ranges. 
It ignores @@ -1085,6 +1113,10 @@ def get_iterator(self, tr, query, read_versionstamp=None): if check_more_results: fetch_limit += 1 + if query.has_kind() and query.kind() == u'__kind__': + project_dir = yield self._directory_cache.get(tr, (project_id,)) + raise gen.Return(KindIterator(tr, project_dir, namespace)) + index = yield self._get_perfect_index(tr, query) reverse = get_scan_direction(query, index) == Query_Order.DESCENDING From 8955376b69cb6b7d60b3dba55da653785402a475 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 21 Aug 2019 19:32:06 -0700 Subject: [PATCH 049/221] Add preliminary support for datastore statistics This allows the datastore to quickly summarize the count and byte count of various aspects of entity data. --- AppDB/appscale/datastore/fdb/data.py | 6 +- AppDB/appscale/datastore/fdb/fdb_datastore.py | 57 +- AppDB/appscale/datastore/fdb/indexes.py | 40 +- AppDB/appscale/datastore/fdb/stats.py | 795 ++++++++++++++++++ 4 files changed, 870 insertions(+), 28 deletions(-) create mode 100644 AppDB/appscale/datastore/fdb/stats.py diff --git a/AppDB/appscale/datastore/fdb/data.py b/AppDB/appscale/datastore/fdb/data.py index 933d77c6f2..64e8faf7af 100644 --- a/AppDB/appscale/datastore/fdb/data.py +++ b/AppDB/appscale/datastore/fdb/data.py @@ -43,6 +43,10 @@ def __init__(self, project_id, namespace, path, commit_versionstamp=None, self._encoded_entity = encoded_entity self._decoded_entity = None + @property + def kind(self): + return self.path[-2] + @property def present(self): return self.commit_versionstamp is not None @@ -474,7 +478,7 @@ def put(self, tr, key, version, encoded_entity): tr: An FDB transaction. key: A protobuf reference object. version: An integer specifying the new entity version. - encoded_entity: A string specifying the encoded entity data. + encoded_entity: A byte string specifying the encoded entity data. 
""" data_ns = yield self._data_ns_from_key(tr, key) for fdb_key, val in data_ns.encode(key.path(), encoded_entity, version): diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index e3157e5fa6..b34e42e5b2 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -27,6 +27,7 @@ from appscale.datastore.fdb.gc import GarbageCollector from appscale.datastore.fdb.indexes import ( get_order_info, IndexManager, KEY_PROP) +from appscale.datastore.fdb.stats import StatsBuffer, StatsSummary from appscale.datastore.fdb.transactions import TransactionManager from appscale.datastore.fdb.utils import ( ABSENT_VERSION, fdb, FDBErrorCodes, next_entity_version, DS_ROOT, @@ -49,6 +50,7 @@ def __init__(self): self._tornado_fdb = None self._tx_manager = None self._gc = None + self._stats_buffer = None def start(self, fdb_clusterfile): self._db = fdb.open(fdb_clusterfile) @@ -62,11 +64,16 @@ def start(self, fdb_clusterfile): self._db, self._tornado_fdb, self._data_manager, directory_cache) self._tx_manager = TransactionManager( self._db, self._tornado_fdb, directory_cache) + self._gc = GarbageCollector( self._db, self._tornado_fdb, self._data_manager, self.index_manager, self._tx_manager, directory_cache) self._gc.start() + self._stats_buffer = StatsBuffer( + self._db, self._tornado_fdb, directory_cache, self) + self._stats_buffer.start() + @gen.coroutine def dynamic_put(self, project_id, put_request, put_response, retries=5): # logger.debug(u'put_request:\n{}'.format(put_request)) @@ -84,7 +91,8 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): if put_request.has_transaction(): yield self._tx_manager.log_puts(tr, project_id, put_request) writes = [(VersionEntry.from_key(entity.key()), - VersionEntry.from_key(entity.key())) + VersionEntry.from_key(entity.key()), + None) for entity in put_request.entity_list()] else: futures = [] @@ -93,7 +101,8 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): writes = yield futures - old_entries = [old_entry for old_entry, _ in writes if old_entry.present] + old_entries = [old_entry for old_entry, _, _ in writes + if old_entry.present] versionstamp_future = None if old_entries: versionstamp_future = tr.get_versionstamp() @@ -114,7 +123,11 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) - for _, new_entry in writes: + stat_diffs = [(project_id, old_entry.namespace, old_entry.path, stats) + for old_entry, _, stats in writes if stats is not None] + IOLoop.current().spawn_callback(self._stats_buffer.apply_diffs, stat_diffs) + + for _, new_entry, _ in writes: put_response.add_key().CopyFrom(new_entry.key) if new_entry.version != ABSENT_VERSION: put_response.add_version(new_entry.version) @@ -170,7 +183,7 @@ def dynamic_delete(self, project_id, delete_request, retries=5): if delete_request.has_transaction(): yield self._tx_manager.log_deletes(tr, project_id, delete_request) - deletes = [(VersionEntry.from_key(key), None) + deletes = [(VersionEntry.from_key(key), None, None) for key in delete_request.key_list()] else: futures = [] @@ -179,7 +192,8 @@ def dynamic_delete(self, project_id, delete_request, retries=5): deletes = yield futures - old_entries = [old_entry for old_entry, _ in deletes if old_entry.present] + old_entries = [old_entry for old_entry, _, _ in deletes + if old_entry.present] 
versionstamp_future = None if old_entries: versionstamp_future = tr.get_versionstamp() @@ -200,8 +214,12 @@ def dynamic_delete(self, project_id, delete_request, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) + stat_diffs = [(project_id, old_entry.namespace, old_entry.path, stats) + for old_entry, _, stats in deletes if stats is not None] + IOLoop.current().spawn_callback(self._stats_buffer.apply_diffs, stat_diffs) + # TODO: Once the Cassandra backend is removed, populate a delete response. - for old_entry, new_version in deletes: + for old_entry, new_version, _ in deletes: logger.debug(u'new_version: {}'.format(new_version)) @gen.coroutine @@ -317,12 +335,14 @@ def apply_txn_changes(self, project_id, txid, retries=5): tr, project_id, txid) try: - old_entries = yield self._apply_mutations( + writes = yield self._apply_mutations( tr, project_id, queried_groups, mutations, lookups, read_versionstamp) finally: yield self._tx_manager.delete(tr, project_id, txid) versionstamp_future = None + old_entries = [old_entry for old_entry, _, _ in writes + if old_entry.present] if old_entries: versionstamp_future = tr.get_versionstamp() @@ -342,6 +362,10 @@ def apply_txn_changes(self, project_id, txid, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) + stat_diffs = [(project_id, old_entry.namespace, old_entry.path, stats) + for old_entry, _, stats in writes if stats is not None] + IOLoop.current().spawn_callback(self._stats_buffer.apply_diffs, stat_diffs) + logger.debug(u'Finished applying {}:{}'.format(project_id, txid)) @gen.coroutine @@ -384,15 +408,19 @@ def _upsert(self, tr, entity, old_entry_future=None): raise InternalError(u'The datastore chose an existing ID') new_version = next_entity_version(old_entry.version) + encoded_entity = entity.Encode() yield self._data_manager.put( - tr, entity.key(), new_version, entity.Encode()) - yield self.index_manager.put_entries(tr, old_entry, entity) + tr, entity.key(), new_version, encoded_entity) + stats = yield self.index_manager.put_entries(tr, old_entry, entity) + if old_entry.present: yield self._gc.index_deleted_version(tr, old_entry) + stats -= StatsSummary.from_entity(old_entry.encoded) new_entry = VersionEntry.from_key(entity.key()) new_entry.version = new_version - raise gen.Return((old_entry, new_entry)) + stats += StatsSummary.from_entity(encoded_entity) + raise gen.Return((old_entry, new_entry, stats)) @gen.coroutine def _delete(self, tr, key, old_entry_future=None): @@ -406,11 +434,13 @@ def _delete(self, tr, key, old_entry_future=None): new_version = next_entity_version(old_entry.version) yield self._data_manager.put(tr, key, new_version, b'') - yield self.index_manager.put_entries(tr, old_entry, new_entity=None) + stats = yield self.index_manager.put_entries(tr, old_entry, new_entity=None) + if old_entry.present: yield self._gc.index_deleted_version(tr, old_entry) + stats -= StatsSummary.from_entity(old_entry.encoded) - raise gen.Return((old_entry, new_version)) + raise gen.Return((old_entry, new_version, stats)) @gen.coroutine def _apply_mutations(self, tr, project_id, queried_groups, mutations, @@ -472,8 +502,7 @@ def _apply_mutations(self, tr, project_id, queried_groups, mutations, mutation_futures.append(self._upsert(tr, mutation, old_entry_future)) responses = yield mutation_futures - raise gen.Return([old_entry for old_entry, _ in responses - if old_entry.present]) + raise gen.Return(responses) @staticmethod def _collapse_mutations(mutations): 
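Each write now carries its own statistics delta: what the new entity's index entries add minus what the old version contributed, with the accumulated diffs handed to the stats buffer outside the transaction so the RPC response is never delayed. A minimal sketch of that pattern, assuming a StatsSummary-style value object that supports addition and subtraction; the field names and the flush helper below are illustrative, not the module's actual API:

    from tornado.ioloop import IOLoop

    class StatsDelta(object):
        """Illustrative stand-in for StatsSummary: a value object with + and -."""
        def __init__(self, entity_bytes=0, index_count=0):
            self.entity_bytes = entity_bytes
            self.index_count = index_count

        def __add__(self, other):
            return StatsDelta(self.entity_bytes + other.entity_bytes,
                              self.index_count + other.index_count)

        def __sub__(self, other):
            return StatsDelta(self.entity_bytes - other.entity_bytes,
                              self.index_count - other.index_count)

    def flush_stat_diffs(stats_buffer, project_id, writes):
        # writes is a list of (old_entry, new_entry, stats) triples; only entries
        # that produced a delta are forwarded, and the flush runs on the IOLoop
        # so it never blocks the datastore response.
        diffs = [(project_id, old.namespace, old.path, stats)
                 for old, _, stats in writes if stats is not None]
        IOLoop.current().spawn_callback(stats_buffer.apply_diffs, diffs)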
diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index c515d48db8..9f48fb402b 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -17,6 +17,7 @@ from appscale.datastore.fdb.codecs import ( decode_str, decode_value, encode_value, encode_versionstamp_index, Path) from appscale.datastore.fdb.sdk import FindIndexToUse, ListCursor +from appscale.datastore.fdb.stats import StatsSummary from appscale.datastore.fdb.utils import ( format_prop_val, DS_ROOT, fdb, get_scatter_val, MAX_FDB_TX_DURATION, ResultIterator, SCATTER_PROP, VERSIONSTAMP_SIZE) @@ -1007,26 +1008,31 @@ def __init__(self, db, tornado_fdb, data_manager, directory_cache): @gen.coroutine def put_entries(self, tr, old_version_entry, new_entity): + old_key_stats = StatsSummary() if old_version_entry.has_entity: - keys = yield self._get_index_keys( + old_keys, old_key_stats = yield self._get_index_keys( tr, old_version_entry.decoded, old_version_entry.commit_versionstamp) - for key in keys: + for key in old_keys: # Set deleted versionstamp. tr.set_versionstamped_value( key, b'\x00' * VERSIONSTAMP_SIZE + encode_versionstamp_index(0)) + new_key_stats = StatsSummary() if new_entity is not None: - keys = yield self._get_index_keys(tr, new_entity) - for key in keys: + new_keys, new_key_stats = yield self._get_index_keys( + tr, new_entity) + for key in new_keys: tr.set_versionstamped_key(key, b'') + raise gen.Return(new_key_stats - old_key_stats) + @gen.coroutine def hard_delete_entries(self, tr, version_entry): if not version_entry.has_entity: return - keys = yield self._get_index_keys( - tr, version_entry.decoded, version_entry.commit_versionstamp) + keys = (yield self._get_index_keys(tr, version_entry.decoded, + version_entry.commit_versionstamp))[0] for key in keys: del tr[key] @@ -1160,21 +1166,26 @@ def _get_index_keys(self, tr, entity, commit_versionstamp=None): path = Path.flatten(entity.key().path()) kind = path[-2] + stats = StatsSummary() kindless_index = yield self._kindless_index(tr, project_id, namespace) kind_index = yield self._kind_index(tr, project_id, namespace, kind) composite_indexes = yield self._get_indexes( tr, project_id, namespace, kind) - all_keys = [kindless_index.encode_key(path, commit_versionstamp), - kind_index.encode_key(path, commit_versionstamp)] + kindless_keys = kindless_index.encode_key(path, commit_versionstamp) + kind_keys = kind_index.encode_key(path, commit_versionstamp) + stats.add_kindless_keys(kindless_keys) + stats.add_kind_keys(kind_keys) + all_keys = [kindless_keys, kind_keys] entity_prop_names = [] for prop in entity.property_list(): prop_name = decode_str(prop.name()) entity_prop_names.append(prop_name) index = yield self._single_prop_index( tr, project_id, namespace, kind, prop_name) - all_keys.append( - index.encode_key(prop.value(), path, commit_versionstamp)) + prop_key = index.encode_key(prop.value(), path, commit_versionstamp) + stats.add_prop_key(prop, prop_key) + all_keys.append(prop_key) scatter_val = get_scatter_val(path) if scatter_val is not None: @@ -1183,14 +1194,17 @@ all_keys.append(index.encode_key(scatter_val, path, commit_versionstamp)) for index in composite_indexes: + # If the entity does not have the relevant props for the index, skip it. 
if not all(index_prop_name in entity_prop_names for index_prop_name in index.prop_names): continue - all_keys.extend( - index.encode_keys(entity.property_list(), path, commit_versionstamp)) + composite_keys = index.encode_keys(entity.property_list(), path, + commit_versionstamp) + stats.add_composite_keys(composite_keys) + all_keys.extend(composite_keys) - raise gen.Return(all_keys) + raise gen.Return((all_keys, stats)) @gen.coroutine def _get_perfect_index(self, tr, query): diff --git a/AppDB/appscale/datastore/fdb/stats.py b/AppDB/appscale/datastore/fdb/stats.py new file mode 100644 index 0000000000..1caa582665 --- /dev/null +++ b/AppDB/appscale/datastore/fdb/stats.py @@ -0,0 +1,795 @@ +import datetime +import logging +import random +import struct +import sys +import time +from collections import defaultdict + +import six +from tornado import gen +from tornado.ioloop import IOLoop +from tornado.locks import Lock as AsyncLock + +from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.datastore.fdb.codecs import decode_str +from appscale.datastore.fdb.polling_lock import PollingLock +from appscale.datastore.fdb.utils import fdb, ResultIterator + +sys.path.append(APPSCALE_PYTHON_APPSERVER) +from google.appengine.api import datastore +from google.appengine.datastore.entity_pb import Property as Meaning + +logger = logging.getLogger(__name__) + + +class StatsPropTypes(object): + STRING = 0x01 + BOOLEAN = 0x02 + INTEGER = 0x03 + NULL = 0x04 + FLOAT = 0x05 + KEY = 0x06 + BLOB = 0x07 + EMBEDDED_ENTITY = 0x08 + SHORT_BLOB = 0x09 + TEXT = 0x0A + USER = 0x0B + CATEGORY = 0x0C + LINK = 0x0D + EMAIL = 0x0E + DATE_TIME = 0x0F + GEO_PT = 0x10 + IM = 0x11 + PHONE_NUMBER = 0x12 + POSTAL_ADDRESS = 0x13 + RATING = 0x14 + BLOB_KEY = 0x15 + + MEANING_TYPES = { + Meaning.GD_WHEN: DATE_TIME, + Meaning.ATOM_CATEGORY: CATEGORY, + Meaning.ATOM_LINK: LINK, + Meaning.GD_EMAIL: EMAIL, + Meaning.GD_IM: IM, + Meaning.GD_PHONENUMBER: PHONE_NUMBER, + Meaning.GD_POSTALADDRESS: POSTAL_ADDRESS, + Meaning.GD_RATING: RATING, + Meaning.BLOB: BLOB, + Meaning.ENTITY_PROTO: EMBEDDED_ENTITY, + Meaning.BYTESTRING: SHORT_BLOB, + Meaning.TEXT: TEXT, + Meaning.BLOBKEY: BLOB_KEY + } + + VALUE_TYPES = { + 'string': STRING, + 'int64': INTEGER, + 'boolean': BOOLEAN, + 'double': FLOAT, + 'reference': KEY, + 'point': GEO_PT, + 'user': USER + } + + NAMES = { + STRING: u'String', + BOOLEAN: u'Boolean', + INTEGER: u'Integer', + NULL: u'NULL', + FLOAT: u'Float', + KEY: u'Key', + BLOB: u'Blob', + EMBEDDED_ENTITY: u'EmbeddedEntity', + SHORT_BLOB: u'ShortBlob', + TEXT: u'Text', + USER: u'User', + CATEGORY: u'Category', + LINK: u'Link', + EMAIL: u'Email', + DATE_TIME: u'Date/Time', + GEO_PT: u'GeoPt', + IM: u'IM', + PHONE_NUMBER: u'PhoneNumber', + POSTAL_ADDRESS: u'PostalAddress', + RATING: u'Rating', + BLOB_KEY: u'BlobKey' + } + + +def stats_prop_type(prop_pb): + """ Determines the property type for a Property object. + + Args: + prop_pb: An entity_pb.Property object. + + Returns: + A constant from PropertyTypes. 
+ """ + value_type = StatsPropTypes.NULL + for type_name, type_code in six.iteritems(StatsPropTypes.VALUE_TYPES): + if getattr(prop_pb.value(), 'has_{}value'.format(type_name))(): + value_type = type_code + break + + if prop_pb.has_meaning(): + value_type = StatsPropTypes.MEANING_TYPES.get( + prop_pb.meaning(), value_type) + + return value_type + + +def fill_stat_entities(project_id, stats_by_ns_kind_isroot, + entity_bytes_by_prop, timestamp): + stats_by_ns_kind = defaultdict(lambda: defaultdict(StatsSummary)) + for namespace, kinds in six.iteritems(stats_by_ns_kind_isroot): + for kind, (root, non_root) in six.iteritems(kinds): + stats_by_ns_kind[namespace][kind] += root + non_root + + stats_by_namespace = {} + for namespace, kinds in six.iteritems(stats_by_ns_kind): + stats_by_namespace[namespace] = sum(six.itervalues(kinds), StatsSummary()) + + stats_by_kind = defaultdict(StatsSummary) + for namespace, kinds in six.iteritems(stats_by_ns_kind): + for kind, stats in six.iteritems(kinds): + stats_by_kind[kind] += stats + + entities = [] + + # TODO: Cover and test all stat entity types. + total_stats = sum(six.itervalues(stats_by_namespace), StatsSummary()) + entity = datastore.Entity( + '__Stat_Total__', _app=project_id, name='total_entity_usage') + entity['bytes'] = total_stats.total_bytes + entity['count'] = total_stats.entity_count + entity['timestamp'] = timestamp + + entity['entity_bytes'] = total_stats.entity_bytes + entity['builtin_index_bytes'] = total_stats.builtin_bytes + entity['builtin_index_count'] = total_stats.builtin_count + entity['composite_index_bytes'] = total_stats.composite_bytes + entity['composite_index_count'] = total_stats.composite_count + entities.append(entity) + + for namespace, stats in six.iteritems(stats_by_namespace): + if namespace: + entity = datastore.Entity('__Stat_Namespace__', _app=project_id, + name=namespace) + else: + entity = datastore.Entity('__Stat_Namespace__', _app=project_id, id=1) + + entity['bytes'] = stats.total_bytes + entity['count'] = stats.entity_count + entity['timestamp'] = timestamp + + entity['subject_namespace'] = namespace + entity['entity_bytes'] = stats.entity_bytes + entity['builtin_index_bytes'] = stats.builtin_bytes + entity['builtin_index_count'] = stats.builtin_count + entity['composite_index_bytes'] = stats.composite_bytes + entity['composite_index_count'] = stats.composite_count + entities.append(entity) + + for kind, stats in six.iteritems(stats_by_kind): + entity = datastore.Entity('__Stat_Kind__', _app=project_id, name=kind) + entity['bytes'] = stats.total_bytes + entity['count'] = stats.entity_count + entity['timestamp'] = timestamp + + entity['builtin_index_bytes'] = stats.builtin_bytes + entity['builtin_index_count'] = stats.builtin_count + entity['composite_index_bytes'] = stats.composite_bytes + entity['composite_index_count'] = stats.composite_count + entities.append(entity) + + stats_by_kind_root = defaultdict(StatsSummary) + stats_by_kind_nonroot = defaultdict(StatsSummary) + for namespace, kinds in six.iteritems(stats_by_ns_kind_isroot): + for kind, (root, non_root) in six.iteritems(kinds): + stats_by_kind_root[kind] += root + stats_by_kind_nonroot[kind] += non_root + + for kind, stats in six.iteritems(stats_by_kind_root): + entity = datastore.Entity('__Stat_Kind_IsRootEntity__', _app=project_id, + name=kind) + entity['bytes'] = stats.total_bytes + entity['count'] = stats.entity_count + entity['timestamp'] = timestamp + + entity['kind_name'] = kind + entity['entity_bytes'] = stats.entity_bytes + 
entities.append(entity) + + for kind, stats in six.iteritems(stats_by_kind_nonroot): + entity = datastore.Entity('__Stat_Kind_NotRootEntity__', _app=project_id, + name=kind) + entity['bytes'] = stats.total_bytes + entity['count'] = stats.entity_count + entity['timestamp'] = timestamp + + entity['kind_name'] = kind + entity['entity_bytes'] = stats.entity_bytes + entities.append(entity) + + # entity_bytes, builtin_index_bytes, builtin_index_count + stats_by_prop_type = defaultdict(lambda: [0, 0, 0]) + for namespace, kinds in six.iteritems(entity_bytes_by_prop): + for kind, prop_names in six.iteritems(kinds): + for prop_name, prop_types in six.iteritems(prop_names): + for prop_type, byte_count in six.iteritems(prop_types): + stats_by_prop_type[prop_type][0] += byte_count + + for prop_name, prop_types in six.iteritems(total_stats.prop_bytes): + for prop_type, byte_count in six.iteritems(prop_types): + stats_by_prop_type[prop_type][1] += byte_count + + for prop_name, prop_types in six.iteritems(total_stats.prop_count): + for prop_type, count in six.iteritems(prop_types): + stats_by_prop_type[prop_type][2] += count + + for prop_type, (entity_bytes, builtin_bytes, builtin_count) in \ + six.iteritems(stats_by_prop_type): + entity = datastore.Entity('__Stat_PropertyType__', _app=project_id, + name=StatsPropTypes.NAMES[prop_type]) + entity['bytes'] = entity_bytes + builtin_bytes + entity['count'] = builtin_count + entity['timestamp'] = timestamp + + entity['property_type'] = StatsPropTypes.NAMES[prop_type] + entity['entity_bytes'] = entity_bytes + entity['builtin_index_bytes'] = builtin_bytes + entity['builtin_index_count'] = builtin_count + entities.append(entity) + + # entity_bytes, builtin_index_bytes, builtin_index_count + stats_by_kind_prop_type = defaultdict(lambda: defaultdict(lambda: [0, 0, 0])) + for namespace, kinds in six.iteritems(entity_bytes_by_prop): + for kind, prop_names in six.iteritems(kinds): + for prop_name, prop_types in six.iteritems(prop_names): + for prop_type, byte_count in six.iteritems(prop_types): + stats_by_kind_prop_type[kind][prop_type][0] += byte_count + + for kind, stats in six.iteritems(stats_by_kind): + for prop_name, prop_types in six.iteritems(stats.prop_bytes): + for prop_type, byte_count in six.iteritems(prop_types): + stats_by_kind_prop_type[kind][prop_type][1] += byte_count + + for prop_name, prop_types in six.iteritems(stats.prop_count): + for prop_type, count in six.iteritems(prop_types): + stats_by_kind_prop_type[kind][prop_type][2] += count + + for kind, prop_types in six.iteritems(stats_by_kind_prop_type): + for prop_type, (entity_bytes, builtin_bytes, builtin_count) \ + in six.iteritems(prop_types): + type_name = StatsPropTypes.NAMES[prop_type] + entity = datastore.Entity('__Stat_PropertyType_Kind__', _app=project_id, + name=u'_'.join([type_name, kind])) + entity['bytes'] = entity_bytes + builtin_bytes + entity['count'] = builtin_count + entity['timestamp'] = timestamp + + entity['kind_name'] = kind + entity['entity_bytes'] = entity_bytes + + entity['property_type'] = type_name + entity['builtin_index_bytes'] = builtin_bytes + entity['builtin_index_count'] = builtin_count + entities.append(entity) + + # entity_bytes, builtin_index_bytes, builtin_index_count + stats_by_kind_prop_name = defaultdict(lambda: defaultdict(lambda: [0, 0, 0])) + for namespace, kinds in six.iteritems(entity_bytes_by_prop): + for kind, prop_names in six.iteritems(kinds): + for prop_name, prop_types in six.iteritems(prop_names): + stats_by_kind_prop_name[kind][prop_name][0] += \ 
+ sum(six.itervalues(prop_types)) + + for kind, stats in six.iteritems(stats_by_kind): + for prop_name, prop_types in six.iteritems(stats.prop_bytes): + stats_by_kind_prop_name[kind][prop_name][1] += \ + sum(six.itervalues(prop_types)) + + for prop_name, prop_types in six.iteritems(stats.prop_count): + stats_by_kind_prop_name[kind][prop_name][2] += \ + sum(six.itervalues(prop_types)) + + for kind, prop_types in six.iteritems(stats_by_kind_prop_name): + for prop_name, (entity_bytes, builtin_bytes, builtin_count) \ + in six.iteritems(prop_types): + entity = datastore.Entity('__Stat_PropertyType_Kind__', _app=project_id, + name=u'_'.join([prop_name, kind])) + entity['bytes'] = entity_bytes + builtin_bytes + entity['count'] = builtin_count + entity['timestamp'] = timestamp + + entity['kind_name'] = kind + entity['entity_bytes'] = entity_bytes + + entity['property_name'] = prop_name + entity['builtin_index_bytes'] = builtin_bytes + entity['builtin_index_count'] = builtin_count + entities.append(entity) + + for namespace, kinds in six.iteritems(stats_by_ns_kind): + for kind, stats in six.iteritems(kinds): + entity = datastore.Entity( + '__Stat_Ns_Kind__', _app=project_id, name=kind, namespace=namespace) + entity['bytes'] = stats.total_bytes + entity['count'] = stats.entity_count + entity['timestamp'] = timestamp + + entity['kind_name'] = kind + entity['entity_bytes'] = stats.entity_bytes + + entity['builtin_index_bytes'] = stats.builtin_bytes + entity['builtin_index_count'] = stats.builtin_count + entity['composite_index_bytes'] = stats.composite_bytes + entity['composite_index_count'] = stats.composite_count + entities.append(entity) + + return entities + + +class ProjectStatsDir(object): + """ + A ProjectStatsDir handles the encoding and decoding details for a project's + stats entries. + + The directory path looks like (, 'stats'). 
+ """ + DIR_NAME = u'stats' + + def __init__(self, directory): + self.directory = directory + + def encode_entity_count(self, namespace, kind, is_root, count): + key = self.directory.pack((u'entities', namespace, kind, is_root, u'count')) + return key, self._encode_delta(count) + + def encode_entity_bytes(self, namespace, kind, is_root, byte_count): + key = self.directory.pack((u'entities', namespace, kind, is_root, u'bytes')) + return key, self._encode_delta(byte_count) + + def encode_kindless_count(self, namespace, kind, is_root, count): + key = self.directory.pack((u'kindless', namespace, kind, is_root, u'count')) + return key, self._encode_delta(count) + + def encode_kindless_bytes(self, namespace, kind, is_root, byte_count): + key = self.directory.pack((u'kindless', namespace, kind, is_root, u'bytes')) + return key, self._encode_delta(byte_count) + + def encode_kind_count(self, namespace, kind, is_root, count): + key = self.directory.pack((u'kind', namespace, kind, is_root, u'count')) + return key, self._encode_delta(count) + + def encode_kind_bytes(self, namespace, kind, is_root, byte_count): + key = self.directory.pack((u'kind', namespace, kind, is_root, u'bytes')) + return key, self._encode_delta(byte_count) + + def encode_prop_type_count(self, namespace, kind, is_root, prop_name, + prop_type, count): + key = self.directory.pack((u'prop-type', namespace, kind, is_root, + prop_name, prop_type, u'count')) + return key, self._encode_delta(count) + + def encode_prop_type_bytes(self, namespace, kind, is_root, prop_name, + prop_type, byte_count): + key = self.directory.pack((u'prop-type', namespace, kind, is_root, + prop_name, prop_type, u'bytes')) + return key, self._encode_delta(byte_count) + + def encode_composite_count(self, namespace, kind, is_root, count): + key = self.directory.pack((u'composite', namespace, kind, is_root, u'count')) + return key, self._encode_delta(count) + + def encode_composite_bytes(self, namespace, kind, is_root, byte_count): + key = self.directory.pack((u'composite', namespace, kind, is_root, u'bytes')) + return key, self._encode_delta(byte_count) + + def encode_entity_bytes_by_prop(self, namespace, kind, prop_name, prop_type, + byte_count): + key = self.directory.pack((u'entity-bytes-by-prop', namespace, kind, + prop_name, prop_type)) + return key, self._encode_delta(byte_count) + + def encode_last_versionstamp(self): + return self.directory.pack((u'last-versionstamp',)), b'\x00' * 14 + + def encode_last_timestamp(self): + key = self.directory.pack((u'last-timestamp',)) + value = fdb.tuple.pack((int(time.time()),)) + return key, value + + def decode(self, kvs): + # By namespace/kind/[root, nonroot] + stats_by_ns_kind_isroot = defaultdict( + lambda: defaultdict(lambda: [StatsSummary(), StatsSummary()])) + + # By namespace/kind/prop_name/prop_type + entity_bytes_by_prop = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(int)))) + last_timestamp = None + for kv in kvs: + path = self.directory.unpack(kv.key) + section = path[0] + if section == u'last-versionstamp': + continue + + if section == u'last-timestamp': + last_timestamp = datetime.datetime.utcfromtimestamp( + fdb.tuple.unpack(kv.value)[0]) + continue + + namespace = path[1] + kind = path[2] + value = struct.unpack(' Date: Thu, 22 Aug 2019 08:54:37 -0700 Subject: [PATCH 050/221] Add get_indexes method to FDBDatastore This adds support for v3.GetIndices calls. 
--- AppDB/appscale/datastore/fdb/fdb_datastore.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index e3157e5fa6..3faa374922 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -31,6 +31,7 @@ from appscale.datastore.fdb.utils import ( ABSENT_VERSION, fdb, FDBErrorCodes, next_entity_version, DS_ROOT, ScatteredAllocator, TornadoFDB) +from appscale.datastore.index_manager import IndexInaccessible sys.path.append(APPSCALE_PYTHON_APPSERVER) from google.appengine.datastore import entity_pb @@ -358,6 +359,30 @@ def update_composite_index(self, project_id, index): project_id = decode_str(project_id) yield self.index_manager.update_composite_index(project_id, index) + def get_indexes(self, project_id): + """ Retrieves list of indexes for a project. + + Args: + project_id: A string specifying a project ID. + Returns: + A list of entity_pb.CompositeIndex objects. + Raises: + BadRequest if project_id is not found. + InternalError if ZooKeeper is not accessible. + """ + try: + project_index_manager = self.index_manager.composite_index_manager.\ + projects[project_id] + except KeyError: + raise BadRequest('project_id: {} not found'.format(project_id)) + + try: + indexes = project_index_manager.indexes_pb + except IndexInaccessible: + raise InternalError('ZooKeeper is not accessible') + + return indexes + @gen.coroutine def _upsert(self, tr, entity, old_entry_future=None): last_element = entity.key().path().element(-1) From 7db0bf00c60cb62d83f6d90698a743aeae92b51e Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Thu, 22 Aug 2019 10:13:18 -0700 Subject: [PATCH 051/221] Fix typo in message. --- AppController/djinn.rb | 2 +- AppController/lib/monit_interface.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 9281ed6efb..6788510625 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -4504,7 +4504,7 @@ def start_appcontroller(node) def start_admin_server Djinn.log_info('Starting AdminServer') script = `which appscale-admin`.chomp - HelperFunctions.log_and_crash("Cannod find appscale-admin!") if script.empty? + HelperFunctions.log_and_crash("Cannot find appscale-admin!") if script.empty? nginx_port = 17441 service_port = 17442 start_cmd = "#{script} serve -p #{service_port}" diff --git a/AppController/lib/monit_interface.rb b/AppController/lib/monit_interface.rb index 2d3b4d3458..975e409308 100644 --- a/AppController/lib/monit_interface.rb +++ b/AppController/lib/monit_interface.rb @@ -210,7 +210,7 @@ def self.service_config(process_name, group, start_cmd, env_vars, mem) def self.is_running?(watch) script = `which appscale-admin`.chomp - HelperFunctions.log_and_crash("Cannod find appscale-admin!") if script.empty? + HelperFunctions.log_and_crash("Cannot find appscale-admin!") if script.empty? 
output = run_cmd("#{script} summary | grep \"'#{watch}'\"") (output != '') end From 39ab10acc30bb8ba2bc95d36efd22099d4e06ddb Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 22 Aug 2019 13:39:55 -0700 Subject: [PATCH 052/221] Remove unused code for SSL datastore connections --- .../appengine/api/datastore_distributed.py | 23 +------------------ 1 file changed, 1 insertion(+), 22 deletions(-) diff --git a/AppServer/google/appengine/api/datastore_distributed.py b/AppServer/google/appengine/api/datastore_distributed.py index 0ba16266a7..9fcd21005b 100755 --- a/AppServer/google/appengine/api/datastore_distributed.py +++ b/AppServer/google/appengine/api/datastore_distributed.py @@ -46,15 +46,6 @@ from google.appengine.ext.remote_api import remote_api_pb from google.appengine.datastore import old_datastore_stub_util -# Where the SSL certificate is placed for encrypted communication. -CERT_LOCATION = "/etc/appscale/certs/mycert.pem" - -# Where the SSL private key is placed for encrypted communication. -KEY_LOCATION = "/etc/appscale/certs/mykey.pem" - -# The default SSL port to connect to. -SSL_DEFAULT_PORT = 8443 - try: __import__('google.appengine.api.taskqueue.taskqueue_service_pb') taskqueue_service_pb = sys.modules.get( @@ -197,11 +188,6 @@ def __init__(self, assert isinstance(app_id, basestring) and app_id != '' self.project_id = app_id self.__datastore_location = datastore_location - self.__is_encrypted = True - res = self.__datastore_location.split(':') - if len(res) == 2: - if int(res[1]) != SSL_DEFAULT_PORT: - self.__is_encrypted = False self.SetTrusted(trusted) @@ -356,14 +342,7 @@ def _RemoteSend(self, request, response, method, request_id=None): location = self.__datastore_location while True: try: - api_request.sendCommand( - location, - tag, - api_response, - 1, - self.__is_encrypted, - KEY_LOCATION, - CERT_LOCATION) + api_request.sendCommand(location, tag, api_response) break except socket.error as socket_error: if socket_error.errno in (errno.ECONNREFUSED, errno.EHOSTUNREACH): From 90c561a721da94a550da64509c5e7b8e290b48ad Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 22 Aug 2019 14:36:12 -0700 Subject: [PATCH 053/221] Ensure urllib3 is installed --- debian/appscale_install.sh | 1 + debian/appscale_install_functions.sh | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/debian/appscale_install.sh b/debian/appscale_install.sh index d3bd2e4be8..87fc016156 100755 --- a/debian/appscale_install.sh +++ b/debian/appscale_install.sh @@ -39,6 +39,7 @@ case "$1" in installappserverjava installtornado installpycrypto + installurllib3 installpycapnp installpymemcache installpyyaml diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index f90f7a75e5..439a30c49f 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -436,6 +436,12 @@ installpycrypto() pipwrapper pycrypto } +installurllib3() +{ + # Avoid using pipwrapper to prevent upgrading the package. + pip install urllib3 +} + postinstallzookeeper() { service zookeeper stop || true From 99c81d365961b59ff1c79c722bd814a70648d7ca Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 22 Aug 2019 14:36:51 -0700 Subject: [PATCH 054/221] Reuse TCP connections for datastore calls This reduces latency for datastore API calls in most cases. 
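A single keep-alive pool per stub is what removes the per-call TCP handshake; the only failure that needs special handling is an unreachable proxy, in which case the pool is rebuilt against another load balancer before retrying with a short backoff. A condensed sketch of that pattern (the host list and retry budget here are illustrative):

    import random
    import time

    from urllib3 import HTTPConnectionPool
    from urllib3.exceptions import MaxRetryError

    LB_HOSTS = ['10.0.1.10', '10.0.1.11']  # illustrative load balancer IPs
    PROXY_PORT = 8888

    pool = HTTPConnectionPool(LB_HOSTS[0], PROXY_PORT, maxsize=8)

    def post_request(payload, headers, retries=2):
        global pool
        try:
            return pool.request('POST', '/', body=payload, headers=headers)
        except MaxRetryError:
            if retries == 0:
                raise
            # The current proxy looks dead: point the pool at another host and
            # back off briefly (0.5s, then 1.5s) before retrying.
            pool = HTTPConnectionPool(random.choice(LB_HOSTS), PROXY_PORT,
                                      maxsize=8)
            time.sleep(0.5 * 3 ** (2 - retries))
            return post_request(payload, headers, retries - 1)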
--- .../appengine/api/datastore_distributed.py | 86 ++++++++++--------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/AppServer/google/appengine/api/datastore_distributed.py b/AppServer/google/appengine/api/datastore_distributed.py index 9fcd21005b..f6492aff67 100755 --- a/AppServer/google/appengine/api/datastore_distributed.py +++ b/AppServer/google/appengine/api/datastore_distributed.py @@ -33,6 +33,9 @@ import threading import warnings +from urllib3 import HTTPConnectionPool +from urllib3.exceptions import MaxRetryError + from google.appengine.api import apiproxy_stub from google.appengine.api import apiproxy_stub_map from google.appengine.api import datastore_errors @@ -79,15 +82,14 @@ PROXY_PORT = 8888 -def get_random_lb(): - """ Selects a random location from the load balancers file. +def get_random_lb_host(): + """ Selects a random host from the load balancers file. Returns: A string specifying a load balancer IP. """ with open(LOAD_BALANCERS_FILE) as lb_file: - return random.choice([':'.join([line.strip(), str(PROXY_PORT)]) - for line in lb_file]) + return random.choice([line.strip() for line in lb_file]) class InternalCursor(): @@ -187,7 +189,13 @@ def __init__(self, # TODO lock any use of these global variables assert isinstance(app_id, basestring) and app_id != '' self.project_id = app_id - self.__datastore_location = datastore_location + + host, port = datastore_location.split(':') + port = int(port) + self._ds_pool = HTTPConnectionPool(host, port, maxsize=8) + + self._service_id = os.environ.get('CURRENT_MODULE_ID', 'default') + self._version_id = os.environ.get('CURRENT_VERSION_ID', 'v1').split('.')[0] self.SetTrusted(trusted) @@ -319,6 +327,24 @@ def _maybeSetDefaultAuthDomain(self): if not auth_domain: os.environ['AUTH_DOMAIN'] = "appscale.com" + def _make_call(self, payload, headers, retries=2): + try: + http_response = self._ds_pool.request('POST', '/', body=payload, + headers=headers) + except MaxRetryError: + logging.exception('Failed to make datastore call') + if retries == 0: + raise + + # To handle the failure of a service proxy, pick a different host. + self._ds_pool = HTTPConnectionPool(get_random_lb_host(), PROXY_PORT, + maxsize=8) + backoff_ms = 500 * 3 ** (2 - retries) # 0.5s, 1.5s, 4.5s + time.sleep(float(backoff_ms) / 1000) + return self._make_call(payload, headers, retries - 1) + + return http_response + def _RemoteSend(self, request, response, method, request_id=None): """Sends a request remotely to the datstore server. 
""" tag = self.project_id @@ -335,44 +361,20 @@ def _RemoteSend(self, request, response, method, request_id=None): if request_id is not None: api_request.set_request_id(request_id) - api_response = remote_api_pb.Response() + payload = api_request.Encode() + headers = {'Content-Length': len(payload), + 'ProtocolBufferType': 'Request', + 'AppData': tag, + 'Module': self._service_id, + 'Version': self._version_id} + http_response = self._make_call(payload, headers) + + if http_response.status != 200: + raise apiproxy_errors.ApplicationError( + datastore_pb.Error.INTERNAL_ERROR, 'Unhandled datastore error') + + api_response = remote_api_pb.Response(http_response.data) - retry_count = 0 - max_retries = 3 - location = self.__datastore_location - while True: - try: - api_request.sendCommand(location, tag, api_response) - break - except socket.error as socket_error: - if socket_error.errno in (errno.ECONNREFUSED, errno.EHOSTUNREACH): - backoff_ms = 500 * 3**retry_count # 0.5s, 1.5s, 4.5s - retry_count += 1 - if retry_count > max_retries: - raise - - logging.warning( - 'Failed to call {} method of Datastore ({}). Retry #{} in {}ms.' - .format(method, socket_error, retry_count, backoff_ms)) - time.sleep(float(backoff_ms) / 1000) - location = get_random_lb() - api_response = remote_api_pb.Response() - continue - - if socket_error.errno == errno.ETIMEDOUT: - raise apiproxy_errors.ApplicationError( - datastore_pb.Error.TIMEOUT, - 'Connection timed out when making datastore request') - raise - # AppScale: Interpret ProtocolBuffer.ProtocolBufferReturnError as - # datastore_errors.InternalError - except ProtocolBuffer.ProtocolBufferReturnError as e: - raise datastore_errors.InternalError(e) - - if not api_response or not api_response.has_response(): - raise datastore_errors.InternalError( - 'No response from db server on %s requests.' % method) - if api_response.has_application_error(): error_pb = api_response.application_error() logging.error(error_pb.detail()) From 8252d640472ac7b3a28c9787a55f33ae2add693b Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 22 Aug 2019 21:38:09 -0700 Subject: [PATCH 055/221] Fall back to sandbox-friendly datastore requests When used in the context of an instance runtime (as the dashboard uses it), the datastore client library does not pool connections. 
--- .../appengine/api/datastore_distributed.py | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/AppServer/google/appengine/api/datastore_distributed.py b/AppServer/google/appengine/api/datastore_distributed.py index f6492aff67..8fb8fcc405 100755 --- a/AppServer/google/appengine/api/datastore_distributed.py +++ b/AppServer/google/appengine/api/datastore_distributed.py @@ -23,18 +23,20 @@ """ import datetime -import errno import logging import os import time import random -import socket import sys import threading import warnings -from urllib3 import HTTPConnectionPool -from urllib3.exceptions import MaxRetryError +try: + from urllib3 import HTTPConnectionPool + from urllib3.exceptions import MaxRetryError + POOL_CONNECTIONS = True +except ImportError: + POOL_CONNECTIONS = False from google.appengine.api import apiproxy_stub from google.appengine.api import apiproxy_stub_map from google.appengine.api import datastore_errors @@ -189,10 +191,13 @@ def __init__(self, # TODO lock any use of these global variables assert isinstance(app_id, basestring) and app_id != '' self.project_id = app_id + self.__datastore_location = datastore_location - host, port = datastore_location.split(':') - port = int(port) - self._ds_pool = HTTPConnectionPool(host, port, maxsize=8) + self._ds_pool = None + if POOL_CONNECTIONS: + host, port = datastore_location.split(':') + port = int(port) + self._ds_pool = HTTPConnectionPool(host, port, maxsize=8) self._service_id = os.environ.get('CURRENT_MODULE_ID', 'default') self._version_id = os.environ.get('CURRENT_VERSION_ID', 'v1').split('.')[0] @@ -327,23 +332,45 @@ def _maybeSetDefaultAuthDomain(self): if not auth_domain: os.environ['AUTH_DOMAIN'] = "appscale.com" - def _make_call(self, payload, headers, retries=2): + def _request_with_pool(self, api_request, tag, retries=2): + """AppScale: Make datastore request with pool to reduce connections. """ + payload = api_request.Encode() + headers = {'Content-Length': len(payload), + 'ProtocolBufferType': 'Request', + 'AppData': tag, + 'Module': self._service_id, + 'Version': self._version_id} try: http_response = self._ds_pool.request('POST', '/', body=payload, headers=headers) except MaxRetryError: - logging.exception('Failed to make datastore call') if retries == 0: raise - # To handle the failure of a service proxy, pick a different host. + logging.exception('Failed to make datastore call') self._ds_pool = HTTPConnectionPool(get_random_lb_host(), PROXY_PORT, maxsize=8) backoff_ms = 500 * 3 ** (2 - retries) # 0.5s, 1.5s, 4.5s time.sleep(float(backoff_ms) / 1000) - return self._make_call(payload, headers, retries - 1) + return self._request_with_pool(api_request, tag, retries - 1) - return http_response + if http_response.status != 200: + raise apiproxy_errors.ApplicationError( + datastore_pb.Error.INTERNAL_ERROR, 'Unhandled datastore error') + + return remote_api_pb.Response(http_response.data) + + def _request_from_sandbox(self, api_request, tag): + """ AppScale: Make datastore request within sandbox constraints. """ + api_response = remote_api_pb.Response() + try: + api_request.sendCommand(self.__datastore_location, tag, api_response) + except ProtocolBuffer.ProtocolBufferReturnError: + # Since this is not within the context of the API server, raise a + # runtime exception. + raise datastore_errors.InternalError('Unhandled datastore error') + + return api_response def _RemoteSend(self, request, response, method, request_id=None): """Sends a request remotely to the datstore server. 
""" @@ -361,19 +388,10 @@ def _RemoteSend(self, request, response, method, request_id=None): if request_id is not None: api_request.set_request_id(request_id) - payload = api_request.Encode() - headers = {'Content-Length': len(payload), - 'ProtocolBufferType': 'Request', - 'AppData': tag, - 'Module': self._service_id, - 'Version': self._version_id} - http_response = self._make_call(payload, headers) - - if http_response.status != 200: - raise apiproxy_errors.ApplicationError( - datastore_pb.Error.INTERNAL_ERROR, 'Unhandled datastore error') - - api_response = remote_api_pb.Response(http_response.data) + if POOL_CONNECTIONS: + api_response = self._request_with_pool(api_request, tag) + else: + api_response = self._request_from_sandbox(api_request, tag) if api_response.has_application_error(): error_pb = api_response.application_error() From 22bd36abf2fc27f8aa2669686fa777f2597ca491 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 23 Aug 2019 16:51:14 -0700 Subject: [PATCH 056/221] Register UA servers with ZooKeeper --- AppDB/appscale/datastore/scripts/ua_server.py | 45 ++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/scripts/ua_server.py b/AppDB/appscale/datastore/scripts/ua_server.py index 0f94fac13b..df23d0e0b6 100644 --- a/AppDB/appscale/datastore/scripts/ua_server.py +++ b/AppDB/appscale/datastore/scripts/ua_server.py @@ -11,12 +11,14 @@ import logging import SOAPpy import sys +import threading import time -from appscale.common import appscale_info -from appscale.common.constants import LOG_FORMAT +from kazoo.client import KazooClient, KazooState, NodeExistsError from tornado import gen +from appscale.common import appscale_info +from appscale.common.constants import LOG_FORMAT, ZK_PERSISTENT_RECONNECTS from appscale.datastore import appscale_datastore from appscale.datastore.dbconstants import ( AppScaleDBConnectionError, USERS_SCHEMA, USERS_TABLE @@ -35,6 +37,9 @@ # The port avaialble from the outside via SSL. DEFAULT_SSL_PORT = 4343 +# The ZooKeeper path where a list of active UA servers is stored. +UA_SERVERS_NODE = '/appscale/iam/servers' + # The default datastore used. datastore_type = DEFAULT_DATASTORE @@ -507,6 +512,40 @@ def usage(): print " --port or -p for server port" +def register_location(host, port): + """ Register service location with ZooKeeper. """ + zk_client = KazooClient(hosts=appscale_info.get_zk_locations_string(), + connection_retry=ZK_PERSISTENT_RECONNECTS) + zk_client.start() + server_node = '{}/{}:{}'.format(UA_SERVERS_NODE, host, port) + + def create_server_node(): + """ Creates a server registration entry in ZooKeeper. """ + try: + zk_client.retry(zk_client.create, server_node, ephemeral=True) + except NodeExistsError: + # If the server gets restarted, the old node may exist for a short time. + zk_client.retry(zk_client.delete, server_node) + zk_client.retry(zk_client.create, server_node, ephemeral=True) + + logger.info('UAServer registered at {}'.format(server_node)) + + def zk_state_listener(state): + """ Handles changes to ZooKeeper connection state. + + Args: + state: A string specifying the new ZooKeeper connection state. + """ + if state == KazooState.CONNECTED: + threading.Thread(target=create_server_node).start() + + zk_client.add_listener(zk_state_listener) + zk_client.ensure_path(UA_SERVERS_NODE) + # Since the client was started before adding the listener, make sure the + # server node gets created. 
+ zk_state_listener(zk_client.state) + + def main(): """ Main function for running the server. """ logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) @@ -531,6 +570,8 @@ def main(): else: pass + register_location(appscale_info.get_private_ip(), bindport) + db = appscale_datastore.DatastoreFactory.getDatastore(datastore_type) ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() valid_datastores = appscale_datastore.DatastoreFactory.valid_datastores() From a1dd7b83d6a0ed7a39471b1870828e0b562e0c7e Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 23 Aug 2019 16:51:51 -0700 Subject: [PATCH 057/221] Register TaskQueue servers with ZooKeeper --- .../appscale/taskqueue/appscale_taskqueue.py | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py b/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py index f177b76d0b..c5a3dea371 100644 --- a/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py +++ b/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py @@ -8,7 +8,7 @@ import signal import sys -from kazoo.client import KazooClient +from kazoo.client import KazooClient, KazooState, NodeExistsError from tornado import gen, httpserver, ioloop from tornado.web import Application, RequestHandler @@ -30,6 +30,8 @@ sys.path.append(APPSCALE_PYTHON_APPSERVER) +TQ_SERVERS_NODE = '/appscale/tasks/servers' + class ProtobufferHandler(RequestHandler): """ Defines what to do when the webserver receives different types of HTTP @@ -280,6 +282,37 @@ def stop_on_signal(): return graceful_shutdown +def register_location(zk_client, host, port): + """ Register service location with ZooKeeper. """ + server_node = '{}/{}:{}'.format(TQ_SERVERS_NODE, host, port) + + def create_server_node(): + """ Creates a server registration entry in ZooKeeper. """ + try: + zk_client.retry(zk_client.create, server_node, ephemeral=True) + except NodeExistsError: + # If the server gets restarted, the old node may exist for a short time. + zk_client.retry(zk_client.delete, server_node) + zk_client.retry(zk_client.create, server_node, ephemeral=True) + + logger.info('TaskQueue server registered at {}'.format(server_node)) + + def zk_state_listener(state): + """ Handles changes to ZooKeeper connection state. + + Args: + state: A string specifying the new ZooKeeper connection state. + """ + if state == KazooState.CONNECTED: + ioloop.IOLoop.instance().add_callback(create_server_node) + + zk_client.add_listener(zk_state_listener) + zk_client.ensure_path(TQ_SERVERS_NODE) + # Since the client was started before adding the listener, make sure the + # server node gets created. + zk_state_listener(zk_client.state) + + def main(): """ Main function which initializes and starts the tornado server. 
""" # Parse command line arguments @@ -298,6 +331,8 @@ def main(): connection_retry=ZK_PERSISTENT_RECONNECTS) zk_client.start() + register_location(zk_client, appscale_info.get_private_ip(), args.port) + # Initialize tornado server task_queue = distributed_tq.DistributedTaskQueue(zk_client) tq_application = prepare_taskqueue_application(task_queue) From 88d611cc3297909eba9efcfd6e1d41471f4e506a Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 23 Aug 2019 16:52:08 -0700 Subject: [PATCH 058/221] Register blobstore servers with ZooKeeper --- AppDB/appscale/datastore/scripts/blobstore.py | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/AppDB/appscale/datastore/scripts/blobstore.py b/AppDB/appscale/datastore/scripts/blobstore.py index a984a42097..0c57e5421a 100644 --- a/AppDB/appscale/datastore/scripts/blobstore.py +++ b/AppDB/appscale/datastore/scripts/blobstore.py @@ -34,7 +34,7 @@ from appscale.common.deployment_config import DeploymentConfig from appscale.common.deployment_config import ConfigInaccessible from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from kazoo.client import KazooClient +from kazoo.client import KazooClient, KazooState, NodeExistsError from StringIO import StringIO sys.path.append(APPSCALE_PYTHON_APPSERVER) @@ -73,6 +73,8 @@ # The chunk size to use for uploading files to GCS. GCS_CHUNK_SIZE = 5 * 1024 * 1024 # 5MB +BLOBSTORE_SERVERS_NODE = '/appscale/blobstore/servers' + # Global used for setting the datastore path when registering the DB datastore_path = "" @@ -423,6 +425,37 @@ def post(self, session_id = "session"): return +def register_location(zk_client, host, port): + """ Register service location with ZooKeeper. """ + server_node = '{}/{}:{}'.format(BLOBSTORE_SERVERS_NODE, host, port) + + def create_server_node(): + """ Creates a server registration entry in ZooKeeper. """ + try: + zk_client.retry(zk_client.create, server_node, ephemeral=True) + except NodeExistsError: + # If the server gets restarted, the old node may exist for a short time. + zk_client.retry(zk_client.delete, server_node) + zk_client.retry(zk_client.create, server_node, ephemeral=True) + + logger.info('Blobstore server registered at {}'.format(server_node)) + + def zk_state_listener(state): + """ Handles changes to ZooKeeper connection state. + + Args: + state: A string specifying the new ZooKeeper connection state. + """ + if state == KazooState.CONNECTED: + tornado.ioloop.IOLoop.instance().add_callback(create_server_node) + + zk_client.add_listener(zk_state_listener) + zk_client.ensure_path(BLOBSTORE_SERVERS_NODE) + # Since the client was started before adding the listener, make sure the + # server node gets created. + zk_state_listener(zk_client.state) + + def main(): global datastore_path global deployment_config @@ -443,6 +476,8 @@ def main(): deployment_config = DeploymentConfig(zk_client) setup_env() + register_location(zk_client, appscale_info.get_private_ip(), args.port) + http_server = tornado.httpserver.HTTPServer( Application(), max_buffer_size=MAX_REQUEST_BUFF_SIZE, xheaders=True) From 04374826c8130a065efd6fb291e3e88ee4864733 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 27 Aug 2019 09:50:04 -0700 Subject: [PATCH 059/221] Update service routing based on registration nodes This allows the UA, TaskQueue, and blobstore servers to get routed in the same was as datastore and search2 servers. 
--- AppController/djinn.rb | 149 +++++++++--------- AppController/djinnServer.rb | 1 - AppController/lib/haproxy.rb | 28 ---- AppController/lib/zkinterface.rb | 35 +++- AppController/test/tc_djinn.rb | 2 - .../appscale/appcontroller_client/__init__.py | 8 - AppDB/appscale/datastore/scripts/blobstore.py | 6 - 7 files changed, 105 insertions(+), 124 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index aa11208bee..29f1adeab0 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -1635,18 +1635,30 @@ def get_all_private_ips(secret) end def check_api_services - # LoadBalancers needs to setup the routing - # for the datastore and search2 (if applicable) before proceeding. - while my_node.is_load_balancer? && !update_db_haproxy - Djinn.log_info('Waiting for Datastore assignements ...') - sleep(SMALL_WAIT) - end - has_search2 = !get_search2.empty? - if has_search2 - while my_node.is_load_balancer? && !update_search2_haproxy - Djinn.log_info('Waiting for Search2 assignements ...') - sleep (SMALL_WAIT) + + # Wait for required services to be registered. + if my_node.is_load_balancer? + until update_db_haproxy + Djinn.log_info('Waiting for Datastore servers') + sleep(SMALL_WAIT) + end + + until update_tq_haproxy + Djinn.log_info('Waiting for TaskQueue servers') + sleep(SMALL_WAIT) + end + + until update_blob_servers + Djinn.log_info('Waiting for blobstore servers') + sleep(SMALL_WAIT) + end + + if has_search2 + until update_search2_haproxy + Djinn.log_info('Waiting for Search2 servers') + sleep(SMALL_WAIT) + end end end @@ -1921,7 +1933,10 @@ def job_start(secret) end if my_node.is_load_balancer? # Load balancers need to regenerate nginx/haproxy configuration if needed. + update_ua_haproxy update_db_haproxy + update_tq_haproxy + update_blob_servers update_search2_haproxy unless get_search2.empty? APPS_LOCK.synchronize { regenerate_routing_config } end @@ -2599,45 +2614,22 @@ def gather_logs(secret) # Updates the list of blob_server in haproxy. def update_blob_servers - servers = [] - get_all_compute_nodes.each { |ip| - servers << {'ip' => ip, 'port' => BlobServer::SERVER_PORT} - } - HAProxy.create_app_config(servers, my_node.private_ip, - BlobServer::HAPROXY_PORT, BlobServer::NAME) - end - - # Instruct HAProxy to begin routing traffic to the BlobServers. - # - # Args: - # secret: A String that is used to authenticate the caller. - # - # Returns: - # "OK" if the addition was successful. In case of failures, the following - # Strings may be returned: - # - BAD_SECRET_MSG: If the caller cannot be authenticated. - # - NO_HAPROXY_PRESENT: If this node does not run HAProxy. - def add_routing_for_blob_server(secret) - return BAD_SECRET_MSG unless valid_secret?(secret) - return NOT_READY if @nodes.empty? - return NO_HAPROXY_PRESENT unless my_node.is_load_balancer? + begin + servers = ZKInterface.get_blob_servers.map { |machine_ip, port| + {'ip' => machine_ip, 'port' => port} + } + rescue FailedZooKeeperOperationException + Djinn.log_warn('Unable to fetch list of datastore servers') + return false + end - Djinn.log_debug('Adding BlobServer routing.') - update_blob_servers + HAProxy.create_app_config(servers, my_node.private_ip, + BlobServer::HAPROXY_PORT, BlobServer::NAME) + return true end - # Creates an Nginx/HAProxy configuration file for the Users/Apps soap server. + # Creates an Nginx configuration file for the Users/Apps soap server. def configure_uaserver - all_db_private_ips = [] - @state_change_lock.synchronize { - @nodes.each { | node | - if node.is_db_master? 
or node.is_db_slave? - all_db_private_ips.push(node.private_ip) - end - } - } - HAProxy.create_ua_server_config(all_db_private_ips, - my_node.private_ip, UserAppClient::HAPROXY_SERVER_PORT) Nginx.add_service_location( 'appscale-uaserver', my_node.private_ip, UserAppClient::HAPROXY_SERVER_PORT, UserAppClient::SSL_SERVER_PORT) @@ -2672,19 +2664,39 @@ def update_search2_haproxy return true end - # Creates HAProxy configuration for TaskQueue. - def configure_tq_routing - all_tq_ips = [] - @state_change_lock.synchronize { - @nodes.each { | node | - if node.is_taskqueue_master? || node.is_taskqueue_slave? - all_tq_ips.push(node.private_ip) - end + def update_tq_haproxy + begin + servers = ZKInterface.get_taskqueue_servers.map { |machine_ip, port| + {'ip' => machine_ip, 'port' => port} } - } - HAProxy.create_tq_server_config( - all_tq_ips, my_node.private_ip, TaskQueue::HAPROXY_PORT) + rescue FailedZooKeeperOperationException + Djinn.log_warn('Unable to fetch list of taskqueue servers') + return false + end + HAProxy.create_app_config(servers, my_node.private_ip, + TaskQueue::HAPROXY_PORT, TaskQueue::NAME) + return true + end + + def update_ua_haproxy + begin + servers = ZKInterface.get_ua_servers.map { |machine_ip, port| + {'ip' => machine_ip, 'port' => port} + } + rescue FailedZooKeeperOperationException + Djinn.log_warn('Unable to fetch list of UA servers') + return false + end + + HAProxy.create_app_config( + servers, my_node.private_ip, UserAppClient::HAPROXY_SERVER_PORT, + UserAppClient::NAME) + return true + end + + # Creates nginx configuration for TaskQueue. + def configure_tq_routing # TaskQueue REST API routing. # We don't need Nginx for backend TaskQueue servers, only for REST support. rest_prefix = '~ /taskqueue/v1beta2/projects/.*' @@ -2693,10 +2705,6 @@ def configure_tq_routing TaskQueue::TASKQUEUE_SERVER_SSL_PORT, rest_prefix) end - def remove_tq_endpoints - HAProxy.remove_tq_endpoints - end - # TODO: this is a temporary fix. The dependency on the tools should be # removed. def write_tools_config @@ -3332,6 +3340,14 @@ def start_stop_api_services stop_soap_server end + if my_node.is_load_balancer? + until update_ua_haproxy + Djinn.log_info('Waiting for UA servers') + sleep(SMALL_WAIT) + end + configure_uaserver + end + # All nodes wait for the UserAppServer now. The call here is just to # ensure the UserAppServer is talking to the persistent state. HelperFunctions.sleep_until_port_is_open(@my_private_ip, @@ -3379,7 +3395,6 @@ def start_stop_api_services } else threads << Thread.new { - remove_tq_endpoints stop_ejabberd } end @@ -4350,13 +4365,6 @@ def initialize_server Djinn.log_info("Nginx already configured and running.") end - # The HAProxy process needs at least one configured service to start. The - # UAServer is configured first to satisfy this condition. - configure_uaserver - - # HAProxy must be running so that the UAServer can be accessed. - HAProxy.services_start - # Volume is mounted, let's finish the configuration of static files. if my_node.is_shadow? and not my_node.is_compute? write_app_logrotate @@ -5077,9 +5085,6 @@ def scale_down_instances num_scaled_down += num_terminated } } - - # Make sure we have the proper list of blobservers configured. 
- update_blob_servers } end diff --git a/AppController/djinnServer.rb b/AppController/djinnServer.rb index a00ee07611..8cbe2f1309 100755 --- a/AppController/djinnServer.rb +++ b/AppController/djinnServer.rb @@ -76,7 +76,6 @@ def on_init add_method(@djinn, "get_online_users_list", "secret") add_method(@djinn, "start_roles_on_nodes", "ips_hash", "secret") add_method(@djinn, "gather_logs", "secret") - add_method(@djinn, "add_routing_for_blob_server", "secret") add_method(@djinn, "run_groomer", "secret") add_method(@djinn, "get_property", "property_regex", "secret") add_method(@djinn, "set_property", "property_name", "property_value", diff --git a/AppController/lib/haproxy.rb b/AppController/lib/haproxy.rb index 6b27d0227f..c8d879626a 100644 --- a/AppController/lib/haproxy.rb +++ b/AppController/lib/haproxy.rb @@ -93,34 +93,6 @@ def self.services_start :service_haproxy, start_cmd, stop_cmd, SERVICE_PIDFILE, nil, restart_cmd) end - # Create the config file for UserAppServer. - def self.create_ua_server_config(server_ips, my_ip, listen_port) - # We reach out to UserAppServers on the DB nodes. - # The port is fixed. - servers = [] - server_ips.each { |server| - servers << { 'ip' => server, 'port' => UserAppClient::SERVER_PORT } - } - create_app_config(servers, my_ip, listen_port, UserAppClient::NAME) - end - - # Remove the configuration for TaskQueue REST API endpoints. - def self.remove_tq_endpoints - FileUtils.rm_f(File.join(SERVICE_SITES_PATH, TaskQueue::NAME)) - HAProxy.regenerate_config - end - - # Create the config file for TaskQueue servers. - def self.create_tq_server_config(server_ips, my_ip, listen_port) - servers = [] - server_ips.each { |server| - TaskQueue.get_server_ports.each { |port| - servers << { 'ip' => server, 'port' => port } - } - } - create_app_config(servers, my_ip, listen_port, TaskQueue::NAME) - end - # A generic function for creating HAProxy config files used by AppScale services. # # Arguments: diff --git a/AppController/lib/zkinterface.rb b/AppController/lib/zkinterface.rb index 0f9411518d..34b3824a4c 100644 --- a/AppController/lib/zkinterface.rb +++ b/AppController/lib/zkinterface.rb @@ -45,12 +45,21 @@ class ZKInterface # and where other nodes will recover that state from. APPCONTROLLER_STATE_PATH = "#{APPCONTROLLER_PATH}/state".freeze + # The ZooKeeper node where blobstore servers register themselves. + BLOBSTORE_REGISTRY_PATH = '/appscale/blobstore/servers' + # The ZooKeeper node where datastore servers register themselves. DATASTORE_REGISTRY_PATH = '/appscale/datastore/servers' # The ZooKeeper node where search servers register themselves. SEARCH2_REGISTRY_PATH = '/appscale/search/live_nodes' + # The ZooKeeper node where taskqueue servers register themselves. + TASKQUEUE_REGISTRY_PATH = '/appscale/tasks/servers' + + # The ZooKeeper node where UA servers register themselves. + UA_REGISTRY_PATH = '/appscale/iam/servers' + # The location in ZooKeeper that AppControllers write information about their # node to, so that others can poll to see if they are alive and what roles # they've taken on. 
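The new *_REGISTRY_PATH constants assume that each service registers itself by creating a child znode named "<ip>:<port>" under its registry path; that naming is implied by the split(':') in list_registered below. As a rough sketch of the registration side (not code from this patch), a server could register with kazoo, which the project already uses elsewhere, along these lines. The helper name, ZooKeeper address and port values are assumptions:

    from kazoo.client import KazooClient

    def register_server(zk_hosts, registry_path, private_ip, port):
        # Ephemeral znodes disappear when the session dies, so a crashed
        # server drops out of the registry (and out of the HAProxy config
        # the AppController regenerates) on its own.
        client = KazooClient(hosts=zk_hosts)
        client.start()
        client.ensure_path(registry_path)
        client.create('{}/{}:{}'.format(registry_path, private_ip, port),
                      ephemeral=True)
        # The caller must keep this client (and its session) alive for as
        # long as the server should stay registered.
        return client

    # Hypothetical example:
    # register_server('10.0.0.2:2181', '/appscale/tasks/servers',
    #                 '10.0.0.5', 17447)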
@@ -242,20 +251,32 @@ def self.get_dispatch_rules(project_id) return JSON.load(dispatch_config_json) end - def self.get_datastore_servers - return get_children(DATASTORE_REGISTRY_PATH).map { |server| + def self.list_registered(registration_node) + return get_children(registration_node).map { |server| server = server.split(':') server[1] = server[1].to_i server } end + def self.get_datastore_servers + list_registered(DATASTORE_REGISTRY_PATH) + end + def self.get_search2_servers - return get_children(SEARCH2_REGISTRY_PATH).map { |server| - server = server.split(':') - server[1] = server[1].to_i - server - } + list_registered(SEARCH2_REGISTRY_PATH) + end + + def self.get_taskqueue_servers + list_registered(TASKQUEUE_REGISTRY_PATH) + end + + def self.get_ua_servers + list_registered(UA_REGISTRY_PATH) + end + + def self.get_blob_servers + list_registered(BLOBSTORE_REGISTRY_PATH) end def self.set_machine_assignments(machine_ip, assignments) diff --git a/AppController/test/tc_djinn.rb b/AppController/test/tc_djinn.rb index e534a0db70..10a10a3fc2 100644 --- a/AppController/test/tc_djinn.rb +++ b/AppController/test/tc_djinn.rb @@ -239,7 +239,6 @@ def test_taskqueue_master file.should_receive(:open).and_return() file.should_receive(:log_run).and_return() flexmock(Djinn).should_receive(:log_run).and_return() - flexmock(HAProxy).should_receive(:create_tq_server_config).and_return() flexmock(HelperFunctions).should_receive(:shell).and_return() flexmock(HelperFunctions).should_receive(:sleep_until_port_is_open). and_return() @@ -288,7 +287,6 @@ def test_taskqueue_slave # mock out and commands flexmock(Djinn).should_receive(:log_run).and_return() - flexmock(HAProxy).should_receive(:create_tq_server_config).and_return() flexmock(MonitInterface).should_receive(:start_daemon).and_return() flexmock(MonitInterface).should_receive(:start).and_return() flexmock(Addrinfo).should_receive('ip.getnameinfo').and_return(["hostname-ip1"]) diff --git a/AppControllerClient/appscale/appcontroller_client/__init__.py b/AppControllerClient/appscale/appcontroller_client/__init__.py index 119d43bc47..eb934773f0 100644 --- a/AppControllerClient/appscale/appcontroller_client/__init__.py +++ b/AppControllerClient/appscale/appcontroller_client/__init__.py @@ -302,14 +302,6 @@ def run_groomer(self): return self.call(self.MAX_RETRIES, self.server.run_groomer, self.secret) - def add_routing_for_blob_server(self): - """ Tells the AppController to begin routing traffic to the - BlobServer(s). - """ - return self.call(self.MAX_RETRIES, self.server.add_routing_for_blob_server, - self.secret) - - def deployment_id_exists(self): """ Asks the AppController if the deployment ID is stored in ZooKeeper. diff --git a/AppDB/appscale/datastore/scripts/blobstore.py b/AppDB/appscale/datastore/scripts/blobstore.py index 0c57e5421a..728a7f8010 100644 --- a/AppDB/appscale/datastore/scripts/blobstore.py +++ b/AppDB/appscale/datastore/scripts/blobstore.py @@ -483,11 +483,5 @@ def main(): http_server.listen(args.port) - # Make sure this server is accessible from each of the load balancers. 
- secret = appscale_info.get_secret() - for load_balancer in appscale_info.get_load_balancer_ips(): - acc = AppControllerClient(load_balancer, secret) - acc.add_routing_for_blob_server() - logger.info('Starting BlobServer on {}'.format(args.port)) tornado.ioloop.IOLoop.instance().start() From 9f1efcee71de3a8d1faf3f3709528145adf924b3 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 27 Aug 2019 16:39:15 -0700 Subject: [PATCH 060/221] Use interal port when making UAServer calls When making UAServer calls on the same network, clients do not need to go through nginx. --- AdminServer/appscale/admin/__init__.py | 2 +- .../admin/instance_manager/instance.py | 8 ++++--- AppController/djinn.rb | 22 ++++++++++--------- AppController/lib/app_dashboard.rb | 4 ++-- AppController/lib/user_app_client.rb | 3 +-- AppController/scripts/ejabberd_auth.py | 3 +-- AppController/test/tc_djinn.rb | 2 +- AppDashboard/lib/app_dashboard_helper.py | 9 +++----- AppDashboard/lib/uaserver_host.py | 1 - AppDashboard/lib/uaserver_location.py | 1 + .../test/functional/test_dashboard.py | 2 +- .../appengine/api/xmpp/xmpp_service_real.py | 4 ++-- .../google/appengine/tools/dev_appserver.py | 1 - common/appscale/common/constants.py | 2 +- common/appscale/common/ua_client.py | 17 ++++++++------ scripts/create_user.py | 4 +--- scripts/delete_user.py | 4 +--- scripts/describe_user.py | 4 +--- scripts/grant_admin_for_app.py | 4 +--- scripts/grant_cloud_admin.py | 4 +--- scripts/view_users.py | 4 +--- 21 files changed, 47 insertions(+), 58 deletions(-) delete mode 100644 AppDashboard/lib/uaserver_host.py create mode 100644 AppDashboard/lib/uaserver_location.py diff --git a/AdminServer/appscale/admin/__init__.py b/AdminServer/appscale/admin/__init__.py index 9dd8bc8c68..7b0f1968d9 100644 --- a/AdminServer/appscale/admin/__init__.py +++ b/AdminServer/appscale/admin/__init__.py @@ -1387,7 +1387,7 @@ def main(): options.define('load_balancers', appscale_info.get_load_balancer_ips()) acc = appscale_info.get_appcontroller_client() - ua_client = UAClient(appscale_info.get_db_master_ip(), options.secret) + ua_client = UAClient() zk_client = KazooClient( hosts=','.join(options.zk_locations), connection_retry=ZK_PERSISTENT_RECONNECTS) diff --git a/AdminServer/appscale/admin/instance_manager/instance.py b/AdminServer/appscale/admin/instance_manager/instance.py index 46cedb7a7c..535b61b9fb 100644 --- a/AdminServer/appscale/admin/instance_manager/instance.py +++ b/AdminServer/appscale/admin/instance_manager/instance.py @@ -152,7 +152,7 @@ def create_java_start_cmd(app_name, port, load_balancer_port, load_balancer_host '--nginx_host={}'.format(load_balancer_host), '--xmpp_path={}'.format(load_balancer_host), '--uaserver_path={}'.format( - ':'.join([options.db_proxy, str(UA_SERVER_PORT)])), + ':'.join([load_balancer_host, str(UA_SERVER_PORT)])), '--external_api_port={}'.format(api_server_port) ] @@ -191,7 +191,8 @@ def create_python_api_start_cmd(app_name, login_ip, port, pidfile, '--nginx_host', login_ip, '--enable_sendmail', '--xmpp_path', options.load_balancer_ip, - '--uaserver_path', '{}:{}'.format(options.db_proxy, UA_SERVER_PORT), + '--uaserver_path', '{}:{}'.format(options.load_balancer_ip, + UA_SERVER_PORT), '--datastore_path', '{}:{}'.format(options.db_proxy, DB_SERVER_PORT), '--pidfile', pidfile, '--external_api_port', str(api_server_port) @@ -249,7 +250,8 @@ def create_python27_start_cmd(app_name, login_ip, port, pidfile, revision_key, '--xmpp_path', options.load_balancer_ip, '--php_executable_path=' + str(PHP_CGI_LOCATION), 
'--max_module_instances', "{}:1".format(service_id), - '--uaserver_path', '{}:{}'.format(options.db_proxy, UA_SERVER_PORT), + '--uaserver_path', '{}:{}'.format(options.load_balancer_ip, + UA_SERVER_PORT), '--datastore_path', '{}:{}'.format(options.db_proxy, DB_SERVER_PORT), '--host', options.private_ip, '--automatic_restart', 'no', diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 29f1adeab0..30d24100aa 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -1550,7 +1550,7 @@ def reset_password(username, password, secret) return NOT_READY if @nodes.empty? begin - uac = UserAppClient.new(my_node.private_ip, @@secret) + uac = UserAppClient.new(get_load_balancer.private_ip, @@secret) return uac.change_password(username, password) rescue FailedNodeException Djinn.log_warn("Failed to talk to the UserAppServer while resetting " \ @@ -1567,7 +1567,7 @@ def does_user_exist(username, secret) return NOT_READY if @nodes.empty? begin - uac = UserAppClient.new(my_node.private_ip, @@secret) + uac = UserAppClient.new(get_load_balancer.private_ip, @@secret) return uac.does_user_exist?(username) rescue FailedNodeException Djinn.log_warn("Failed to talk to the UserAppServer to check if the " \ @@ -1587,7 +1587,7 @@ def create_user(username, password, account_type, secret) return NOT_READY if @nodes.empty? begin - uac = UserAppClient.new(my_node.private_ip, @@secret) + uac = UserAppClient.new(get_load_balancer.private_ip, @@secret) return uac.commit_new_user(username, password, account_type) rescue FailedNodeException Djinn.log_warn("Failed to talk to the UserAppServer while committing " \ @@ -1604,7 +1604,7 @@ def set_admin_role(username, is_cloud_admin, capabilities, secret) return NOT_READY if @nodes.empty? begin - uac = UserAppClient.new(my_node.private_ip, @@secret) + uac = UserAppClient.new(get_load_balancer.private_ip, @@secret) return uac.set_admin_role(username, is_cloud_admin, capabilities) rescue FailedNodeException Djinn.log_warn("Failed to talk to the UserAppServer while setting admin role " \ @@ -3350,9 +3350,9 @@ def start_stop_api_services # All nodes wait for the UserAppServer now. The call here is just to # ensure the UserAppServer is talking to the persistent state. - HelperFunctions.sleep_until_port_is_open(@my_private_ip, - UserAppClient::SSL_SERVER_PORT, USE_SSL) - uac = UserAppClient.new(@my_private_ip, @@secret) + HelperFunctions.sleep_until_port_is_open( + get_load_balancer.private_ip, UserAppClient::HAPROXY_SERVER_PORT) + uac = UserAppClient.new(get_load_balancer.private_ip, @@secret) begin uac.does_user_exist?("not-there") rescue FailedNodeException @@ -4543,7 +4543,7 @@ def stop_ejabberd # Create the system user used to start and run system's applications. def create_appscale_user - uac = UserAppClient.new(my_node.private_ip, @@secret) + uac = UserAppClient.new(get_load_balancer.private_ip, @@secret) password = SecureRandom.base64 begin @@ -4647,9 +4647,11 @@ def prep_app_dashboard DatastoreServer::PROXY_PORT].join(':') taskqueue_location = [get_load_balancer.private_ip, TaskQueue::HAPROXY_PORT].join(':') + ua_server_location = [get_load_balancer.private_ip, + UserAppClient::HAPROXY_SERVER_PORT].join(':') source_archive = AppDashboard.prep( my_private, PERSISTENT_MOUNT_POINT, datastore_location, - taskqueue_location) + taskqueue_location, ua_server_location) self.deploy_dashboard(source_archive) end @@ -5840,7 +5842,7 @@ def start_xmpp_for_app(app) # We don't need to check for FailedNodeException here since we catch # it at a higher level. 
login_ip = @options['login'] - uac = UserAppClient.new(my_node.private_ip, @@secret) + uac = UserAppClient.new(get_load_balancer.private_ip, @@secret) xmpp_user = "#{app}@#{login_ip}" xmpp_pass = HelperFunctions.encrypt_password(xmpp_user, @@secret) diff --git a/AppController/lib/app_dashboard.rb b/AppController/lib/app_dashboard.rb index a2c2776b16..8c87e26b7e 100644 --- a/AppController/lib/app_dashboard.rb +++ b/AppController/lib/app_dashboard.rb @@ -32,13 +32,13 @@ module AppDashboard # Returns: # A string specifying the location of the prepared archive. def self.prep(private_ip, persistent_storage, datastore_location, - taskqueue_location) + taskqueue_location, ua_server_location) # Write deployment-specific information that the dashboard needs. lib_dir = File.join(APPSCALE_HOME, 'AppDashboard', 'lib') lib_contents = { 'admin_server_location.py' => "ADMIN_SERVER_LOCATION = '#{private_ip}'", 'controller_location.py' => "CONTROLLER_LOCATION = '#{private_ip}'", - 'uaserver_host.py' => "UA_SERVER_IP = '#{private_ip}'", + 'uaserver_location.py' => "UA_SERVER_LOCATION = '#{ua_server_location}'", 'datastore_location.py' => "DATASTORE_LOCATION = '#{datastore_location}'", 'taskqueue_location.py' => "TASKQUEUE_LOCATION = '#{taskqueue_location}'" } diff --git a/AppController/lib/user_app_client.rb b/AppController/lib/user_app_client.rb index 0ad2daeed8..01b111a861 100644 --- a/AppController/lib/user_app_client.rb +++ b/AppController/lib/user_app_client.rb @@ -36,8 +36,7 @@ def initialize(ip, secret) @ip = ip @secret = secret - @conn = SOAP::RPC::Driver.new("https://#{@ip}:#{SSL_SERVER_PORT}") - @conn.options['protocol.http.ssl_config.verify_mode'] = nil + @conn = SOAP::RPC::Driver.new("http://#{@ip}:#{HAPROXY_SERVER_PORT}") @conn.add_method('change_password', 'user', 'password', 'secret') @conn.add_method('commit_new_user', 'user', 'passwd', 'utype', 'secret') @conn.add_method('is_user_cloud_admin', 'username', 'secret') diff --git a/AppController/scripts/ejabberd_auth.py b/AppController/scripts/ejabberd_auth.py index a4f506ceb0..be44c096d6 100755 --- a/AppController/scripts/ejabberd_auth.py +++ b/AppController/scripts/ejabberd_auth.py @@ -108,8 +108,7 @@ def log_result(op, in_user, result): logging.debug('Authentication script: waiting for ejabberd requests') - secret = appscale_info.get_secret() - ua_client = UAClient(appscale_info.get_db_master_ip(), secret) + ua_client = UAClient() while True: try: diff --git a/AppController/test/tc_djinn.rb b/AppController/test/tc_djinn.rb index 10a10a3fc2..a5f2b81ad1 100644 --- a/AppController/test/tc_djinn.rb +++ b/AppController/test/tc_djinn.rb @@ -674,7 +674,7 @@ def get_djinn_mock role = { "public_ip" => "my public ip", "private_ip" => "my private ip", - "roles" => [] + "roles" => ["load_balancer"] } djinn = flexmock(Djinn.new()) djinn.my_index = 0 diff --git a/AppDashboard/lib/app_dashboard_helper.py b/AppDashboard/lib/app_dashboard_helper.py index bd6ea06961..0361bd2cc1 100644 --- a/AppDashboard/lib/app_dashboard_helper.py +++ b/AppDashboard/lib/app_dashboard_helper.py @@ -21,7 +21,7 @@ from custom_exceptions import BadConfigurationException from local_state import LocalState from secret_key import GLOBAL_SECRET_KEY -from uaserver_host import UA_SERVER_IP +from uaserver_location import UA_SERVER_LOCATION class AppHelperException(Exception): @@ -64,9 +64,6 @@ class AppDashboardHelper(object): # the login cookie. LOGIN_COOKIE_APPS_PART = 2 - # The port that the UserAppServer runs on, by default. 
- UA_SERVER_PORT = 4343 - # The port that the AdminServer runs on. ADMIN_SERVER_PORT = 17441 @@ -196,8 +193,8 @@ def get_uaserver(self): An SOAPpy object, representing a connection to the UserAppServer. """ if self.uaserver is None: - self.uaserver = SOAPpy.SOAPProxy('https://{0}:{1}'.format(UA_SERVER_IP, - self.UA_SERVER_PORT)) + self.uaserver = SOAPpy.SOAPProxy('http://{}'.format(UA_SERVER_LOCATION)) + return self.uaserver def get_user_capabilities(self, email): diff --git a/AppDashboard/lib/uaserver_host.py b/AppDashboard/lib/uaserver_host.py deleted file mode 100644 index aef0999ccd..0000000000 --- a/AppDashboard/lib/uaserver_host.py +++ /dev/null @@ -1 +0,0 @@ -UA_SERVER_IP = 'THIS VALUE WILL BE OVERWRITTEN ON STARTUP' diff --git a/AppDashboard/lib/uaserver_location.py b/AppDashboard/lib/uaserver_location.py new file mode 100644 index 0000000000..23db0f3bb3 --- /dev/null +++ b/AppDashboard/lib/uaserver_location.py @@ -0,0 +1 @@ +UA_SERVER_LOCATION = 'THIS VALUE WILL BE OVERWRITTEN ON STARTUP' diff --git a/AppDashboard/test/functional/test_dashboard.py b/AppDashboard/test/functional/test_dashboard.py index fd0215b139..a54b21b61d 100644 --- a/AppDashboard/test/functional/test_dashboard.py +++ b/AppDashboard/test/functional/test_dashboard.py @@ -36,7 +36,7 @@ class FunctionalTestAppDashboard(unittest.TestCase): def setUp(self): acc = flexmock(AppControllerClient) - acc.should_receive('get_uaserver_host').and_return('public1') + acc.should_receive('get_uaserver_location').and_return('public1:4341') acc.should_receive('get_cluster_stats').and_return([ # TODO make up example of cluster stats # TODO and make sure that this change doesn't break tests diff --git a/AppServer/google/appengine/api/xmpp/xmpp_service_real.py b/AppServer/google/appengine/api/xmpp/xmpp_service_real.py index 7c3d2eda2b..c29a4f4a77 100755 --- a/AppServer/google/appengine/api/xmpp/xmpp_service_real.py +++ b/AppServer/google/appengine/api/xmpp/xmpp_service_real.py @@ -63,7 +63,7 @@ def __init__(self, xmpp_location, log=logging.info, service_name='xmpp', super(XmppService, self).__init__(service_name) self.log = log self.xmpp_domain = domain - self.uaserver = "https://" + uaserver + self.uaserver = "http://" + uaserver self.login = "https://localhost:17443" if not uasecret: @@ -208,7 +208,7 @@ def _Dynamic_CreateChannel(self, request, response): application_key, self.xmpp_domain) - server = SOAPpy.SOAPProxy(self.uaserver, transport=UnverifiedTransport) + server = SOAPpy.SOAPProxy(self.uaserver) password = application_key encry_pw = hashlib.sha1(client_id+password) ret = server.commit_new_user(client_id, diff --git a/AppServer/google/appengine/tools/dev_appserver.py b/AppServer/google/appengine/tools/dev_appserver.py index e559eba4ab..83a7002344 100644 --- a/AppServer/google/appengine/tools/dev_appserver.py +++ b/AppServer/google/appengine/tools/dev_appserver.py @@ -3543,7 +3543,6 @@ def SetupStubs(app_id, **config): xmpp_path = config['xmpp_path'] uaserver_path = config['uaserver_path'] login_server = config['login_server'] - cookie_secret = config['COOKIE_SECRET'] os.environ['APPLICATION_ID'] = app_id diff --git a/common/appscale/common/constants.py b/common/appscale/common/constants.py index 495c48beaa..061ac691ff 100644 --- a/common/appscale/common/constants.py +++ b/common/appscale/common/constants.py @@ -99,7 +99,7 @@ def non_negative_int(value): DB_SERVER_PORT = 8888 # The port of the UserAppServer SOAP server. -UA_SERVER_PORT = 4343 +UA_SERVER_PORT = 4341 # The port of the application manager soap server. 
APP_MANAGER_PORT = 17445 diff --git a/common/appscale/common/ua_client.py b/common/appscale/common/ua_client.py index c135dbea68..eca16e7c67 100644 --- a/common/appscale/common/ua_client.py +++ b/common/appscale/common/ua_client.py @@ -1,9 +1,10 @@ """ A client that makes requests to the UAServer. """ -import json -import ssl +import random from SOAPpy import SOAPProxy + +from .appscale_info import get_load_balancer_ips, get_secret from .constants import UA_SERVER_PORT @@ -31,19 +32,21 @@ class UAClient(object): # applications a user owns, when applied to their user data. USER_APP_LIST_REGEX = "\napplications:(.+)\n" - def __init__(self, host, secret): + def __init__(self, host=None, secret=None): """ Creates a UAClient instance. Args: host: A string specifying the location of the UAServer. secret: A string specifying the deployment secret. """ - # Disable certificate verification for Python >= 2.7.9. - if hasattr(ssl, '_create_unverified_context'): - ssl._create_default_https_context = ssl._create_unverified_context + if host is None: + host = random.choice(get_load_balancer_ips()) + + if secret is None: + secret = get_secret() self.secret = secret - self.server = SOAPProxy('https://{}:{}'.format(host, UA_SERVER_PORT)) + self.server = SOAPProxy('http://{}:{}'.format(host, UA_SERVER_PORT)) def add_admin_for_app(self, email, app_id): """ Grants a user admin privileges for an application. diff --git a/scripts/create_user.py b/scripts/create_user.py index 8c945b3391..33572afd28 100644 --- a/scripts/create_user.py +++ b/scripts/create_user.py @@ -5,7 +5,6 @@ import string import sys -from appscale.common import appscale_info from appscale.common.ua_client import UAClient @@ -54,8 +53,7 @@ def is_valid_email(email): new_password = random_password() - secret = appscale_info.get_secret() - ua_client = UAClient(appscale_info.get_db_master_ip(), secret) + ua_client = UAClient() if ua_client.does_user_exist(email): print "User already exists." diff --git a/scripts/delete_user.py b/scripts/delete_user.py index 8a5078ea14..1353c40e09 100644 --- a/scripts/delete_user.py +++ b/scripts/delete_user.py @@ -2,7 +2,6 @@ import sys -from appscale.common import appscale_info from appscale.common.ua_client import UAClient @@ -23,8 +22,7 @@ def usage(): email = sys.argv[1] - secret = appscale_info.get_secret() - ua_client = UAClient(appscale_info.get_db_master_ip(), secret) + ua_client = UAClient() if not ua_client.does_user_exist(email): print "User does not exist." diff --git a/scripts/describe_user.py b/scripts/describe_user.py index 89c69e9ea1..f0597c3171 100644 --- a/scripts/describe_user.py +++ b/scripts/describe_user.py @@ -2,15 +2,13 @@ import sys -from appscale.common import appscale_info from appscale.common.ua_client import UAClient if __name__ == "__main__": email = sys.argv[1] - secret = appscale_info.get_secret() - ua_client = UAClient(appscale_info.get_db_master_ip(), secret) + ua_client = UAClient() if not ua_client.does_user_exist(email): print "User does not exist." 
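The admin scripts in this patch all shrink to the same pattern: UAClient() with no arguments picks a random load balancer via get_load_balancer_ips(), reads the deployment secret via get_secret(), and connects over plain HTTP instead of HTTPS. A minimal usage sketch follows; the email address is a made-up example, while both method calls appear elsewhere in this patch:

    from appscale.common.ua_client import UAClient

    ua_client = UAClient()   # host and secret are now resolved internally

    if not ua_client.does_user_exist('user@example.com'):
        print('User does not exist.')

    for user in ua_client.get_all_users():
        print(user)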
diff --git a/scripts/grant_admin_for_app.py b/scripts/grant_admin_for_app.py index d4362a6ef3..441b02db8b 100644 --- a/scripts/grant_admin_for_app.py +++ b/scripts/grant_admin_for_app.py @@ -2,7 +2,6 @@ import sys -from appscale.common import appscale_info from appscale.common.ua_client import UAClient @@ -26,8 +25,7 @@ def usage(): email = sys.argv[1] app_id = sys.argv[2] - secret = appscale_info.get_secret() - ua_client = UAClient(appscale_info.get_db_master_ip(), secret) + ua_client = UAClient() if not ua_client.does_user_exist(email): print "User does not exist." diff --git a/scripts/grant_cloud_admin.py b/scripts/grant_cloud_admin.py index fbf9bce20e..1fa181e472 100644 --- a/scripts/grant_cloud_admin.py +++ b/scripts/grant_cloud_admin.py @@ -2,7 +2,6 @@ import sys -from appscale.common import appscale_info from appscale.common.ua_client import UAClient @@ -25,8 +24,7 @@ def usage(): email = sys.argv[1] - secret = appscale_info.get_secret() - ua_client = UAClient(appscale_info.get_db_master_ip(), secret) + ua_client = UAClient() if not ua_client.does_user_exist(email): print "User does not exist." diff --git a/scripts/view_users.py b/scripts/view_users.py index 282675afea..520bdadf29 100644 --- a/scripts/view_users.py +++ b/scripts/view_users.py @@ -1,13 +1,11 @@ """ This script dumps all users. """ -from appscale.common import appscale_info from appscale.common.ua_client import UAClient if __name__ == "__main__": - secret = appscale_info.get_secret() - ua_client = UAClient(appscale_info.get_db_master_ip(), secret) + ua_client = UAClient() for user in ua_client.get_all_users(): print(user) From eb16f583b5e58cd98b9f618cc238834196402364 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 29 Aug 2019 09:57:04 -0700 Subject: [PATCH 061/221] Define private IP for TQ E2E tests --- AppTaskQueue/test/helpers/restart-taskqueue.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/AppTaskQueue/test/helpers/restart-taskqueue.sh b/AppTaskQueue/test/helpers/restart-taskqueue.sh index 9a1e1be24e..f80a01ea39 100755 --- a/AppTaskQueue/test/helpers/restart-taskqueue.sh +++ b/AppTaskQueue/test/helpers/restart-taskqueue.sh @@ -109,6 +109,7 @@ echo ${DB_IP} > /etc/appscale/masters echo ${DB_IP} > /etc/appscale/slaves echo ${ZK_IP} > /etc/appscale/zookeeper_locations echo ${LB_IP} > /etc/appscale/load_balancer_ips +hostname -I > /etc/appscale/my_private_ip RUNNING_SERVER=$(ps -ax | grep "[a]ppscale-taskqueue" || echo "") From 148a7a97876e50f3cfabf9341c00cf24381a5462 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 29 Aug 2019 09:57:15 -0700 Subject: [PATCH 062/221] Guess UAServer locations when ZK is not available --- AppController/djinn.rb | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 30d24100aa..b47080701e 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -2680,13 +2680,25 @@ def update_tq_haproxy end def update_ua_haproxy - begin - servers = ZKInterface.get_ua_servers.map { |machine_ip, port| - {'ip' => machine_ip, 'port' => port} + if ZKInterface.is_connected? + begin + servers = ZKInterface.get_ua_servers.map { |machine_ip, port| + {'ip' => machine_ip, 'port' => port} + } + rescue FailedZooKeeperOperationException + Djinn.log_warn('Unable to fetch list of UA servers') + return false + end + else + # If there is no ZK connection, guess the locations for now. + servers = [] + @state_change_lock.synchronize { + servers = @nodes.map { |node| + if node.is_db_master? 
or node.is_db_slave? + {'ip' => node.private_ip, 'port' => UserAppClient::SERVER_PORT} + end + }.compact } - rescue FailedZooKeeperOperationException - Djinn.log_warn('Unable to fetch list of UA servers') - return false end HAProxy.create_app_config( @@ -4365,6 +4377,13 @@ def initialize_server Djinn.log_info("Nginx already configured and running.") end + # The HAProxy process needs at least one configured service to start. The + # UAServer is configured first to satisfy this condition. + update_ua_haproxy + + # This ensures HAProxy gets started after a machine reboot. + HAProxy.services_start + # Volume is mounted, let's finish the configuration of static files. if my_node.is_shadow? and not my_node.is_compute? write_app_logrotate From e48fc4d2f84535107e333f2bf220f71b3ca48da2 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 29 Aug 2019 14:47:50 -0700 Subject: [PATCH 063/221] Use FDB backend if clusterfile content is defined --- AdminServer/appscale/admin/service_manager.py | 2 +- AppController/djinn.rb | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/AdminServer/appscale/admin/service_manager.py b/AdminServer/appscale/admin/service_manager.py index 941aaf3027..52a4a36f23 100644 --- a/AdminServer/appscale/admin/service_manager.py +++ b/AdminServer/appscale/admin/service_manager.py @@ -222,7 +222,7 @@ def datastore_start_cmd(port, assignment_options): A list of command line arguments. """ start_cmd = ['appscale-datastore', - '--type', 'cassandra', + '--type', assignment_options.get('backend', 'cassandra'), '--port', str(port)] if assignment_options.get('verbose'): start_cmd.append('--verbose') diff --git a/AppController/djinn.rb b/AppController/djinn.rb index aa11208bee..f2c1f19318 100755 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3619,8 +3619,10 @@ def assign_datastore_processes # startup. return unless my_node.is_shadow? + backend = 'cassandra' if @options.key?('fdb_clusterfile_content') ZKInterface.set_fdb_clusterfile_content(@options['fdb_clusterfile_content']) + backend = 'fdb' end Djinn.log_info("Assigning datastore processes.") @@ -3636,7 +3638,7 @@ def assign_datastore_processes # machine. db_nodes.each { |node| assignments = {} - assignments['datastore'] = {'verbose' => verbose} + assignments['datastore'] = {'backend' => backend, 'verbose' => verbose} ZKInterface.set_machine_assignments(node.private_ip, assignments) Djinn.log_debug("Node #{node.private_ip} got #{assignments}.") } From 07aba67e045c56f4846ae1061baa4f2dc1c51e78 Mon Sep 17 00:00:00 2001 From: Tanvi Marballi Date: Fri, 30 Aug 2019 14:05:07 -0700 Subject: [PATCH 064/221] Addressing code review changes --- AppController/djinn.rb | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 0a343d0aae..ffc7d94c37 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -491,7 +491,7 @@ class Djinn 'write_detailed_proxies_stats_log' => [TrueClass, 'False', true], 'zone' => [String, nil, true], 'fdb_clusterfile_content' => [String, nil, true], - 'update' => [String, nil, false] + 'update' => [Array, [], true] }.freeze # Template used for rsyslog configuration files. @@ -858,6 +858,11 @@ def check_options(options) end end + # We do not sanitize Array parameters for now. 
+ if PARAMETERS_AND_CLASS[key][PARAMETER_CLASS] == Array + newval = val + end + newoptions[key] = newval newval = "*****" unless PARAMETERS_AND_CLASS[key][2] Djinn.log_debug("Accepted option #{key}:#{newval}.") @@ -3828,18 +3833,18 @@ def build_search_service2 # Run a build on modified directories so that changes will take effect. def build_uncommitted_changes - if @options['update'].nil? + if @options['update'].empty? return end - update_dir = @options['update'] + update_dirs = @options['update'] - if update_dir == "all" - update_dir = ALLOWED_DIR_UPDATES.join(',') + if update_dirs == "all" + update_dirs = ALLOWED_DIR_UPDATES.join(',') end # Update Python packages across corresponding virtual environments - if update_dir.include?('common') + if update_dirs.include?('common') update_python_package("#{APPSCALE_HOME}/common") update_python_package("#{APPSCALE_HOME}/common", '/opt/appscale_venvs/api_server/bin/pip') @@ -3848,33 +3853,33 @@ def build_uncommitted_changes update_python_package("#{APPSCALE_HOME}/common", '/opt/appscale_venvs/search2/bin/pip') end - if update_dir.include?('app_controller') + if update_dirs.include?('app_controller') update_python_package("#{APPSCALE_HOME}/AppControllerClient") end - if update_dir.include?('admin_server') + if update_dirs.include?('admin_server') update_python_package("#{APPSCALE_HOME}/AdminServer") end - if update_dir.include?('taskqueue') + if update_dirs.include?('taskqueue') build_taskqueue end - if update_dir.include?('app_db') + if update_dirs.include?('app_db') update_python_package("#{APPSCALE_HOME}/AppDB") end - if update_dir.include?('iaas_manager') + if update_dirs.include?('iaas_manager') update_python_package("#{APPSCALE_HOME}/InfrastructureManager") end - if update_dir.include?('hermes') + if update_dirs.include?('hermes') update_python_package("#{APPSCALE_HOME}/Hermes") end - if update_dir.include?('api_server') + if update_dirs.include?('api_server') build_api_server end - if status.include?('SearchService2') + if update_dirs.include?('SearchService2') build_search_service2 end # Update Java AppServer - build_java_appserver if update_dir.include?('appserver_java') + build_java_appserver if update_dirs.include?('appserver_java') end def configure_ejabberd_cert From a25dafc4b214eec118513c1985013d23a3f72fc3 Mon Sep 17 00:00:00 2001 From: Tanvi Marballi Date: Fri, 30 Aug 2019 14:57:46 -0700 Subject: [PATCH 065/221] Fix typo --- AppController/djinn.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index ffc7d94c37..e9134a02d7 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -491,7 +491,7 @@ class Djinn 'write_detailed_proxies_stats_log' => [TrueClass, 'False', true], 'zone' => [String, nil, true], 'fdb_clusterfile_content' => [String, nil, true], - 'update' => [Array, [], true] + 'update' => [Array, [], false] }.freeze # Template used for rsyslog configuration files. From 410e4d0348d2f71f8edf12f7300a11be76a37e84 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Mon, 2 Sep 2019 11:59:37 +0300 Subject: [PATCH 066/221] boostrap.sh can only install appscale, but can't upgrade --- bootstrap.sh | 81 ++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index 35d6821e48..d2680b7d63 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Simple script to install AppScale and tools from the master branch +# Simple script to install AppScale. 
# Author: AppScale Team set -e @@ -20,7 +20,7 @@ BRANCH_PARAM_SPECIFIED="N" TAG_PARAM_SPECIFIED="N" usage() { - echo "Usage: ${0} [--repo ] [--tools-repo ]" + echo "Usage: ${0} [--repo ] [--branch ]" echo " [--tools-repo ] [--tools-branch ]" echo " [--agents-repo ] [--agents-branch ]" echo " [--thirdparties-repo ] [--thirdparties-branch ]" @@ -111,45 +111,46 @@ while [ $# -gt 0 ]; do done -# Empty tag means we use the latest available. -if [ "${BRANCH_PARAM_SPECIFIED}" = "Y" ] \ - && [ "${TAG_PARAM_SPECIFIED}" = "Y" ] \ - && [ "${GIT_TAG}" != "dev" ]; then +# Validate parameters combination +if [ "${BRANCH_PARAM_SPECIFIED}" = "Y" ] && [ "${TAG_PARAM_SPECIFIED}" = "Y" ]; then echo "Repo/Branch parameters can't be used if --tag parameter is specified" exit 1 fi -declare -A REPOS=( - ["appscale"]="${APPSCALE_REPO}" - ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" - ["appscale-agents"]="${AGENTS_REPO}" - ["appscale-thirdparties"]="${THIRDPARTIES_REPO}" -) -declare -A BRANCHES=( - ["appscale"]="${APPSCALE_BRANCH}" - ["appscale-tools"]="${APPSCALE_TOOLS_BRANCH}" - ["appscale-agents"]="${AGENTS_BRANCH}" - ["appscale-thirdparties"]="${THIRDPARTIES_BRANCH}" -) +# Determine if we use official repos with tag or custom branches +if [ "${BRANCH_PARAM_SPECIFIED}" = "Y" ] || [ "${GIT_TAG}" = "dev" ]; then + RELY_ON_TAG="N" +else + RELY_ON_TAG="Y" +fi + + +# Determine the latest git tag on the AppScale/appscale repo +if [ "${RELY_ON_TAG}" = "Y" ] || [ "$GIT_TAG" = "last" ]; then + echo "Determining the latest tag in AppScale/appscale repo" + GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/tags \ + | grep '"name"' | head -1 \ + | awk -F ':' '{ print $2 }' | tr --delete ' ,"') +fi # At this time we expect to be installed in $HOME. cd $HOME # Let's pull the github repositories. echo -if [ "${TAG_PARAM_SPECIFIED}" = "Y" ]; then +if [ "${RELY_ON_TAG}" = "Y" ]; then echo "Will be using the following github repos:" - echo "Repo: ${APPSCALE_REPO} Tag ${GIT_TAG}" - echo "Repo: ${APPSCALE_TOOLS_REPO} Tag ${GIT_TAG}" - echo "Repo: ${AGENTS_REPO} Tag ${GIT_TAG}" - echo "Repo: ${THIRDPARTIES_REPO} Tag ${GIT_TAG}" + echo "AppScale: ${APPSCALE_REPO} Tag ${GIT_TAG}" + echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} Tag ${GIT_TAG}" + echo "Cloud-Agents: ${AGENTS_REPO} Tag ${GIT_TAG}" + echo "Thirdparties: ${THIRDPARTIES_REPO} Tag ${GIT_TAG}" echo "Exit now (ctrl-c) if this is incorrect" else echo "Will be using the following github repos:" - echo "Repo: ${APPSCALE_REPO} Branch: ${APPSCALE_BRANCH}" - echo "Repo: ${APPSCALE_TOOLS_REPO} Branch: ${APPSCALE_TOOLS_BRANCH}" - echo "Repo: ${AGENTS_REPO} Branch: ${AGENTS_BRANCH}" - echo "Repo: ${THIRDPARTIES_REPO} Branch: ${THIRDPARTIES_BRANCH}" + echo "AppScale: ${APPSCALE_REPO} Branch ${APPSCALE_BRANCH}" + echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} Branch ${APPSCALE_TOOLS_BRANCH}" + echo "Cloud-Agents: ${AGENTS_REPO} Branch ${AGENTS_BRANCH}" + echo "Thirdparties: ${THIRDPARTIES_REPO} Branch ${THIRDPARTIES_BRANCH}" echo "Exit now (ctrl-c) if this is incorrect" fi echo @@ -194,8 +195,9 @@ for appscale_presence_marker in ${APPSCALE_DIRS}; do if [ -d ${appscale_presence_marker} ] ; then echo "${appscale_presence_marker} already exists!" echo "bootstrap.sh script should be used for initial installation only." - echo "Use bootstrap-upgrade.sh for upgrading existing deployment" - echo "It can be found here: https://raw.githubusercontent.com/AppScale/appscale/master/bootstrap-upgrade.sh." 
+ echo "Use upgrade.sh for upgrading existing deployment" + echo "It can be found here: https://raw.githubusercontent.com/AppScale/appscale/master/upgrade.sh." + exit 1 fi done @@ -209,10 +211,7 @@ git clone ${AGENTS_REPO} appscale-agents git clone ${THIRDPARTIES_REPO} appscale-thirdparties # Use tags if we specified it. -if [ "$TAG_PARAM_SPECIFIED" = "Y" ]; then - if [ "$GIT_TAG" = "last" ]; then - GIT_TAG="$(cd appscale; git tag | tail -n 1)" - fi +if [ "${RELY_ON_TAG}" = "Y" ]; then (cd appscale; git checkout "$GIT_TAG") (cd appscale-tools; git checkout "$GIT_TAG") (cd appscale-agents; git checkout "$GIT_TAG") @@ -226,7 +225,7 @@ fi echo -n "Building AppScale..." if ! (cd appscale/debian; bash appscale_build.sh) ; then - echo "failed!" + echo "Failed to install AppScale core" exit 1 fi @@ -238,20 +237,20 @@ fi echo -n "Building AppScale Tools..." if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then - echo "failed!" + echo "Failed to install AppScale-Tools" exit 1 fi -echo -n "Downloading Thirdparty artifacts..." -if ! (cd appscale-thirdparties/; bash download_all_artifacts.sh) ; then - echo "failed!" +echo -n "Installing Thirdparty software..." +if ! (cd appscale-thirdparties/; bash install_all.sh) ; then + echo "Failed to install Thirdparties software" exit 1 fi # Let's source the profiles so this image can be used right away. . /etc/profile.d/appscale.sh -echo "*****************************************" -echo "AppScale and AppScale tools are installed" -echo "*****************************************" +echo "****************************************" +echo " AppScale is installed on the machine " +echo "****************************************" exit 0 From 06e4a0fc3f43e46920737842b7d6ffe77f4b0490 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 3 Sep 2019 18:10:54 +0300 Subject: [PATCH 067/221] Reworking boostrap script - bootstrap.sh script lost its upgrade functionality. - upgrade.sh script was created for upgrade purposes. - upgrade became possible to only official tag (not branch). - some refactoring. --- bootstrap-upgrade.sh | 313 ------------------------------------------- bootstrap.sh | 76 ++++++----- upgrade.sh | 238 ++++++++++++++++++++++++++++++++ 3 files changed, 280 insertions(+), 347 deletions(-) delete mode 100755 bootstrap-upgrade.sh create mode 100755 upgrade.sh diff --git a/bootstrap-upgrade.sh b/bootstrap-upgrade.sh deleted file mode 100755 index b24c3f2084..0000000000 --- a/bootstrap-upgrade.sh +++ /dev/null @@ -1,313 +0,0 @@ -#!/bin/bash -# -# Simple script to install AppScale and tools from the master branch -# Author: AppScale Team - -set -e - -APPSCALE_REPO="git://github.com/AppScale/appscale.git" -APPSCALE_TOOLS_REPO="git://github.com/AppScale/appscale-tools.git" -AGENTS_REPO="git://github.com/AppScale/appscale-agents.git" -THIRDPARTIES_REPO="git://github.com/AppScale/appscale-thirdparties.git" -GIT_TAG="last" - -usage() { - echo "Usage: ${0} [--tag ]" - echo - echo "Options:" - echo " --tag Git tag (e.g.: 3.7.2) to upgrade to." - echo " Default: '${GIT_TAG}' (use the latest release)." - exit 1 -} - - -echo -n "Checking to make sure you are root..." -if [ "$(id -u)" != "0" ]; then - echo "Failed" 1>&2 - exit 1 -fi -echo "Success" - -echo -n "Checking to make sure \$HOME is /root..." -if [ "$HOME" != "/root" ]; then - echo "Failed" - exit 1 -fi -echo "Success" - -# Let's get the command line argument. 
-while [ $# -gt 0 ]; do - if [ "${1}" = "--tag" ]; then - shift; if [ -z "${1}" ]; then usage; fi - GIT_TAG="${1}"; - shift; continue - fi - usage -done - -declare -A REPOS=( - ["appscale"]="${APPSCALE_REPO}" - ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" - ["appscale-agents"]="${AGENTS_REPO}" - ["appscale-thirdparties"]="${THIRDPARTIES_REPO}" -) - -# At this time we expect to be installed in $HOME. -cd $HOME - -echo "Ensuring all appscale repos are cloned" -for repo_name in "${!REPOS[@]}"; do - if [ ! -d "/root/${repo_name}" ]; then - git clone ${REPOS[$repo_name]} ${repo_name} - else - cd ${repo_name} - git remote - cd .. - fi -done - -# TODO if tag is "last", firstly identify what tag is actually latest. - -# Let's pull the github repositories. -echo -echo "Will be using the following github repos:" -echo "Repo: ${APPSCALE_REPO} Tag ${GIT_TAG}" -echo "Repo: ${APPSCALE_TOOLS_REPO} Tag ${GIT_TAG}" -echo "Repo: ${AGENTS_REPO} Tag ${GIT_TAG}" -echo "Repo: ${THIRDPARTIES_REPO} Tag ${GIT_TAG}" -echo "Exit now (ctrl-c) if this is incorrect" -echo - -sleep 5 - -# Wait up to 30 seconds for the package lists lock to become available. -lock_wait_start=$(date +%s) -printed_status=false -while fuser /var/lib/apt/lists/lock; do - if [ "${printed_status}" = false ]; then - echo "Waiting for another process to update package lists" - printed_status=true - fi - current_time=$(date +%s) - elapsed_time=$((current_time - lock_wait_start)) - if [ "${elapsed_time}" -gt 30 ]; then break; fi - sleep 1 -done -apt-get update - -# Wait up to 2 min for the dpkg lock to become available. -lock_wait_start=$(date +%s) -printed_status=false -while fuser /var/lib/dpkg/lock; do - if [ "${printed_status}" = false ]; then - echo "Waiting for another process to update packages" - printed_status=true - fi - current_time=$(date +%s) - elapsed_time=$((current_time - lock_wait_start)) - if [ "${elapsed_time}" -gt 120 ]; then break; fi - sleep 1 -done -apt-get install -y git - -# TODO TODO TODO TODO TODO TODO TODO TODO TODO - -if [ ! -d appscale ]; then - # We split the commands, to ensure it fails if branch doesn't - # exists (Precise git will not fail otherwise). - git clone ${APPSCALE_REPO} appscale - git clone ${APPSCALE_TOOLS_REPO} appscale-tools - git clone ${AGENTS_REPO} appscale-agents - git clone ${THIRDPARTIES_REPO} appscale-thirdparties - - if [ "$GIT_TAG" = "last" ]; then - GIT_TAG="$(cd appscale; git tag | tail -n 1)" - fi - (cd appscale; git checkout "$GIT_TAG") - (cd appscale-tools; git checkout "$GIT_TAG") - (cd appscale-agents; git checkout "$GIT_TAG") - (cd appscale-thirdparties; git checkout "$GIT_TAG") -fi - -# Since the last step in appscale_build.sh is to create the certs directory, -# its existence indicates that appscale has already been installed. -if [ -d /etc/appscale/certs ]; then - UPDATE_REPO="Y" - - # For upgrade, we don't switch across branches. - if [ "${TAG_PARAM_SPECIFIED}" = "N" ]; then - echo "Can't use custom branches when upgrading existing installation." - echo "Use for example '--tag last' or '--tag 3.8.0' instead." - exit 1 - fi - - APPSCALE_MAJOR="$(sed -n 's/.*\([0-9]\)\+\.\([0-9]\)\+\.[0-9]/\1/gp' appscale/VERSION)" - APPSCALE_MINOR="$(sed -n 's/.*\([0-9]\)\+\.\([0-9]\)\+\.[0-9]/\2/gp' appscale/VERSION)" - if [ -z "$APPSCALE_MAJOR" -o -z "$APPSCALE_MINOR" ]; then - echo "Cannot determine version of AppScale!" - exit 1 - fi - - # This is an upgrade, so let's make sure we use a tag that has - # been passed, or the last one available. Let's fetch all the - # available tags first. 
- (cd appscale; git fetch ${APPSCALE_REPO} -t) - (cd appscale-tools; git fetch ${APPSCALE_TOOLS_REPO} -t) - (cd appscale-agents; git fetch ${AGENTS_REPO} -t) - (cd appscale-thirdparties; git fetch ${THIRDPARTIES_REPO} -t) - - if [ "$GIT_TAG" = "last" ]; then - GIT_TAG="$(cd appscale; git tag | tail -n 1)" - else - # Make sure we have this tag in the official repo. - if ! (cd appscale; git ls-remote --tags ${APPSCALE_REPO} | grep -F $GIT_TAG > /dev/null) ; then - echo "\"$GIT_TAG\" not recognized: use --tag to specify tag to upgrade to." - exit 1 - fi - fi - - # We can pull a tag only if we are on the master branch. - CURRENT_BRANCH="$(cd appscale; git branch --no-color | grep '^*' | cut -f 2 -d ' ')" - if [ "${CURRENT_BRANCH}" != "master" ] && \ - (cd appscale; git tag -l | grep $(git describe)) ; then - CURRENT_BRANCH="$(cd appscale; git tag -l | grep $(git describe))" - if [ "${CURRENT_BRANCH}" = "${GIT_TAG}" ]; then - echo "AppScale repository is already at the"\ - "specified release. Building with current code." - UPDATE_REPO="N" - fi - fi - - # If CURRENT_BRANCH is empty, then we are not on master, and we - # are not on a released version: we don't upgrade then. - if [ -z "${CURRENT_BRANCH}" ]; then - echo "Error: git repository is not 'master' or a released version." - exit 1 - fi - - # Make sure AppScale is not running. - MONIT=$(which monit) - if $MONIT summary | grep controller > /dev/null ; then - echo "AppScale is still running: please stop it" - [ "${FORCE_UPGRADE}" = "Y" ] || exit 1 - elif echo $MONIT | grep local > /dev/null ; then - # AppScale is not running but there is a monit - # leftover from the custom install. - $MONIT quit - fi - - # Let's keep a copy of the old config: we need to move it to avoid - # questions from dpkg. - if [ -e /etc/haproxy/haproxy.cfg ]; then - mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.appscale.old - fi - - # Remove outdated appscale-controller and appscale-progenitor. - if [ $APPSCALE_MAJOR -le 2 -a $APPSCALE_MINOR -le 2 ]; then - rm -f /etc/init.d/appscale-controller - rm -f /etc/init.d/appscale-progenitor - update-rc.d -f appscale-progenitor remove || true - fi - - # Remove control files we added before 1.14, and re-add the - # default ones. - if [ $APPSCALE_MAJOR -le 1 -a $APPSCALE_MINOR -le 14 ]; then - rm -f /etc/default/haproxy /etc/init.d/haproxy /etc/default/monit /etc/monitrc - if dpkg-query -l haproxy > /dev/null 2> /dev/null ; then - apt-get -o DPkg::Options::="--force-confmiss" --reinstall install haproxy - fi - if dpkg-query -l monit > /dev/null 2> /dev/null ; then - apt-get -o DPkg::Options::="--force-confmiss" --reinstall install monit - fi - fi - - - if [ "${UPDATE_REPO}" = "Y" ]; then - echo "Found AppScale version $APPSCALE_MAJOR.$APPSCALE_MINOR."\ - "An upgrade to the latest version available will be"\ - "attempted in 5 seconds." - sleep 5 - - # Upgrade the repository. If GIT_TAG is empty, we are on HEAD. - if [ -n "${GIT_TAG}" ]; then - if ! (cd appscale; git checkout "$GIT_TAG"); then - echo "Please stash your local unsaved changes and checkout"\ - "the version of AppScale you are currently using to fix"\ - "this error." - echo "e.g.: git stash; git checkout " - exit 1 - fi - - if ! (cd appscale-tools; git checkout "$GIT_TAG"); then - echo "Please stash your local unsaved changes and checkout"\ - "the version of appscale-tools you are currently using"\ - "to fix this error." - echo "e.g.: git stash; git checkout " - exit 1 - fi - elif [ "${FORCE_UPGRADE}" = "N" ]; then - # TODO is it master? 
- (cd appscale; git pull) - (cd appscale-tools; git pull) - (cd appscale-agents; git pull) - (cd appscale-thirdparties; git pull) - else - RANDOM_KEY="$(echo $(date), $$|md5sum|head -c 6)-$(date +%s)" - REMOTE_REPO_NAME="appscale-bootstrap-${RANDOM_KEY}" - if ! (cd appscale; - git remote add -t "${APPSCALE_BRANCH}" -f "${REMOTE_REPO_NAME}" "${APPSCALE_REPO}"; - git checkout "${REMOTE_REPO_NAME}"/"${APPSCALE_BRANCH}"); then - echo "Please make sure the repository url is correct, the"\ - "branch exists, and that you have stashed your local"\ - "changes." - echo "e.g.: git stash, git remote add -t {remote_branch} -f"\ - "{repo_name} {repository_url}; git checkout"\ - "{repo_name}/{remote_branch}" - exit 1 - fi - if ! (cd appscale-tools; - git remote add -t "${APPSCALE_TOOLS_BRANCH}" -f "${REMOTE_REPO_NAME}" "${APPSCALE_TOOLS_REPO}"; - git checkout "${REMOTE_REPO_NAME}"/"${APPSCALE_TOOLS_BRANCH}"); then - echo "Please make sure the repository url is correct, the"\ - "branch exists, and that you have stashed your local"\ - "changes." - echo "e.g.: git stash, git remote add -t {remote_branch} -f"\ - "{repo_name} {repository_url}; git checkout"\ - "{repo_name}/{remote_branch}" - exit 1 - fi - fi - fi -fi - -echo -n "Building AppScale..." -if ! (cd appscale/debian; bash appscale_build.sh) ; then - echo "failed!" - exit 1 -fi - -echo -n "Installing AppScale Agents..." -if ! (cd appscale-agents/; make install-no-venv) ; then - echo "Failed to install AppScale Agents" - exit 1 -fi - -echo -n "Building AppScale Tools..." -if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then - echo "failed!" - exit 1 -fi - -echo -n "Downloading Thirdparty artifacts..." -if ! (cd appscale-thirdparties/; bash download_all_artifacts.sh) ; then - echo "failed!" - exit 1 -fi - -# Let's source the profiles so this image can be used right away. -. /etc/profile.d/appscale.sh - -echo "*****************************************" -echo "AppScale and AppScale tools are installed" -echo "*****************************************" -exit 0 diff --git a/bootstrap.sh b/bootstrap.sh index d2680b7d63..6169f34a12 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -44,6 +44,10 @@ usage() { exit 1 } +version_ge() { + test "$(printf '%s\n' "$@" | sort -V | tail -1)" = "$1" +} + echo -n "Checking to make sure you are root..." if [ "$(id -u)" != "0" ]; then @@ -107,6 +111,8 @@ while [ $# -gt 0 ]; do if [${GIT_TAG} != "dev" ]; then TAG_PARAM_SPECIFIED="Y"; fi shift; continue fi + echo "Parameter '$1' is not recognized" + echo usage done @@ -122,35 +128,32 @@ if [ "${BRANCH_PARAM_SPECIFIED}" = "Y" ] || [ "${GIT_TAG}" = "dev" ]; then RELY_ON_TAG="N" else RELY_ON_TAG="Y" -fi - - -# Determine the latest git tag on the AppScale/appscale repo -if [ "${RELY_ON_TAG}" = "Y" ] || [ "$GIT_TAG" = "last" ]; then - echo "Determining the latest tag in AppScale/appscale repo" - GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/tags \ - | grep '"name"' | head -1 \ - | awk -F ':' '{ print $2 }' | tr --delete ' ,"') + if [ "${GIT_TAG}" = "last" ]; then + echo "Determining the latest tag in AppScale/appscale repo" + GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/tags \ + | grep '"name"' | head -1 \ + | awk -F ':' '{ print $2 }' | tr --delete ' ,"') + fi + VERSION="${GIT_TAG}" fi # At this time we expect to be installed in $HOME. cd $HOME -# Let's pull the github repositories. 
echo if [ "${RELY_ON_TAG}" = "Y" ]; then echo "Will be using the following github repos:" - echo "AppScale: ${APPSCALE_REPO} Tag ${GIT_TAG}" - echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} Tag ${GIT_TAG}" - echo "Cloud-Agents: ${AGENTS_REPO} Tag ${GIT_TAG}" - echo "Thirdparties: ${THIRDPARTIES_REPO} Tag ${GIT_TAG}" + echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" + echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" + if version_ge ${VERSION} 3.8.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi + if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi echo "Exit now (ctrl-c) if this is incorrect" else echo "Will be using the following github repos:" - echo "AppScale: ${APPSCALE_REPO} Branch ${APPSCALE_BRANCH}" - echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} Branch ${APPSCALE_TOOLS_BRANCH}" - echo "Cloud-Agents: ${AGENTS_REPO} Branch ${AGENTS_BRANCH}" - echo "Thirdparties: ${THIRDPARTIES_REPO} Branch ${THIRDPARTIES_BRANCH}" + echo "AppScale: ${APPSCALE_REPO} - Branch ${APPSCALE_BRANCH}" + echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Branch ${APPSCALE_TOOLS_BRANCH}" + echo "Cloud-Agents: ${AGENTS_REPO} - Branch ${AGENTS_BRANCH}" + echo "Thirdparties: ${THIRDPARTIES_REPO} - Branch ${THIRDPARTIES_BRANCH}" echo "Exit now (ctrl-c) if this is incorrect" fi echo @@ -206,21 +209,22 @@ echo "Cloning appscale repositories" # We split the commands, to ensure it fails if branch doesn't # exists (Precise git will not fail otherwise). git clone ${APPSCALE_REPO} appscale +VERSION=$(cat /root/appscale/VERSION | grep -oE "[0-9]+\.[0-9]+\.[0-9]+") git clone ${APPSCALE_TOOLS_REPO} appscale-tools -git clone ${AGENTS_REPO} appscale-agents -git clone ${THIRDPARTIES_REPO} appscale-thirdparties +if version_ge ${VERSION} 3.8.0; then git clone ${AGENTS_REPO} appscale-agents; fi +if version_ge ${VERSION} 4.0.0; then git clone ${THIRDPARTIES_REPO} appscale-thirdparties; fi # Use tags if we specified it. if [ "${RELY_ON_TAG}" = "Y" ]; then - (cd appscale; git checkout "$GIT_TAG") - (cd appscale-tools; git checkout "$GIT_TAG") - (cd appscale-agents; git checkout "$GIT_TAG") - (cd appscale-thirdparties; git checkout "$GIT_TAG") + (cd appscale; git checkout "${GIT_TAG}") + (cd appscale-tools; git checkout "${GIT_TAG}") + if version_ge ${VERSION} 3.8.0; then (cd appscale-agents; git checkout "${GIT_TAG}"); fi + if version_ge ${VERSION} 4.0.0; then (cd appscale-thirdparties; git checkout "${GIT_TAG}"); fi else (cd appscale; git checkout ${APPSCALE_BRANCH}) (cd appscale-tools; git checkout ${APPSCALE_TOOLS_BRANCH}) - (cd appscale-agents; git checkout ${AGENTS_BRANCH}) - (cd appscale-thirdparties; git checkout ${THIRDPARTIES_BRANCH}) + if version_ge ${VERSION} 3.8.0; then (cd appscale-agents; git checkout ${AGENTS_BRANCH}); fi + if version_ge ${VERSION} 4.0.0; then (cd appscale-thirdparties; git checkout ${THIRDPARTIES_BRANCH}); fi fi echo -n "Building AppScale..." @@ -229,10 +233,12 @@ if ! (cd appscale/debian; bash appscale_build.sh) ; then exit 1 fi -echo -n "Installing AppScale Agents..." -if ! (cd appscale-agents/; make install-no-venv) ; then - echo "Failed to install AppScale Agents" - exit 1 +if version_ge ${VERSION} 3.8.0; then + echo -n "Installing AppScale Agents..." + if ! (cd appscale-agents/; make install-no-venv) ; then + echo "Failed to install AppScale Agents" + exit 1 + fi fi echo -n "Building AppScale Tools..." @@ -241,10 +247,12 @@ if ! 
(cd appscale-tools/debian; bash appscale_build.sh) ; then exit 1 fi -echo -n "Installing Thirdparty software..." -if ! (cd appscale-thirdparties/; bash install_all.sh) ; then - echo "Failed to install Thirdparties software" - exit 1 +if version_ge ${VERSION} 4.0.0; then + echo -n "Installing Thirdparty software..." + if ! (cd appscale-thirdparties/; bash install_all.sh) ; then + echo "Failed to install Thirdparties software" + exit 1 + fi fi # Let's source the profiles so this image can be used right away. diff --git a/upgrade.sh b/upgrade.sh new file mode 100755 index 0000000000..837a5ef134 --- /dev/null +++ b/upgrade.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# +# Simple script for upgrading AppScale to one of release versions. +# Author: AppScale Team + +set -e + +APPSCALE_REPO="git://github.com/AppScale/appscale.git" +APPSCALE_TOOLS_REPO="git://github.com/AppScale/appscale-tools.git" +AGENTS_REPO="git://github.com/AppScale/appscale-agents.git" +THIRDPARTIES_REPO="git://github.com/AppScale/appscale-thirdparties.git" +GIT_TAG="last" + +usage() { + echo "Usage: ${0} [--tag ]" + echo + echo "Options:" + echo " --tag Git tag (e.g.: 3.7.2) to upgrade to." + echo " Default: '${GIT_TAG}' (use the latest release)." + exit 1 +} + +version_ge() { + test "$(printf '%s\n' "$@" | sort -V | tail -1)" = "$1" +} + + +echo -n "Checking to make sure you are root..." +if [ "$(id -u)" != "0" ]; then + echo "Failed" 1>&2 + exit 1 +fi +echo "Success" + +echo -n "Checking to make sure \$HOME is /root..." +if [ "$HOME" != "/root" ]; then + echo "Failed" + exit 1 +fi +echo "Success" + +# Let's get the command line argument. +while [ $# -gt 0 ]; do + if [ "${1}" = "--tag" ]; then + shift; if [ -z "${1}" ]; then usage; fi + GIT_TAG="${1}"; + shift; continue + fi + echo "Parameter '$1' is not recognized" + echo + usage +done + + +# Determine the latest git tag on the AppScale/appscale repo +if [ "$GIT_TAG" = "last" ]; then + echo "Determining the latest tag in AppScale/appscale repo" + GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/tags \ + | grep '"name"' | head -1 \ + | awk -F ':' '{ print $2 }' | tr --delete ' ,"') +elif ! curl --fail https://api.github.com/repos/appscale/appscale/tags \ + | grep '"name"' | awk -F ':' '{ print $2 }' | tr --delete ' ,"' \ + | grep "^${GIT_TAG}$"; then + echo "Tag '${GIT_TAG}' not recognized" + echo "Use --tag to specify existing appscale repo tag to upgrade to." + exit 1 +fi +VERSION="${GIT_TAG}" + +echo +echo "Will be using the following github repos:" +echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" +echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" +if version_ge ${VERSION} 3.8.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi +if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi +echo "Exit now (ctrl-c) if this is incorrect" +echo +sleep 5 + +# Wait up to 30 seconds for the package lists lock to become available. +lock_wait_start=$(date +%s) +printed_status=false +while fuser /var/lib/apt/lists/lock; do + if [ "${printed_status}" = false ]; then + echo "Waiting for another process to update package lists" + printed_status=true + fi + current_time=$(date +%s) + elapsed_time=$((current_time - lock_wait_start)) + if [ "${elapsed_time}" -gt 30 ]; then break; fi + sleep 1 +done +apt-get update + +# Wait up to 2 min for the dpkg lock to become available. 
+lock_wait_start=$(date +%s) +printed_status=false +while fuser /var/lib/dpkg/lock; do + if [ "${printed_status}" = false ]; then + echo "Waiting for another process to update packages" + printed_status=true + fi + current_time=$(date +%s) + elapsed_time=$((current_time - lock_wait_start)) + if [ "${elapsed_time}" -gt 120 ]; then break; fi + sleep 1 +done +apt-get install -y git + + +if [ -d /etc/appscale/certs ]; then + CURRENT_VERSION=$(cat /root/appscale/VERSION | grep -oE "[0-9]+\.[0-9]+\.[0-9]+") + if [ -z "${CURRENT_VERSION}" ]; then + echo "Cannot determine version of AppScale!" + exit 1 + fi + + # Make sure AppScale is not running. + MONIT=$(which monit) + if ps -o args -ax | grep -E '$[a-z/]+ruby[ a-zA-Z/-]+djinnServer.rb' > /dev/null ; then + echo "AppScale is still running: please stop it" + exit 1 + elif echo ${MONIT} | grep local > /dev/null ; then + # AppScale is not running but there is a monit + # leftover from the custom install. + ${MONIT} quit + fi + + # Let's keep a copy of the old config: we need to move it to avoid + # questions from dpkg. + if [ -e /etc/haproxy/haproxy.cfg ]; then + mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.appscale.old + fi + + # Remove outdated appscale-controller and appscale-progenitor. + if version_ge 2.2.0 "${CURRENT_VERSION}"; then + rm -f /etc/init.d/appscale-controller + rm -f /etc/init.d/appscale-progenitor + update-rc.d -f appscale-progenitor remove || true + fi + + # Remove control files we added before 1.14, and re-add the + # default ones. + if version_ge 1.14.0 "${CURRENT_VERSION}"; then + rm -f /etc/default/haproxy /etc/init.d/haproxy /etc/default/monit /etc/monitrc + if dpkg-query -l haproxy > /dev/null 2> /dev/null ; then + apt-get -o DPkg::Options::="--force-confmiss" --reinstall install haproxy + fi + if dpkg-query -l monit > /dev/null 2> /dev/null ; then + apt-get -o DPkg::Options::="--force-confmiss" --reinstall install monit + fi + fi + + echo "Found AppScale version ${CURRENT_VERSION}. "\ + "An upgrade to the ${GIT_TAG} version will be attempted in 5 seconds." + echo "Exit now (ctrl-c) if this is incorrect" + sleep 5 +else + echo "No previous AppScale installation was found. Installing it from scratch." +fi + + +declare -A REPOS=( + ["appscale"]="${APPSCALE_REPO}" + ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" +) +if version_ge "${VERSION}" 3.8.0; then REPOS+=(["appscale-agents"]="${AGENTS_REPO}"); fi +if version_ge "${VERSION}" 4.0.0; then REPOS+=(["appscale-thirdparties"]="${THIRDPARTIES_REPO}"); fi + +# At this time we expect to be installed in $HOME. +cd $HOME + + +echo "Ensuring all appscale repos are pulled and checked out to the tag" +for repo_name in "${!REPOS[@]}"; do + repo="${REPOS[$repo_name]}" + if [ -d "/root/${repo_name}" ]; then + cd ${repo_name} + # ... + remote=$(git remote -v | grep "${repo} " | head -1 | awk '{ print $1 }') + if [ -z "${remote}" ]; then + remote="upgrade-$(date +%Y-%m-%d_%H-%M-%S)" + git remote add ${remote} "${repo}" + fi + git fetch ${remote} -t + current_branch="$(git branch --no-color | grep '^*' | cut -f 2 -d ' ')" + echo "Checking out /root/${repo_name} from '${current_branch}' to '${GIT_TAG}'" + if ! git checkout "${GIT_TAG}"; then + echo "Please stash your local unsaved changes at "\ + "/root/${repo_name} and checkout the version of AppScale "\ + "you are currently using to fix this error." + echo "e.g.: git stash; git checkout ${GIT_TAG}" + exit 1 + fi + # ... 
+ cd $HOME + else + git clone "${repo}" ${repo_name} + (cd ${repo_name}; git checkout "${GIT_TAG}") + fi +done + + +echo -n "Building AppScale..." +if ! (cd appscale/debian; bash appscale_build.sh) ; then + echo "Failed to upgrade AppScale core" + exit 1 +fi + +if version_ge ${VERSION} 3.8.0; then + echo -n "Installing AppScale Agents..." + if ! (cd appscale-agents/; make install-no-venv) ; then + echo "Failed to upgrade AppScale Agents" + exit 1 + fi +fi + +echo -n "Building AppScale Tools..." +if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then + echo "Failed to upgrade AppScale Tools" + exit 1 +fi + +if version_ge ${VERSION} 4.0.0; then + echo -n "Downloading Thirdparty artifacts..." + if ! (cd appscale-thirdparties/; bash install_all.sh) ; then + echo "Failed to upgrade Thirdparties software" + exit 1 + fi +fi + +# Let's source the profiles so this image can be used right away. +. /etc/profile.d/appscale.sh + +echo "**********************************" +echo "AppScale is upgraded to ${GIT_TAG}" +echo "**********************************" +exit 0 From 2edb9427fa96619c48977807396a75396cbb545a Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 4 Sep 2019 12:38:30 +0300 Subject: [PATCH 068/221] Undo UNIT_TEST removal --- bootstrap.sh | 35 +++++++++++++++++++++++++++++------ upgrade.sh | 31 +++++++++++++++++++++++++++---- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index d8349d22ce..a94caacddd 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -15,6 +15,7 @@ APPSCALE_TOOLS_BRANCH="master" AGENTS_BRANCH="master" THIRDPARTIES_BRANCH="master" GIT_TAG="last" +UNIT_TEST="N" BRANCH_PARAM_SPECIFIED="N" TAG_PARAM_SPECIFIED="N" @@ -22,7 +23,7 @@ TAG_PARAM_SPECIFIED="N" usage() { echo "Usage: ${0} [--repo ] [--branch ]" echo " [--tools-repo ] [--tools-branch ]" - echo " [--tag ]" + echo " [--tag ] [-t]" echo echo "Be aware that tag parameter has priority over repo and branch parameters." echo "So if no tag, repos and branches are specified, tag 'last' will be used." 
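Patch 068 restores the -t switch in both bootstrap.sh and upgrade.sh; the help text, the option parsing, and the rake runs follow in the hunks below. A typical invocation would look like this (illustrative only, using the example tag from the help text):

    # fresh install of a tagged release, then run the appscale and appscale-tools unit tests
    bash bootstrap.sh --tag 3.7.2 -t

    # in-place upgrade to the same tag, also followed by the unit tests
    bash upgrade.sh --tag 3.7.2 -t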
@@ -35,6 +36,7 @@ usage() { echo " --tools-branch Specify appscale-tools branch (default $APPSCALE_TOOLS_BRANCH)" echo " --tag Use git tag (ie 3.7.2) or 'last' to use the latest release" echo " or 'dev' for HEAD (default ${GIT_TAG})" + echo " -t Run unit tests" exit 1 } @@ -105,6 +107,11 @@ while [ $# -gt 0 ]; do if [${GIT_TAG} != "dev" ]; then TAG_PARAM_SPECIFIED="Y"; fi shift; continue fi + if [ "${1}" = "-t" ]; then + UNIT_TEST="Y" + shift; continue + fi + echo echo "Parameter '$1' is not recognized" echo usage @@ -139,7 +146,7 @@ if [ "${RELY_ON_TAG}" = "Y" ]; then echo "Will be using the following github repos:" echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" - if version_ge ${VERSION} 3.8.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi + if version_ge ${VERSION} 3.7.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi echo "Exit now (ctrl-c) if this is incorrect" else @@ -205,19 +212,19 @@ echo "Cloning appscale repositories" git clone ${APPSCALE_REPO} appscale VERSION=$(cat /root/appscale/VERSION | grep -oE "[0-9]+\.[0-9]+\.[0-9]+") git clone ${APPSCALE_TOOLS_REPO} appscale-tools -if version_ge ${VERSION} 3.8.0; then git clone ${AGENTS_REPO} appscale-agents; fi +if version_ge ${VERSION} 3.7.0; then git clone ${AGENTS_REPO} appscale-agents; fi if version_ge ${VERSION} 4.0.0; then git clone ${THIRDPARTIES_REPO} appscale-thirdparties; fi # Use tags if we specified it. if [ "${RELY_ON_TAG}" = "Y" ]; then (cd appscale; git checkout "${GIT_TAG}") (cd appscale-tools; git checkout "${GIT_TAG}") - if version_ge ${VERSION} 3.8.0; then (cd appscale-agents; git checkout "${GIT_TAG}"); fi + if version_ge ${VERSION} 3.7.0; then (cd appscale-agents; git checkout "${GIT_TAG}"); fi if version_ge ${VERSION} 4.0.0; then (cd appscale-thirdparties; git checkout "${GIT_TAG}"); fi else (cd appscale; git checkout ${APPSCALE_BRANCH}) (cd appscale-tools; git checkout ${APPSCALE_TOOLS_BRANCH}) - if version_ge ${VERSION} 3.8.0; then (cd appscale-agents; git checkout ${AGENTS_BRANCH}); fi + if version_ge ${VERSION} 3.7.0; then (cd appscale-agents; git checkout ${AGENTS_BRANCH}); fi if version_ge ${VERSION} 4.0.0; then (cd appscale-thirdparties; git checkout ${THIRDPARTIES_BRANCH}); fi fi @@ -227,7 +234,7 @@ if ! (cd appscale/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge ${VERSION} 3.8.0; then +if version_ge ${VERSION} 3.7.0; then echo -n "Installing AppScale Agents..." if ! (cd appscale-agents/; make install-no-venv) ; then echo "Failed to install AppScale Agents" @@ -249,6 +256,22 @@ if version_ge ${VERSION} 4.0.0; then fi fi +# Run unit tests if asked. +if [ "$UNIT_TEST" = "Y" ]; then + echo "Running Unit tests" + (cd appscale; rake) + if [ $? -gt 0 ]; then + echo "Unit tests failed for appscale!" + exit 1 + fi + (cd appscale-tools; rake) + if [ $? -gt 0 ]; then + echo "Unit tests failed for appscale-tools!" + exit 1 + fi + echo "Unit tests complete" +fi + # Let's source the profiles so this image can be used right away. . 
/etc/profile.d/appscale.sh diff --git a/upgrade.sh b/upgrade.sh index 837a5ef134..461d72df4f 100755 --- a/upgrade.sh +++ b/upgrade.sh @@ -10,13 +10,15 @@ APPSCALE_TOOLS_REPO="git://github.com/AppScale/appscale-tools.git" AGENTS_REPO="git://github.com/AppScale/appscale-agents.git" THIRDPARTIES_REPO="git://github.com/AppScale/appscale-thirdparties.git" GIT_TAG="last" +UNIT_TEST="N" usage() { - echo "Usage: ${0} [--tag ]" + echo "Usage: ${0} [--tag ] [-t]" echo echo "Options:" echo " --tag Git tag (e.g.: 3.7.2) to upgrade to." echo " Default: '${GIT_TAG}' (use the latest release)." + echo " -t Run unit tests" exit 1 } @@ -46,6 +48,11 @@ while [ $# -gt 0 ]; do GIT_TAG="${1}"; shift; continue fi + if [ "${1}" = "-t" ]; then + UNIT_TEST="Y" + shift; continue + fi + echo echo "Parameter '$1' is not recognized" echo usage @@ -71,7 +78,7 @@ echo echo "Will be using the following github repos:" echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" -if version_ge ${VERSION} 3.8.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi +if version_ge ${VERSION} 3.7.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi echo "Exit now (ctrl-c) if this is incorrect" echo @@ -164,7 +171,7 @@ declare -A REPOS=( ["appscale"]="${APPSCALE_REPO}" ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" ) -if version_ge "${VERSION}" 3.8.0; then REPOS+=(["appscale-agents"]="${AGENTS_REPO}"); fi +if version_ge "${VERSION}" 3.7.0; then REPOS+=(["appscale-agents"]="${AGENTS_REPO}"); fi if version_ge "${VERSION}" 4.0.0; then REPOS+=(["appscale-thirdparties"]="${THIRDPARTIES_REPO}"); fi # At this time we expect to be installed in $HOME. @@ -207,7 +214,7 @@ if ! (cd appscale/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge ${VERSION} 3.8.0; then +if version_ge ${VERSION} 3.7.0; then echo -n "Installing AppScale Agents..." if ! (cd appscale-agents/; make install-no-venv) ; then echo "Failed to upgrade AppScale Agents" @@ -229,6 +236,22 @@ if version_ge ${VERSION} 4.0.0; then fi fi +# Run unit tests if asked. +if [ "$UNIT_TEST" = "Y" ]; then + echo "Running Unit tests" + (cd appscale; rake) + if [ $? -gt 0 ]; then + echo "Unit tests failed for appscale!" + exit 1 + fi + (cd appscale-tools; rake) + if [ $? -gt 0 ]; then + echo "Unit tests failed for appscale-tools!" + exit 1 + fi + echo "Unit tests complete" +fi + # Let's source the profiles so this image can be used right away. . 
/etc/profile.d/appscale.sh From 32b7a109ec791319fdfada11567861f38f84a54b Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 4 Sep 2019 14:43:59 +0300 Subject: [PATCH 069/221] Update clonning order --- bootstrap.sh | 43 +++++++++++++++++++++++++++---------------- upgrade.sh | 6 +++--- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index a94caacddd..ddb49f1aea 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -147,7 +147,7 @@ if [ "${RELY_ON_TAG}" = "Y" ]; then echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" if version_ge ${VERSION} 3.7.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi - if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi + if version_ge ${VERSION} 3.8.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi echo "Exit now (ctrl-c) if this is incorrect" else echo "Will be using the following github repos:" @@ -206,27 +206,38 @@ for appscale_presence_marker in ${APPSCALE_DIRS}; do done +if [ "${RELY_ON_TAG}" = "Y" ]; then + APPSCALE_TARGET="${GIT_TAG}" + TOOLS_TARGET="${GIT_TAG}" + AGENTS_TARGET="${GIT_TAG}" + THIRDPARTIES_TARGET="${GIT_TAG}" +else + APPSCALE_TARGET="${APPSCALE_BRANCH}" + TOOLS_TARGET="${APPSCALE_TOOLS_BRANCH}" + AGENTS_TARGET="${AGENTS_BRANCH}" + THIRDPARTIES_TARGET="${THIRDPARTIES_BRANCH}" +fi + + echo "Cloning appscale repositories" # We split the commands, to ensure it fails if branch doesn't # exists (Precise git will not fail otherwise). git clone ${APPSCALE_REPO} appscale +(cd appscale; git checkout ${APPSCALE_TARGET}) VERSION=$(cat /root/appscale/VERSION | grep -oE "[0-9]+\.[0-9]+\.[0-9]+") + git clone ${APPSCALE_TOOLS_REPO} appscale-tools -if version_ge ${VERSION} 3.7.0; then git clone ${AGENTS_REPO} appscale-agents; fi -if version_ge ${VERSION} 4.0.0; then git clone ${THIRDPARTIES_REPO} appscale-thirdparties; fi +(cd appscale-tools; git checkout "${TOOLS_TARGET}") -# Use tags if we specified it. -if [ "${RELY_ON_TAG}" = "Y" ]; then - (cd appscale; git checkout "${GIT_TAG}") - (cd appscale-tools; git checkout "${GIT_TAG}") - if version_ge ${VERSION} 3.7.0; then (cd appscale-agents; git checkout "${GIT_TAG}"); fi - if version_ge ${VERSION} 4.0.0; then (cd appscale-thirdparties; git checkout "${GIT_TAG}"); fi -else - (cd appscale; git checkout ${APPSCALE_BRANCH}) - (cd appscale-tools; git checkout ${APPSCALE_TOOLS_BRANCH}) - if version_ge ${VERSION} 3.7.0; then (cd appscale-agents; git checkout ${AGENTS_BRANCH}); fi - if version_ge ${VERSION} 4.0.0; then (cd appscale-thirdparties; git checkout ${THIRDPARTIES_BRANCH}); fi +if version_ge "${VERSION}" 3.7.0; then + git clone ${AGENTS_REPO} appscale-agents + (cd appscale-agents; git checkout "${AGENTS_TARGET}") fi +if version_ge "${VERSION}" 3.8.0; then + git clone ${THIRDPARTIES_REPO} appscale-thirdparties + (cd appscale-thirdparties; git checkout "${THIRDPARTIES_TARGET}") +fi + echo -n "Building AppScale..." if ! (cd appscale/debian; bash appscale_build.sh) ; then @@ -234,7 +245,7 @@ if ! (cd appscale/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge ${VERSION} 3.7.0; then +if version_ge "${VERSION}" 3.7.0; then echo -n "Installing AppScale Agents..." if ! (cd appscale-agents/; make install-no-venv) ; then echo "Failed to install AppScale Agents" @@ -248,7 +259,7 @@ if ! 
(cd appscale-tools/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge ${VERSION} 4.0.0; then +if version_ge "${VERSION}" 3.8.0; then echo -n "Installing Thirdparty software..." if ! (cd appscale-thirdparties/; bash install_all.sh) ; then echo "Failed to install Thirdparties software" diff --git a/upgrade.sh b/upgrade.sh index 461d72df4f..f0f9c95738 100755 --- a/upgrade.sh +++ b/upgrade.sh @@ -79,7 +79,7 @@ echo "Will be using the following github repos:" echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" if version_ge ${VERSION} 3.7.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi -if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi +if version_ge ${VERSION} 3.8.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi echo "Exit now (ctrl-c) if this is incorrect" echo sleep 5 @@ -172,7 +172,7 @@ declare -A REPOS=( ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" ) if version_ge "${VERSION}" 3.7.0; then REPOS+=(["appscale-agents"]="${AGENTS_REPO}"); fi -if version_ge "${VERSION}" 4.0.0; then REPOS+=(["appscale-thirdparties"]="${THIRDPARTIES_REPO}"); fi +if version_ge "${VERSION}" 3.8.0; then REPOS+=(["appscale-thirdparties"]="${THIRDPARTIES_REPO}"); fi # At this time we expect to be installed in $HOME. cd $HOME @@ -228,7 +228,7 @@ if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge ${VERSION} 4.0.0; then +if version_ge ${VERSION} 3.8.0; then echo -n "Downloading Thirdparty artifacts..." if ! (cd appscale-thirdparties/; bash install_all.sh) ; then echo "Failed to upgrade Thirdparties software" From 777ada0a1301dd15b74a18ead9b922b2a4c3e617 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 4 Sep 2019 17:14:23 -0700 Subject: [PATCH 070/221] Do not request KVs when query limit is zero The datastore API treats a limit of "0" as a request for 0 results rather than a request for all the entities that match the query. --- AppDB/appscale/datastore/fdb/utils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/utils.py b/AppDB/appscale/datastore/fdb/utils.py index 9a2c0b47fe..db089360d2 100644 --- a/AppDB/appscale/datastore/fdb/utils.py +++ b/AppDB/appscale/datastore/fdb/utils.py @@ -162,7 +162,7 @@ def _handle_fdb_result(self, fdb_future, tornado_future): class ResultIterator(object): """ Allows clients to page through a range of Key-Values. 
""" - def __init__(self, tr, tornado_fdb, key_slice, limit=0, reverse=False, + def __init__(self, tr, tornado_fdb, key_slice, limit=None, reverse=False, streaming_mode=fdb.StreamingMode.iterator, snapshot=False): self.slice = key_slice self.done_with_range = False @@ -193,7 +193,7 @@ def __repr__(self): self.slice, self._limit, self._reverse, self._mode, self._snapshot) def increase_limit(self, difference=1): - if not self.done_with_range: + if self._limit is not None and not self.done_with_range: self._limit += difference self._done = False @@ -203,12 +203,15 @@ def next_page(self, mode=None): if self._done: raise gen.Return(([], False)) - tmp_limit = 0 - if self._limit > 0: - tmp_limit = self._limit - self._fetched + if self._limit is None: + fdb_limit = 0 + else: + fdb_limit = self._limit - self._fetched + if fdb_limit < 1: + raise gen.Return(([], False)) results, count, more = yield self._tornado_fdb.get_range( - self._tr, slice(self._bsel, self._esel), tmp_limit, mode, + self._tr, slice(self._bsel, self._esel), fdb_limit, mode, self._iteration, self._reverse, self._snapshot) self._fetched += count self._iteration += 1 @@ -219,7 +222,7 @@ def next_page(self, mode=None): else: self._bsel = fdb.KeySelector.first_greater_than(results[-1].key) - reached_limit = self._limit > 0 and self._fetched == self._limit + reached_limit = self._limit is not None and self._fetched >= self._limit self._done = not more or reached_limit self.done_with_range = not more and not reached_limit From ea5b02322aca7803fb881295d052e091d5574196 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 4 Sep 2019 18:31:26 -0700 Subject: [PATCH 071/221] AppController terminate update for newer ifconfig output --- AppController/terminate.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppController/terminate.rb b/AppController/terminate.rb index 134252a043..7c99a927c3 100644 --- a/AppController/terminate.rb +++ b/AppController/terminate.rb @@ -134,7 +134,7 @@ def self.erase_appscale_full_state def self.disable_database_writes # First, tell Cassandra that no more writes should be accepted on this node. 
ifconfig = `ifconfig` - bound_addrs = ifconfig.scan(/inet addr:(\d+.\d+.\d+.\d+)/).flatten + bound_addrs = ifconfig.scan(/inet .*?(\d+.\d+.\d+.\d+) /).flatten bound_addrs.delete("127.0.0.1") ip = bound_addrs[0] From 18028f84ac9548903ea3714996fc094856683e7d Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 5 Sep 2019 13:02:44 +0300 Subject: [PATCH 072/221] Update python3 packages in control files --- debian/control.bionic | 1 + debian/control.jessie | 0 debian/control.stretch | 3 +-- debian/control.xenial | 3 +-- 4 files changed, 3 insertions(+), 4 deletions(-) delete mode 100644 debian/control.jessie diff --git a/debian/control.bionic b/debian/control.bionic index 6ea6466857..e2d2629679 100644 --- a/debian/control.bionic +++ b/debian/control.bionic @@ -84,6 +84,7 @@ Depends: appscale-tools, python-twisted, python-virtualenv, python3-venv, + python3-dev, python-xmpp, python-yaml, rabbitmq-server, diff --git a/debian/control.jessie b/debian/control.jessie deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/debian/control.stretch b/debian/control.stretch index e8de1cf7e6..2edd139a1b 100644 --- a/debian/control.stretch +++ b/debian/control.stretch @@ -85,10 +85,9 @@ Depends: appscale-tools, python-twisted, python-virtualenv, python3-venv, + python3-dev, python-xmpp, python-yaml, - python3-venv, - python3.5-dev, rabbitmq-server, rsync, rsyslog, diff --git a/debian/control.xenial b/debian/control.xenial index 0f542dda3c..4f68a4f7e2 100644 --- a/debian/control.xenial +++ b/debian/control.xenial @@ -86,9 +86,8 @@ Depends: appscale-tools, python-twisted, python-virtualenv, python3-venv, + python3-dev, python-xmpp, - python3-venv, - python3.5-dev, rabbitmq-server, rsync, rsyslog, From 008e0b2f5e03d93939ba07bc789c332d1b7dfabd Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 5 Sep 2019 18:00:47 +0300 Subject: [PATCH 073/221] Avoid IOLoops collision --- SearchService2/appscale/search/scripts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SearchService2/appscale/search/scripts.py b/SearchService2/appscale/search/scripts.py index 1e46820bac..b4b6bea669 100644 --- a/SearchService2/appscale/search/scripts.py +++ b/SearchService2/appscale/search/scripts.py @@ -37,12 +37,12 @@ def list_solr_collections(): connection_retry=ZK_PERSISTENT_RECONNECTS ) zk_client.start() + adapter = solr_adapter.SolrAdapter(zk_client) async def list_collections(): """ Asynchronously fetches a list of Solr collections from SolrCloud and prints it to stdout. """ - adapter = solr_adapter.SolrAdapter(zk_client) try: collections, broken = await adapter.solr.list_collections() logger.info('Collections:\n {}'.format('\n '.join(collections))) @@ -78,11 +78,11 @@ def delete_solr_collection(): connection_retry=ZK_PERSISTENT_RECONNECTS ) zk_client.start() + adapter = solr_adapter.SolrAdapter(zk_client) async def delete_collection(): """ Asynchronously deletes Solr collection. 
""" - adapter = solr_adapter.SolrAdapter(zk_client) try: await adapter.solr.delete_collection(args.collection) except (SolrServerError, SolrClientError) as err: @@ -127,6 +127,7 @@ def reindex(): connection_retry=ZK_PERSISTENT_RECONNECTS ) zk_client.start() + adapter = solr_adapter.SolrAdapter(zk_client) async def reindex_documents(): """ Loops through all documents in the index and @@ -134,7 +135,6 @@ async def reindex_documents(): """ logger.info('Reindexing documents from {}|{}|{}' .format(args.project, args.namespace, args.index)) - adapter = solr_adapter.SolrAdapter(zk_client) has_more = True start_doc_id = None total = 0 From e6224527ff55467eda1f1f723f5d2685464edcad Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 5 Sep 2019 13:34:37 -0700 Subject: [PATCH 074/221] AppController terminate cassandra check allows for newer monit output --- AppController/terminate.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppController/terminate.rb b/AppController/terminate.rb index 7c99a927c3..2bf76ee025 100644 --- a/AppController/terminate.rb +++ b/AppController/terminate.rb @@ -140,7 +140,7 @@ def self.disable_database_writes # Make sure we have cassandra running, otherwise nodetool may get # stuck. - if system("monit summary | grep cassandra | grep Running > /dev/null") + if system("monit summary | grep cassandra | grep 'Running\\|OK' > /dev/null") `/opt/cassandra/cassandra/bin/nodetool -h #{ip} -p 7199 drain` end From 1d209812dbfd5109f1de3f7d1d51151973537a9d Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 5 Sep 2019 16:37:05 -0700 Subject: [PATCH 075/221] Retry unknown commit result for simple mutations This can reduce error messages for idempotent operations. --- AppDB/appscale/datastore/fdb/fdb_datastore.py | 12 ++++++++++-- AppDB/appscale/datastore/fdb/utils.py | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index e3157e5fa6..06ebbe5ec8 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -101,7 +101,11 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): try: yield self._tornado_fdb.commit(tr, convert_exceptions=False) except fdb.FDBError as fdb_error: - if fdb_error.code != FDBErrorCodes.NOT_COMMITTED: + if fdb_error.code == FDBErrorCodes.NOT_COMMITTED: + pass + elif fdb_error.code == FDBErrorCodes.COMMIT_RESULT_UNKNOWN: + logger.error('Unable to determine commit result. Retrying.') + else: raise InternalError(fdb_error.description) retries -= 1 @@ -187,7 +191,11 @@ def dynamic_delete(self, project_id, delete_request, retries=5): try: yield self._tornado_fdb.commit(tr, convert_exceptions=False) except fdb.FDBError as fdb_error: - if fdb_error.code != FDBErrorCodes.NOT_COMMITTED: + if fdb_error.code == FDBErrorCodes.NOT_COMMITTED: + pass + elif fdb_error.code == FDBErrorCodes.COMMIT_RESULT_UNKNOWN: + logger.error('Unable to determine commit result. 
Retrying.') + else: raise InternalError(fdb_error.description) retries -= 1 diff --git a/AppDB/appscale/datastore/fdb/utils.py b/AppDB/appscale/datastore/fdb/utils.py index 9a2c0b47fe..dbd0dcc2d8 100644 --- a/AppDB/appscale/datastore/fdb/utils.py +++ b/AppDB/appscale/datastore/fdb/utils.py @@ -51,6 +51,7 @@ class FDBErrorCodes(object): NOT_COMMITTED = 1020 + COMMIT_RESULT_UNKNOWN = 1021 def ReverseBitsInt64(v): From 3c58f0a15ff2bb2e386a6ab21c104a5e3c4fb025 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 6 Sep 2019 13:55:23 +0300 Subject: [PATCH 076/221] Add fdb initialization step to faststart --- scripts/fast-start.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/fast-start.sh b/scripts/fast-start.sh index f4a76b6373..70b57603d0 100755 --- a/scripts/fast-start.sh +++ b/scripts/fast-start.sh @@ -205,6 +205,12 @@ case "$PROVIDER" in ;; esac +echo "Configuring local foundationdb" +/root/appscale-thirdparties/foundationdb/configure-and-start-fdb.sh \ + --data-dir /opt/appscale/fdb-data/ \ + --fdbcli-command 'configure new single ssd' +FDB_CLUSTERFILE_CONTENT=$(cat /etc/foundationdb/fdb.cluster) + # Let's make sure we don't overwrite and existing AppScalefile. if [ ! -e AppScalefile ]; then # Let's make sure we detected the IPs. @@ -234,6 +240,7 @@ if [ ! -e AppScalefile ]; then echo "admin_user : $ADMIN_EMAIL" >> AppScalefile echo "admin_pass : $ADMIN_PASSWD" >> AppScalefile fi + echo "fdb_clusterfile_content : ${FDB_CLUSTERFILE_CONTENT}" >> AppScalefile echo "group : faststart-${PROVIDER}" >> AppScalefile echo "done." From 6912d1b3b07211197ac243d40edbd9fd928f3af3 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Mon, 9 Sep 2019 11:36:24 -0700 Subject: [PATCH 077/221] Few more changes on the start/stop procedure Start hermes and the admin_server in a thread Don't start flower if there is no password set --- AppController/djinn.rb | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 7ecdaa1124..ea1359a53d 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3386,7 +3386,7 @@ def start_stop_api_services } end - start_admin_server + threads << Thread.new { start_admin_server } if my_node.is_memcache? threads << Thread.new { start_memcache } @@ -3447,6 +3447,9 @@ def start_stop_api_services threads << Thread.new { stop_taskqueue } end + # Start Hermes with integrated stats service + threads << Thread.new ( start_hermes } + # App Engine apps rely on the above services to be started, so # join all our threads here Djinn.log_info('Waiting for relevant services to finish starting up,') @@ -3455,15 +3458,14 @@ def start_stop_api_services end Djinn.log_info('API services have started on this node.') - # Start Hermes with integrated stats service - start_hermes - # Leader node starts additional services. if my_node.is_shadow? @state = 'Assigning Datastore and Search2 processes' assign_datastore_processes assign_search2_processes - TaskQueue.start_flower(@options['flower_password']) + + # Don't start flower if we don't have a password. + TaskQueue.start_flower(@options['flower_password']) unless @options['flower_password'].nil? 
else TaskQueue.stop_flower end @@ -3850,10 +3852,7 @@ def build_uncommitted_changes end update_dirs = @options['update'] - - if update_dirs == "all" - update_dirs = ALLOWED_DIR_UPDATES.join(',') - end + update_dirs = ALLOWED_DIR_UPDATES if update_dirs == ['all'] # Update Python packages across corresponding virtual environments if update_dirs.include?('common') From 80030b2b86acd2ef5def8fe487a25324064e9aa6 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Mon, 9 Sep 2019 12:06:49 -0700 Subject: [PATCH 078/221] Typo --- AppController/djinn.rb | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index ea1359a53d..2caa799224 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3448,7 +3448,7 @@ def start_stop_api_services end # Start Hermes with integrated stats service - threads << Thread.new ( start_hermes } + threads << Thread.new { start_hermes } # App Engine apps rely on the above services to be started, so # join all our threads here @@ -3912,9 +3912,7 @@ def initialize_nodes_in_parallel(must_have, nice_have) threads = [] must_have.each { |slave| next if slave.private_ip == my_node.private_ip - threads << Thread.new { - initialize_node(slave) - } + threads << Thread.new { initialize_node(slave) } } # If we cannot reconnect with autoscaled nodes, we will have to clean From 1a7fa1a20e8860abd3b4355b67029474bbe39fdf Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:28:45 -0700 Subject: [PATCH 079/221] AppController service now uses systemd, monit configs now under /run --- AppController/djinn.rb | 24 +---- AppController/lib/monit_interface.rb | 4 +- AppController/scripts/appcontroller | 101 ------------------ AppController/scripts/appscale-unmonit.sh | 21 ---- AppController/terminate.rb | 11 +- bootstrap.sh | 2 +- .../common/monit_app_configuration.py | 2 +- debian/appscale_install_functions.sh | 22 ++-- system/tmpfiles.d/appscale.conf | 3 + system/units/appscale-controller.service | 25 +++++ 10 files changed, 49 insertions(+), 166 deletions(-) delete mode 100755 AppController/scripts/appcontroller delete mode 100755 AppController/scripts/appscale-unmonit.sh create mode 100644 system/tmpfiles.d/appscale.conf create mode 100644 system/units/appscale-controller.service diff --git a/AppController/djinn.rb b/AppController/djinn.rb index e9134a02d7..47dd7c0bd6 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -1806,10 +1806,8 @@ def job_start(secret) initialize_server start_stop_api_services - # Now that we are done loading, we can set the monit job to check the - # AppController. At this point we are resilient to failure (ie the AC + # Now that we are done loading, we are resilient to failure (ie the AC # will restart if needed). - set_appcontroller_monit @done_loading = true pick_zookeeper(@zookeeper_data) @@ -4430,29 +4428,11 @@ def run_user_commands(node, user_commands) } end - def set_appcontroller_monit - Djinn.log_debug("Configuring AppController monit.") - service = `which service`.chomp - start_cmd = "#{service} appscale-controller start" - pidfile = '/var/run/appscale/controller.pid' - stop_cmd = "/sbin/start-stop-daemon --stop --pidfile #{pidfile} --retry=TERM/30/KILL/5" - - # Let's make sure we don't have 2 roles monitoring the controller. 
- FileUtils.rm_rf("/etc/monit/conf.d/controller-17443.cfg") - - begin - MonitInterface.start_daemon(:controller, start_cmd, stop_cmd, pidfile) - rescue - Djinn.log_warn("Failed to set local AppController monit: retrying.") - retry - end - end - def start_appcontroller(node) ip = node.private_ip # Start the AppController on the remote machine. - remote_cmd = "/usr/sbin/service appscale-controller start" + remote_cmd = "/bin/systemctl start appscale-controller" tries = RETRIES begin result = HelperFunctions.run_remote_command(ip, remote_cmd, node.ssh_key, true) diff --git a/AppController/lib/monit_interface.rb b/AppController/lib/monit_interface.rb index bbf6e1ac2b..3abc79fb25 100644 --- a/AppController/lib/monit_interface.rb +++ b/AppController/lib/monit_interface.rb @@ -8,7 +8,7 @@ require 'helperfunctions' # Where we save the configuration file. -MONIT_CONFIG = '/etc/monit/conf.d'.freeze +MONIT_CONFIG = '/run/appscale/monit.conf.d'.freeze # Monit is finicky when it comes to multiple commands at the same time. # Let's make sure we serialize access. @@ -170,7 +170,7 @@ def self.service_config(process_name, group, start_cmd, env_vars, mem) bash = `which bash`.chomp rm = `which rm`.chomp - pidfile = "/var/run/appscale/#{process_name}.pid" + pidfile = "/run/appscale/#{process_name}.pid" logfile = "/var/log/appscale/#{process_name}.log" bash_exec = "exec env #{env_vars_str} #{start_cmd} >> #{logfile} 2>&1" diff --git a/AppController/scripts/appcontroller b/AppController/scripts/appcontroller deleted file mode 100755 index 4ec4276425..0000000000 --- a/AppController/scripts/appcontroller +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/sh - -# pidfile: /var/run/appscale/controller.pid - -### BEGIN INIT INFO -# Provides: appscale-controller -# Required-Start: $local_fs -# Required-Stop: $local_fs -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# X-Interactive: false -# Short-Description: Init script for AppScale AppController. -# Description: Start/stop script for AppScale AppController. -### END INIT INFO - -DESC="AppScale Controller" -DIR=/root/appscale/AppController -DAEMON=$DIR/djinnServer.rb -DAEMON_NAME=appscale-controller -DAEMON_USER=root -PIDFILE=/var/run/appscale/controller.pid -SECRET_FILE=/etc/appscale/secret.key -LOG_FILE=/var/log/appscale/controller.log -PATH=/usr/local/bin:$PATH -. /lib/lsb/init-functions - -do_start() -{ - if [ ! -e $SECRET_FILE ]; then - log_begin_msg "AppScale not configured: not starting." - exit 1 - fi - - # The run directory is cleared on every reboot. - mkdir -p /var/run/appscale - - log_daemon_msg "Starting system $DAEMON_NAME daemon" - start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE\ - --user $DAEMON_USER --chuid $DAEMON_USER --startas /bin/bash -- -c\ - "exec $DAEMON >> $LOG_FILE 2>&1" - log_end_msg $? -} - -do_stop() -{ - log_daemon_msg "Stopping system $DAEMON_NAME daemon" - monit stop controller - RETVAL="$?" - - if [ "${RETVAL}" = "0" ]; then - rm ${PIDFILE} - fi - - return "$RETVAL" -} - -do_force_stop() -{ - log_daemon_msg "Stopping all local AppScale services" - ruby -w #{DIR}/terminate.rb -} - -case "$1" in - - start) - if status_of_proc "$DAEMON_NAME" "$DAEMON" > /dev/null; then - echo "$DAEMON_NAME already running." - exit 0 - fi - do_${1} - ;; - - stop) - do_stop - case "$?" in - # The start-stop-daemon returns "1" if service is already stopped. 
- 0|1) log_end_msg 0 ;; - *) log_end_msg 1 ;; - esac - ;; - - force-stop) - do_force_stop - ;; - - restart|reload|force-reload) - do_stop - do_start - ;; - - status) - status_of_proc "$DAEMON" "$DAEMON_NAME" && exit 0 || exit $? - ;; - - *) - echo "Usage: /etc/init.d/$DAEMON_NAME {start|stop|restart|status}" - exit 1 - ;; - -esac -exit 0 diff --git a/AppController/scripts/appscale-unmonit.sh b/AppController/scripts/appscale-unmonit.sh deleted file mode 100755 index 50757a632e..0000000000 --- a/AppController/scripts/appscale-unmonit.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -### BEGIN INIT INFO -# Provides: appscale-unmonit -# Required-Start: $local_fs -# Required-Stop: $local_fs -# Default-Start: 2 3 4 5 -# Default-Stop: 0 1 6 -# Description: Prevents Monit from starting AppScale services. -### END INIT INFO - -# This script is meant to be run at boot to prevent monit from starting -# services. The AppController should restart them as needed. - -case "$1" in - start) - rm -rf /etc/monit/conf.d/appscale*cfg - echo "AppScale Monit files removed" - ;; - stop) echo "Service stopped" ;; - *) echo "Usage: $0 {start|stop}" -esac diff --git a/AppController/terminate.rb b/AppController/terminate.rb index 134252a043..d548270489 100644 --- a/AppController/terminate.rb +++ b/AppController/terminate.rb @@ -12,7 +12,7 @@ module TerminateHelper # TODO: Use FileUtils.rm_rf instead of backticks throughout this # method. def self.erase_appscale_state - `service appscale-controller stop` + `systemctl stop appscale-controller` `rm -f #{APPSCALE_CONFIG_DIR}/secret.key` `rm -f /tmp/uploaded-apps` @@ -33,8 +33,7 @@ def self.erase_appscale_state # The process has finished. end - `rm -f /etc/monit/conf.d/appscale*.cfg` - `rm -f /etc/monit/conf.d/controller-17443.cfg` + `rm -f /run/appscale/monit.conf.d/appscale*.cfg` # Stop datastore and search servers. for slice_name in ['appscale-datastore', 'appscale-search'] @@ -73,15 +72,13 @@ def self.erase_appscale_state # TODO: Use the constant in djinn.rb (ZK_LOCATIONS_JSON_FILE) `rm -rf #{APPSCALE_CONFIG_DIR}/zookeeper_locations.json` `rm -rf #{APPSCALE_CONFIG_DIR}/zookeeper_locations` - `rm -f /opt/appscale/appcontroller-state.json` - `rm -f /opt/appscale/appserver-state.json` print "OK" end # This functions ensure that the services AppScale started that have a - # PID in /var/run/appscale got terminated. + # PID in /run/appscale got terminated. def self.ensure_services_are_stopped - Dir["/var/run/appscale/*.pid"].each { |pidfile| + Dir["/run/appscale/*.pid"].each { |pidfile| # Nothing should still be running after the controller got stopped, # so we unceremoniously kill them. begin diff --git a/bootstrap.sh b/bootstrap.sh index 28d7204243..0613ed6b1f 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -270,7 +270,7 @@ if [ -d /etc/appscale/certs ]; then # Make sure AppScale is not running. 
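The running-check in the hunk below moves from parsing monit summary to systemctl is-active, which prints the unit state and, more usefully for scripts, exits 0 only while the unit is active. A minimal sketch of the same guard:

    if systemctl is-active appscale-controller > /dev/null; then
        echo "AppScale is still running: please stop it" 1>&2
        exit 1
    fi
    # systemctl is-active --quiet <unit> behaves the same and suppresses the output.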
MONIT=$(which monit) - if $MONIT summary | grep controller > /dev/null ; then + if systemctl is-active appscale-controller > /dev/null ; then echo "AppScale is still running: please stop it" [ "${FORCE_UPGRADE}" = "Y" ] || exit 1 elif echo $MONIT | grep local > /dev/null ; then diff --git a/common/appscale/common/monit_app_configuration.py b/common/appscale/common/monit_app_configuration.py index 92998c0000..04c7b331d4 100644 --- a/common/appscale/common/monit_app_configuration.py +++ b/common/appscale/common/monit_app_configuration.py @@ -14,7 +14,7 @@ TEMPLATE_LOCATION = os.path.join(TEMPLATE_DIR, 'monit_template.conf') # The directory used when storing a service's config file. -MONIT_CONFIG_DIR = '/etc/monit/conf.d' +MONIT_CONFIG_DIR = '/run/appscale/monit.conf.d' def create_config_file(watch, start_cmd, pidfile, port=None, env_vars=None, diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 439a30c49f..efd504354b 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -400,19 +400,16 @@ postinstallcassandra() installservice() { # This must be absolute path of runtime. - mkdir -pv ${DESTDIR}/etc/init.d/ - cp ${APPSCALE_HOME_RUNTIME}/AppController/scripts/appcontroller ${DESTDIR}/etc/init.d/appscale-controller - chmod -v a+x ${DESTDIR}/etc/init.d/appscale-controller + mkdir -pv ${DESTDIR}/usr/lib/tmpfiles.d + cp -v ${APPSCALE_HOME_RUNTIME}/system/tmpfiles.d/appscale.conf ${DESTDIR}/usr/lib/tmpfiles.d/ + systemd-tmpfiles --create - # Make sure the init script runs each time, so that it can start the - # AppController on system reboots. - update-rc.d -f appscale-controller defaults + mkdir -pv ${DESTDIR}/lib/systemd/system + cp -v ${APPSCALE_HOME_RUNTIME}/system/units/appscale-controller.service ${DESTDIR}/lib/systemd/system/ + systemctl daemon-reload - # Prevent monit from immediately restarting services at boot. - cp ${APPSCALE_HOME}/AppController/scripts/appscale-unmonit.sh \ - /etc/init.d/appscale-unmonit - chmod -v a+x /etc/init.d/appscale-unmonit - update-rc.d appscale-unmonit defaults 19 21 + # Enable AppController on system reboots. + systemctl enable appscale-controller || true } postinstallservice() @@ -515,6 +512,9 @@ postinstallmonit() if ! grep -v '^#' /etc/monit/monitrc |grep httpd > /dev/null; then cat < Date: Mon, 9 Sep 2019 17:28:56 -0700 Subject: [PATCH 080/221] Updates for systemd, use systemctl --- AppController/djinn.rb | 6 ++--- AppController/terminate.rb | 10 +++---- common/appscale/common/constants.py | 3 --- debian/appscale_install_functions.sh | 39 +++++++--------------------- 4 files changed, 17 insertions(+), 41 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 47dd7c0bd6..61c01a1385 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -4522,7 +4522,7 @@ def start_ejabberd # Monit does not have an entry for ejabberd yet. This allows a restart # with the new configuration if it is already running. 
- `service ejabberd stop` + `systemctl stop ejabberd` Ejabberd.start end @@ -4738,7 +4738,7 @@ def check_stopped_apps "#{version_key} is no longer running: removing log configuration.") begin FileUtils.rm(get_rsyslog_conf(version_key)) - HelperFunctions.shell("service rsyslog restart") + HelperFunctions.shell("systemctl restart rsyslog") rescue Errno::ENOENT, Errno::EACCES Djinn.log_debug("Old syslog for #{version_key} wasn't there.") end @@ -4862,7 +4862,7 @@ def setup_appengine_version(version_key) unless existing_app_log_config == app_log_config Djinn.log_info("Installing log configuration for #{version_key}.") HelperFunctions.write_file(app_log_config_file, app_log_config) - HelperFunctions.shell("service rsyslog restart") + HelperFunctions.shell("systemctl restart rsyslog") end if service_id == DEFAULT_SERVICE && version_id == DEFAULT_VERSION diff --git a/AppController/terminate.rb b/AppController/terminate.rb index d548270489..49a783a09d 100644 --- a/AppController/terminate.rb +++ b/AppController/terminate.rb @@ -19,7 +19,7 @@ def self.erase_appscale_state `rm -f ~/.appscale_cookies` `rm -f /etc/nginx/sites-enabled/appscale-*.conf` `rm -f /etc/haproxy/service-sites-enabled/*.cfg` - `service nginx reload` + `systemctl reload nginx` begin PTY.spawn('appscale-stop-services') do |stdout, _, _| @@ -53,7 +53,7 @@ def self.erase_appscale_state `rm -f /etc/logrotate.d/appscale-*` # Let's make sure we restart any non-appscale service. - `service monit restart` + `systemctl restart monit` `rm -f #{APPSCALE_CONFIG_DIR}/port-*.txt` # Remove location files. @@ -99,7 +99,7 @@ def self.erase_appscale_full_state `rm -rf /var/log/appscale/*` # Restart rsyslog so that the combined app logs can be recreated. - `service rsyslog restart` + `systemctl restart rsyslog` `rm -rf /var/log/rabbitmq/*` `rm -rf /var/log/zookeeper/*` @@ -143,8 +143,8 @@ def self.disable_database_writes # Next, stop ZooKeeper politely: we stop it with both new and old # script to be sure. - `service zookeeper-server stop` - `service zookeeper stop` + `systemctl stop zookeeper-server` + `systemctl stop zookeeper` end end diff --git a/common/appscale/common/constants.py b/common/appscale/common/constants.py index 495c48beaa..f4ed315ad6 100644 --- a/common/appscale/common/constants.py +++ b/common/appscale/common/constants.py @@ -154,9 +154,6 @@ def non_negative_int(value): # Location of where the search service is running. SEARCH_FILE_LOC = "/etc/appscale/search_ip" -# Service scripts directory. -SERVICES_DIR = '/etc/init.d' - # The AppController's service name. CONTROLLER_SERVICE = 'appscale-controller' diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index efd504354b..178c7ea9c1 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -90,21 +90,6 @@ cachepackage() { fi } -# This function is to disable the specify service so that it won't start -# at next boot. AppScale manages those services. -disableservice() { - if [ -n "$1" ]; then - update-rc.d "${1}" disable || true - # The following to make sure we disable it for upstart. - if [ -d "/etc/init" ]; then - echo "manual" > /etc/init/"${1}".override - fi - else - echo "Need a service name to disable!" - exit 1 - fi -} - increaseconnections() { if [ "${IN_DOCKER}" != "yes" ]; then @@ -279,8 +264,8 @@ postinstallhaproxy() sed -i 's/^ENABLED=0/ENABLED=1/g' /etc/default/haproxy # AppScale starts/stop the service. 
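With the disableservice helper deleted earlier in this patch, stopping a service and keeping it from starting at boot is reduced to the two systemctl calls in the hunk below; on systemd 220 or newer the same effect can be had in a single call (illustrative):

    systemctl disable --now haproxy        # stop it and drop the boot-time enablement in one step
    systemctl is-enabled haproxy || echo "haproxy will not start at boot"   # exits non-zero once disabled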
- service haproxy stop || true - disableservice haproxy + systemctl stop haproxy + systemctl disable haproxy } installgems() @@ -415,11 +400,11 @@ installservice() postinstallservice() { # Stop services shouldn't run at boot, then disable them. - service memcached stop || true - disableservice memcached + systemctl stop memcached + systemctl disable memcached ejabberdctl stop || true - disableservice ejabberd + systemctl disable ejabberd } installzookeeper() @@ -441,8 +426,8 @@ installurllib3() postinstallzookeeper() { - service zookeeper stop || true - disableservice zookeeper + systemctl stop zookeeper + systemctl disable zookeeper if [ ! -d /etc/zookeeper/conf ]; then echo "Cannot find zookeeper configuration!" exit 1 @@ -471,7 +456,7 @@ postinstallrabbitmq() # After install it starts up, shut it down. rabbitmqctl stop || true - disableservice rabbitmq-server + systemctl disable rabbitmq-server } installVersion() @@ -503,7 +488,7 @@ postinstallrsyslog() /etc/rsyslog.d/09-appscale.conf # Restart the service - service rsyslog restart || true + systemctl restart rsyslog || true } postinstallmonit() @@ -531,12 +516,6 @@ EOF [ ! -e /etc/monit/conf-enabled/cron ]; then ln -s /etc/monit/conf-available/cron /etc/monit/conf-enabled fi - - # Monit cannot start at boot time: in case of accidental reboot, it - # would start processes out of order. The controller will restart - # monit as soon as it starts. - service monit stop - disableservice monit } postinstallejabberd() From 36ffd9a2ef09c15dc37b1b0c2c10277e71c330bd Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:29:06 -0700 Subject: [PATCH 081/221] Updates for systemd, remove init.d scripts --- bootstrap.sh | 10 ++++------ debian/appscale_build.sh | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index 0613ed6b1f..1020c0be2b 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -285,12 +285,10 @@ if [ -d /etc/appscale/certs ]; then mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.appscale.old fi - # Remove outdated appscale-controller and appscale-progenitor. - if [ $APPSCALE_MAJOR -le 2 -a $APPSCALE_MINOR -le 2 ]; then - rm -f /etc/init.d/appscale-controller - rm -f /etc/init.d/appscale-progenitor - update-rc.d -f appscale-progenitor remove || true - fi + # Remove outdated init scripts. + [ ! -f "/etc/init.d/appscale-controller" ] || rm -fv "/etc/init.d/appscale-controller" + [ ! -f "/etc/init.d/appscale-progenitor" ] || rm -fv "/etc/init.d/appscale-progenitor" + [ ! -f "/etc/init.d/appscale-unmonit" ] || rm -fv "/etc/init.d/appscale-unmonit" # Remove control files we added before 1.14, and re-add the # default ones. diff --git a/debian/appscale_build.sh b/debian/appscale_build.sh index a04f3c1285..5467e3918d 100755 --- a/debian/appscale_build.sh +++ b/debian/appscale_build.sh @@ -142,12 +142,10 @@ if [ -d ${CONFIG_DIR}/certs ]; then mv /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.appscale.old fi - # Remove outdated appscale-controller and appscale-progenitor. - if [ $APPSCALE_MAJOR -le 2 -a $APPSCALE_MINOR -le 2 ]; then - rm -f /etc/init.d/appscale-controller - rm -f /etc/init.d/appscale-progenitor - update-rc.d -f appscale-progenitor remove || true - fi + # Remove outdated init scripts. + [ ! -f "/etc/init.d/appscale-controller" ] || rm -fv "/etc/init.d/appscale-controller" + [ ! -f "/etc/init.d/appscale-progenitor" ] || rm -fv "/etc/init.d/appscale-progenitor" + [ ! 
-f "/etc/init.d/appscale-unmonit" ] || rm -fv "/etc/init.d/appscale-unmonit" # Remove control files we added before 1.14, and re-add the # default ones. From 6866556ad84631d5fbd1fca67391989dcd6a0eef Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:29:16 -0700 Subject: [PATCH 082/221] Replace /var/run with /run --- AppController/djinn.rb | 2 +- AppController/lib/ejabberd.rb | 2 +- AppController/lib/haproxy.rb | 2 +- AppController/lib/nginx.rb | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 61c01a1385..47e77faa23 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3678,7 +3678,7 @@ def assign_search2_processes # Starts the Log Server service on this machine def start_log_server - log_server_pid = '/var/run/appscale/log_service.pid' + log_server_pid = '/run/appscale/log_service.pid' log_server_file = '/var/log/appscale/log_service.log' twistd = `which twistd`.chomp env = `which env`.chomp diff --git a/AppController/lib/ejabberd.rb b/AppController/lib/ejabberd.rb index 4b11ab3b9e..0e6162fd7f 100644 --- a/AppController/lib/ejabberd.rb +++ b/AppController/lib/ejabberd.rb @@ -26,7 +26,7 @@ def self.start service = `which service`.chomp start_cmd = "#{service} ejabberd start" stop_cmd = "#{service} ejabberd stop" - pidfile = '/var/run/ejabberd/ejabberd.pid' + pidfile = '/run/ejabberd/ejabberd.pid' self.ensure_correct_epmd MonitInterface.start_daemon(:ejabberd, start_cmd, stop_cmd, pidfile) diff --git a/AppController/lib/haproxy.rb b/AppController/lib/haproxy.rb index 6b27d0227f..8fdbe40fad 100644 --- a/AppController/lib/haproxy.rb +++ b/AppController/lib/haproxy.rb @@ -32,7 +32,7 @@ module HAProxy SERVICE_SITES_PATH = File.join(HAPROXY_PATH, 'service-sites-enabled') SERVICE_MAIN_FILE = File.join(HAPROXY_PATH, "service-haproxy.#{CONFIG_EXTENSION}") SERVICE_BASE_FILE = File.join(HAPROXY_PATH, "service-base.#{CONFIG_EXTENSION}") - SERVICE_PIDFILE = '/var/run/appscale/service-haproxy.pid'.freeze + SERVICE_PIDFILE = '/run/appscale/service-haproxy.pid'.freeze # Maximum AppServer threaded connections MAX_APPSERVER_CONN = 7 diff --git a/AppController/lib/nginx.rb b/AppController/lib/nginx.rb index e445391053..587ba6158f 100644 --- a/AppController/lib/nginx.rb +++ b/AppController/lib/nginx.rb @@ -61,7 +61,7 @@ def self.start service_bin = `which service`.chomp start_cmd = "#{service_bin} nginx start" stop_cmd = "#{service_bin} nginx stop" - pidfile = '/var/run/nginx.pid' + pidfile = '/run/nginx.pid' MonitInterface.start_daemon(:nginx, start_cmd, stop_cmd, pidfile) end From 0f309b1dc6fce07f6d83785773cc667b0453c3c2 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:29:26 -0700 Subject: [PATCH 083/221] Updates for systemd, use systemctl for service control --- AppController/lib/ejabberd.rb | 6 +++--- AppController/lib/monit_interface.rb | 5 ++--- AppController/lib/nginx.rb | 10 +++++----- AppController/lib/taskqueue.rb | 6 +++--- AppController/lib/zookeeper_helper.rb | 8 ++++---- AppController/test/tc_djinn.rb | 2 +- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/AppController/lib/ejabberd.rb b/AppController/lib/ejabberd.rb index 0e6162fd7f..5515e963a7 100644 --- a/AppController/lib/ejabberd.rb +++ b/AppController/lib/ejabberd.rb @@ -23,9 +23,9 @@ class UnknownVersion < StandardError; end ONLINE_USERS_FILE = '/etc/appscale/online_xmpp_users'.freeze def self.start - service = `which service`.chomp - start_cmd = "#{service} ejabberd start" - stop_cmd = "#{service} 
ejabberd stop" + systemctl = `which systemctl`.chomp + start_cmd = "#{systemctl} start ejabberd" + stop_cmd = "#{systemctl} stop ejabberd" pidfile = '/run/ejabberd/ejabberd.pid' self.ensure_correct_epmd diff --git a/AppController/lib/monit_interface.rb b/AppController/lib/monit_interface.rb index 3abc79fb25..4beca9628c 100644 --- a/AppController/lib/monit_interface.rb +++ b/AppController/lib/monit_interface.rb @@ -22,9 +22,8 @@ module MonitInterface MONIT = '/usr/bin/monit'.freeze def self.start_monit - ret = system('service --status-all 2> /dev/null | grep monit' \ - ' | grep + > /dev/null') - run_cmd('service monit start') unless ret + ret = system('systemctl is-active monit > /dev/null') + run_cmd('systemctl start monit') unless ret ret end diff --git a/AppController/lib/nginx.rb b/AppController/lib/nginx.rb index 587ba6158f..1b7bc8ba2c 100644 --- a/AppController/lib/nginx.rb +++ b/AppController/lib/nginx.rb @@ -58,9 +58,9 @@ def self.start # Nginx runs both a 'master process' and one or more 'worker process'es, so # when we have monit watch it, as long as one of those is running, nginx is # still running and shouldn't be restarted. - service_bin = `which service`.chomp - start_cmd = "#{service_bin} nginx start" - stop_cmd = "#{service_bin} nginx stop" + systemctl = `which systemctl`.chomp + start_cmd = "#{systemctl} start nginx" + stop_cmd = "#{systemctl} stop nginx" pidfile = '/run/nginx.pid' MonitInterface.start_daemon(:nginx, start_cmd, stop_cmd, pidfile) end @@ -78,7 +78,7 @@ def self.cleanup_failed_nginx def self.reload Djinn.log_info('Reloading nginx service.') - HelperFunctions.shell('service nginx reload') + HelperFunctions.shell('systemctl reload nginx') cleanup_failed_nginx if $?.to_i != 0 end @@ -370,7 +370,7 @@ def self.initialize_config # The pid file location was changed in the default nginx config for # Trusty. Because of this, the first reload after writing the new config # will fail on Precise. - HelperFunctions.shell('service nginx restart') + HelperFunctions.shell('systemctl restart nginx') end end diff --git a/AppController/lib/taskqueue.rb b/AppController/lib/taskqueue.rb index a4abccf02f..b6c3bd7932 100755 --- a/AppController/lib/taskqueue.rb +++ b/AppController/lib/taskqueue.rb @@ -99,9 +99,9 @@ def self.start_rabbitmq end Djinn.log_run("mkdir -p #{CELERY_STATE_DIR}") - service_bin = `which service`.chomp - start_cmd = "#{service_bin} rabbitmq-server start" - stop_cmd = "#{service_bin} rabbitmq-server stop" + systemctl = `which systemctl`.chomp + start_cmd = "#{systemctl} start rabbitmq-server" + stop_cmd = "#{systemctl} stop rabbitmq-server" Ejabberd.ensure_correct_epmd MonitInterface.start_daemon(:rabbitmq, start_cmd, stop_cmd, pidfile, diff --git a/AppController/lib/zookeeper_helper.rb b/AppController/lib/zookeeper_helper.rb index ddb7142151..39806128b8 100644 --- a/AppController/lib/zookeeper_helper.rb +++ b/AppController/lib/zookeeper_helper.rb @@ -63,7 +63,7 @@ def start_zookeeper(clear_datastore) unless File.directory?(DATA_LOCATION.to_s) Djinn.log_info('Initializing ZooKeeper.') # Let's stop zookeeper in case it is still running. - system("/usr/sbin/service zookeeper stop") + system("systemctl stop zookeeper") # Let's create the new location for zookeeper. Djinn.log_run("mkdir -pv #{DATA_LOCATION}") @@ -73,9 +73,9 @@ def start_zookeeper(clear_datastore) # myid is needed for multi node configuration. 
Djinn.log_run("ln -sfv /etc/zookeeper/conf/myid #{DATA_LOCATION}/myid") - service = `which service`.chomp - start_cmd = "#{service} zookeeper start" - stop_cmd = "#{service} zookeeper stop" + systemctl = `which systemctl`.chomp + start_cmd = "#{systemctl} start zookeeper" + stop_cmd = "#{systemctl} stop zookeeper" match_cmd = 'org.apache.zookeeper.server.quorum.QuorumPeerMain' MonitInterface.start_custom(:zookeeper, start_cmd, stop_cmd, match_cmd) end diff --git a/AppController/test/tc_djinn.rb b/AppController/test/tc_djinn.rb index e534a0db70..515e1f30b2 100644 --- a/AppController/test/tc_djinn.rb +++ b/AppController/test/tc_djinn.rb @@ -24,7 +24,7 @@ def setup djinn = flexmock(Djinn) djinn.should_receive(:log_run).with("").and_return() - djinn.should_receive(:log_run).with("service monit start").and_return() + djinn.should_receive(:log_run).with("systemctl start monit").and_return() flexmock(HelperFunctions).should_receive(:shell).with("").and_return() flexmock(HelperFunctions).should_receive(:log_and_crash).and_raise( From ac79d199bec82ae8c9b23f5d97e45b7f1a15d294 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:29:36 -0700 Subject: [PATCH 084/221] Updates for systemd, restore controller.log --- AppController/djinn.rb | 1 + debian/appscale_install_functions.sh | 3 +++ system/rsyslog.d/10-appscale-controller.conf | 5 +++++ 3 files changed, 9 insertions(+) create mode 100644 system/rsyslog.d/10-appscale-controller.conf diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 47e77faa23..e3685f99a8 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -528,6 +528,7 @@ def initialize @@secret = HelperFunctions.get_secret @@log = Logger.new(STDOUT) + @@log.datetime_format = '%S.%6N' @@log.level = Logger::INFO @my_index = nil diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 178c7ea9c1..cd0764b8ef 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -389,6 +389,9 @@ installservice() cp -v ${APPSCALE_HOME_RUNTIME}/system/tmpfiles.d/appscale.conf ${DESTDIR}/usr/lib/tmpfiles.d/ systemd-tmpfiles --create + mkdir -pv ${DESTDIR}/etc/rsyslog.d + cp -v ${APPSCALE_HOME_RUNTIME}/system/rsyslog.d/10-appscale-controller.conf ${DESTDIR}/etc/rsyslog.d/ + mkdir -pv ${DESTDIR}/lib/systemd/system cp -v ${APPSCALE_HOME_RUNTIME}/system/units/appscale-controller.service ${DESTDIR}/lib/systemd/system/ systemctl daemon-reload diff --git a/system/rsyslog.d/10-appscale-controller.conf b/system/rsyslog.d/10-appscale-controller.conf new file mode 100644 index 0000000000..bffe6c418a --- /dev/null +++ b/system/rsyslog.d/10-appscale-controller.conf @@ -0,0 +1,5 @@ +# Log controller output to file +:programname, isequal, "controller" /var/log/appscale/controller.log;APPSCALE + +# The following is to prevent further processing. +& stop \ No newline at end of file From be759638aaa1828efb51d2f05e707d7b0196d42d Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:29:46 -0700 Subject: [PATCH 085/221] AppController service now uses systemd, update appscale build --- debian/appscale_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/appscale_build.sh b/debian/appscale_build.sh index 5467e3918d..4f8137d786 100755 --- a/debian/appscale_build.sh +++ b/debian/appscale_build.sh @@ -122,7 +122,7 @@ if [ -d ${CONFIG_DIR}/certs ]; then echo "Found AppScale version $APPSCALE_MAJOR.$APPSCALE_MINOR: upgrading it." # Make sure AppScale is not running. 
MONIT=$(which monit) - if $MONIT summary |grep controller > /dev/null ; then + if systemctl is-active appscale-controller > /dev/null ; then echo "AppScale is still running: please stop it" [ "$FORCE_UPGRADE" = "Y" ] || exit 1 elif echo $MONIT |grep local > /dev/null ; then From b0a661af1ee99784dd77d8db4588156e9cde556d Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:29:57 -0700 Subject: [PATCH 086/221] Updates for systemd, replace admin server monit use --- AdminServer/appscale/admin/__init__.py | 10 +- AdminServer/appscale/admin/constants.py | 1 - .../admin/instance_manager/constants.py | 8 +- .../admin/instance_manager/instance.py | 4 +- .../instance_manager/instance_manager.py | 256 +++++------------- .../appscale/admin/instance_manager/server.py | 6 +- .../admin/instance_manager/stop_instance.py | 68 ----- .../appscale/admin/instance_manager/utils.py | 4 +- .../appscale/admin/push_worker_manager.py | 115 ++------ AdminServer/appscale/admin/routing/haproxy.py | 2 +- AdminServer/appscale/admin/stop_services.py | 144 +++------- AdminServer/appscale/admin/summary.py | 14 +- AdminServer/setup.py | 1 - AdminServer/tests/test_instance_manager.py | 91 ++----- 14 files changed, 162 insertions(+), 562 deletions(-) delete mode 100644 AdminServer/appscale/admin/instance_manager/stop_instance.py diff --git a/AdminServer/appscale/admin/__init__.py b/AdminServer/appscale/admin/__init__.py index 9dd8bc8c68..725bcf4a2d 100644 --- a/AdminServer/appscale/admin/__init__.py +++ b/AdminServer/appscale/admin/__init__.py @@ -27,7 +27,7 @@ VERSION_PATH_SEPARATOR, ZK_PERSISTENT_RECONNECTS ) -from appscale.common.monit_interface import MonitOperator +from appscale.common.service_helper import ServiceOperator from appscale.common.appscale_utils import get_md5 from appscale.common.ua_client import UAClient from appscale.common.ua_client import UAException @@ -76,7 +76,7 @@ from .resource_validator import validate_resource, ResourceValidationError from .routing.routing_manager import RoutingManager from .service_manager import ServiceManager, ServiceManagerHandler -from .summary import get_combined_services +from .summary import get_services logger = logging.getLogger(__name__) @@ -1364,7 +1364,7 @@ def main(): args = parser.parse_args() if args.command == 'summary': - table = sorted(list(get_combined_services().items())) + table = sorted(list(get_services().items())) print(tabulate(table, headers=['Service', 'State'])) sys.exit(0) @@ -1394,7 +1394,7 @@ def main(): zk_client.start() version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE) thread_pool = ThreadPoolExecutor(4) - monit_operator = MonitOperator() + service_operator = ServiceOperator() all_resources = { 'acc': acc, 'ua_client': ua_client, @@ -1405,7 +1405,7 @@ def main(): if options.private_ip in appscale_info.get_taskqueue_nodes(): logger.info('Starting push worker manager') - GlobalPushWorkerManager(zk_client, monit_operator) + GlobalPushWorkerManager(zk_client, service_operator) if options.private_ip in appscale_info.get_load_balancer_ips(): logger.info('Starting RoutingManager') diff --git a/AdminServer/appscale/admin/constants.py b/AdminServer/appscale/admin/constants.py index 3155dab975..f3e7af6365 100644 --- a/AdminServer/appscale/admin/constants.py +++ b/AdminServer/appscale/admin/constants.py @@ -158,7 +158,6 @@ class Types(object): + list(HAPROXY_PORTS) + [ 2181, # Zookeeper - 2812, # Monit 3306, # MySQL 4341, # UserAppServer service 4342, # UserAppServer server diff --git 
a/AdminServer/appscale/admin/instance_manager/constants.py b/AdminServer/appscale/admin/instance_manager/constants.py index 4bb4a1105c..75e5303086 100644 --- a/AdminServer/appscale/admin/instance_manager/constants.py +++ b/AdminServer/appscale/admin/instance_manager/constants.py @@ -35,8 +35,8 @@ def http_response(self, request, response): API_SERVER_LOCATION = os.path.join('/', 'opt', 'appscale_venvs', 'api_server', 'bin', 'appscale-api-server') -# The Monit watch prefix for API servers. -API_SERVER_PREFIX = 'api-server_' +# Prefix for API server services. +API_SERVER_PREFIX = 'appscale-api-server@' # Max application server log size in bytes. APP_LOG_SIZE = 250 * 1024 * 1024 @@ -109,8 +109,8 @@ def http_response(self, request, response): os.path.join('/', 'usr', 'share', 'appscale', 'ext', '*') ] -# A prefix added to instance entries to distinguish them from services. -MONIT_INSTANCE_PREFIX = 'app___' +# Common prefix for instance services. +SERVICE_INSTANCE_PREFIX = 'appscale-instance-run@' # The script used for starting Python AppServer instances. PYTHON_APPSERVER = os.path.join(APPSCALE_HOME, 'AppServer', diff --git a/AdminServer/appscale/admin/instance_manager/instance.py b/AdminServer/appscale/admin/instance_manager/instance.py index 46cedb7a7c..e5c82a0aa6 100644 --- a/AdminServer/appscale/admin/instance_manager/instance.py +++ b/AdminServer/appscale/admin/instance_manager/instance.py @@ -170,7 +170,7 @@ def create_java_start_cmd(app_name, port, load_balancer_port, load_balancer_host return ' '.join(cmd) -def create_python_api_start_cmd(app_name, login_ip, port, pidfile, +def create_python_api_start_cmd(app_name, login_ip, port, api_server_port): """ Creates the start command to run the python api server. @@ -178,7 +178,6 @@ def create_python_api_start_cmd(app_name, login_ip, port, pidfile, app_name: The name of the application to run login_ip: The public IP of this deployment port: The local port the api server will bind to - pidfile: A string specifying the pidfile location. api_server_port: An integer specifying the port of the external API server. Returns: A string of the start command. @@ -193,7 +192,6 @@ def create_python_api_start_cmd(app_name, login_ip, port, pidfile, '--xmpp_path', options.load_balancer_ip, '--uaserver_path', '{}:{}'.format(options.db_proxy, UA_SERVER_PORT), '--datastore_path', '{}:{}'.format(options.db_proxy, DB_SERVER_PORT), - '--pidfile', pidfile, '--external_api_port', str(api_server_port) ] diff --git a/AdminServer/appscale/admin/instance_manager/instance_manager.py b/AdminServer/appscale/admin/instance_manager/instance_manager.py index 8d29404563..772018b89e 100644 --- a/AdminServer/appscale/admin/instance_manager/instance_manager.py +++ b/AdminServer/appscale/admin/instance_manager/instance_manager.py @@ -1,12 +1,9 @@ """ Fulfills AppServer instance assignments from the scheduler. 
""" -import hashlib import httplib import logging import monotonic import json import os -import psutil -import signal import urllib2 from tornado import gen @@ -19,76 +16,38 @@ BadConfigurationException, DASHBOARD_LOG_SIZE, DASHBOARD_PROJECT_ID, DEFAULT_MAX_APPSERVER_MEMORY, FETCH_PATH, GO_SDK, HEALTH_CHECK_TIMEOUT, INSTANCE_CLASSES, JAVA_APPSERVER_CLASS, MAX_API_SERVER_PORT, - MAX_INSTANCE_RESPONSE_TIME, MONIT_INSTANCE_PREFIX, NoRedirection, + MAX_INSTANCE_RESPONSE_TIME, SERVICE_INSTANCE_PREFIX, NoRedirection, PIDFILE_TEMPLATE, PYTHON_APPSERVER, START_APP_TIMEOUT, STARTING_INSTANCE_PORT, VERSION_REGISTRATION_NODE) from appscale.admin.instance_manager.instance import ( create_java_app_env, create_java_start_cmd, create_python_api_start_cmd, create_python_app_env, create_python27_start_cmd, get_login_server, Instance) -from appscale.admin.instance_manager.stop_instance import stop_instance from appscale.admin.instance_manager.utils import setup_logrotate, \ remove_logrotate -from appscale.common import appscale_info, monit_app_configuration +from appscale.common import (appscale_info, file_io) from appscale.common.async_retrying import retry_data_watch_coroutine from appscale.common.constants import ( - APPS_PATH, GO, JAVA, JAVA8, MonitStates, PHP, PYTHON27, VAR_DIR, + APPS_PATH, GO, JAVA, JAVA8, PHP, PYTHON27, VAR_DIR, VERSION_PATH_SEPARATOR) -from appscale.common.monit_interface import DEFAULT_RETRIES, ProcessNotFound from appscale.common.retrying import retry logger = logging.getLogger(__name__) -def clean_up_instances(entries_to_keep): - """ Terminates instances that aren't accounted for. - - Args: - entries_to_keep: A list of dictionaries containing instance details. - """ - monitored = {(entry['revision'], entry['port']) for entry in entries_to_keep} - to_stop = [] - for process in psutil.process_iter(): - cmd = process.cmdline() - if len(cmd) < 2: - continue - - if JAVA_APPSERVER_CLASS in cmd: - revision = cmd[-1].split(os.sep)[-2] - port_arg = next(arg for arg in cmd if arg.startswith('--port=')) - port = int(port_arg.split('=')[-1]) - elif cmd[1] == PYTHON_APPSERVER: - source_arg = next(arg for arg in cmd if arg.startswith(APPS_PATH)) - revision = source_arg.split(os.sep)[-2] - port = int(cmd[cmd.index('--port') + 1]) - else: - continue - - if (revision, port) not in monitored: - to_stop.append(process) - - if not to_stop: - return - - logger.info('Killing {} unmonitored instances'.format(len(to_stop))) - for process in to_stop: - group = os.getpgid(process.pid) - os.killpg(group, signal.SIGKILL) - - class InstanceManager(object): """ Fulfills AppServer instance assignments from the scheduler. """ # The seconds to wait between performing health checks. HEALTH_CHECK_INTERVAL = 60 - def __init__(self, zk_client, monit_operator, routing_client, + def __init__(self, zk_client, service_operator, routing_client, projects_manager, deployment_config, source_manager, syslog_server, thread_pool, private_ip): """ Creates a new InstanceManager. Args: zk_client: A kazoo.client.KazooClient object. - monit_operator: An appscale.common.monit_interface.MonitOperator object. + service_operator: An appscale.common.service_helper.ServiceOperator object. routing_client: An instance_manager.routing_client.RoutingClient object. projects_manager: A ProjectsManager object. deployment_config: A common.deployment_config.DeploymentConfig object. @@ -98,7 +57,7 @@ def __init__(self, zk_client, monit_operator, routing_client, thread_pool: A ThreadPoolExecutor. 
private_ip: A string specifying the current machine's private IP address. """ - self._monit_operator = monit_operator + self._service_operator = service_operator self._routing_client = routing_client self._private_ip = private_ip self._syslog_server = syslog_server @@ -165,7 +124,7 @@ def _start_instance(self, version, port): source_archive = version_details['deployment']['zip']['sourceUrl'] http_port = version_details['appscaleExtensions']['httpPort'] - api_server_port = yield self._ensure_api_server(version.project_id, runtime) + api_server_port, api_services = yield self._ensure_api_server(version.project_id, runtime) yield self._source_manager.ensure_source( version.revision_key, source_archive, runtime) @@ -178,7 +137,6 @@ def _start_instance(self, version, port): 'gopath') env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot') - watch = ''.join([MONIT_INSTANCE_PREFIX, version.revision_key]) if runtime in (PYTHON27, GO, PHP): start_cmd = create_python27_start_cmd( version.project_id, @@ -218,34 +176,16 @@ def _start_instance(self, version, port): logger.info("Start command: " + str(start_cmd)) logger.info("Environment variables: " + str(env_vars)) - base_version = version.revision_key.rsplit(VERSION_PATH_SEPARATOR, 1)[0] - log_tag = "app_{}".format(hashlib.sha1(base_version).hexdigest()[:28]) - - monit_app_configuration.create_config_file( - watch, - start_cmd, - pidfile, - port, - env_vars, - max_memory, - self._syslog_server, - check_port=True, - kill_exceeded_memory=True, - log_tag=log_tag, - ) - - full_watch = '{}-{}'.format(watch, port) - - yield self._monit_operator.reload(self._thread_pool) - - # The reload command does not block, and we don't have a good way to check - # if Monit is ready with its new configuration yet. If the daemon begins - # reloading while it is handling the 'start', it can end up in a state - # where it never starts the process. As a temporary workaround, this - # small period allows it to finish reloading. This can be removed if - # instances are started inside a cgroup. - yield gen.sleep(1) - yield self._monit_operator.send_command_retry_process(full_watch, 'start') + env_content = ' '.join(['{}="{}"'.format(k, str(v)) for k, v in env_vars.items()]) + command_content = 'exec env {} {}'.format(env_content, start_cmd) + service_inst = '{}-{}'.format(version.revision_key, port) + service_name = 'appscale-instance-run@{}'.format(service_inst) + service_props = {'MemoryLimit': '{}M'.format(max_memory)} + command_file_path = '/run/appscale/apps/command_{}'.format(service_inst) + file_io.write(command_file_path, command_content) + + yield self._service_operator.start_async( + service_name, wants=api_services, properties=service_props) # Make sure the version registration node exists. 
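For reviewers: the hunk above replaces the Monit config-file/reload/start cycle with a per-instance systemd template unit. A minimal standalone sketch of that new start path follows, using only the names visible in this diff; it assumes (not shown here) that the appscale-instance-run@.service template reads the matching command file under /run/appscale/apps and execs it.

    # Sketch only: condenses the _start_instance changes above.
    from tornado import gen

    @gen.coroutine
    def launch_instance(service_operator, revision_key, port, start_cmd,
                        env_vars, max_memory, api_services):
      """ Writes the instance command file and starts its systemd unit. """
      env_content = ' '.join('{}="{}"'.format(k, v) for k, v in env_vars.items())
      service_inst = '{}-{}'.format(revision_key, port)

      # The appscale-instance-run@ template is assumed to pick this file up
      # by instance name and exec its contents.
      with open('/run/appscale/apps/command_{}'.format(service_inst), 'w') as f:
        f.write('exec env {} {}'.format(env_content, start_cmd))

      # MemoryLimit takes over Monit's kill-on-exceeded-memory duty, and
      # `wants` pulls up the project's API server units with the instance.
      yield service_operator.start_async(
          'appscale-instance-run@{}'.format(service_inst),
          wants=api_services,
          properties={'MemoryLimit': '{}M'.format(max_memory)})

Because the unit runs inside its own cgroup, the old reload-and-sleep workaround removed above is no longer needed.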
self._zk_client.ensure_path( @@ -277,11 +217,11 @@ def api_server_info(entry): project_id = index_and_id[1] return project_id, index, int(port) - monit_entries = yield self._monit_operator.get_entries() - monit_entry_list = [entry for entry in monit_entries - if entry.startswith(API_SERVER_PREFIX)] - monit_entry_list.sort() - server_entries = [api_server_info(entry) for entry in monit_entry_list] + service_entries = yield self._service_operator.list_async() + service_entry_list = [entry for entry in service_entries + if entry.startswith(API_SERVER_PREFIX)] + service_entry_list.sort() + server_entries = [api_server_info(entry) for entry in service_entry_list] for project_id, index, port in server_entries: ports = (self._api_servers[project_id] if project_id in @@ -293,33 +233,18 @@ def api_server_info(entry): ports.insert(index, port) def _recover_state(self): - """ Establishes current state from Monit entries. """ + """ Establishes current state from services. """ logger.info('Getting current state') - monit_entries = self._monit_operator.get_entries_sync() - instance_entries = {entry: state for entry, state in monit_entries.items() - if entry.startswith(MONIT_INSTANCE_PREFIX)} - - # Remove all unmonitored entries. - removed = [] - for entry, state in instance_entries.items(): - if state == MonitStates.UNMONITORED: - self._monit_operator.remove_configuration(entry) - removed.append(entry) - - for entry in removed: - del instance_entries[entry] - - if removed: - self._monit_operator.reload_sync() + service_entries = self._service_operator.list() + instance_entries = {entry: state for entry, state in service_entries.items() + if entry.startswith(SERVICE_INSTANCE_PREFIX)} instance_details = [] for entry, state in instance_entries.items(): - revision, port = entry[len(MONIT_INSTANCE_PREFIX):].rsplit('-', 1) + revision, port = entry[entry.find('@')+1:].rsplit('-', 2) instance_details.append( {'revision': revision, 'port': int(port), 'state': state}) - clean_up_instances(instance_details) - # Ensure version nodes exist. running_versions = {'_'.join(instance['revision'].split('_')[:3]) for instance in instance_details} @@ -343,15 +268,21 @@ def _ensure_api_server(self, project_id, runtime): project_id: A string specifying the project ID. runtime: The runtime for the project Returns: - An integer specifying the API server port. + An integer specifying the API server port and list of api services. 
""" ensure_app_server_api = runtime==JAVA8 if project_id in self._api_servers: api_server_ports = self._api_servers[project_id] if not ensure_app_server_api: - raise gen.Return(api_server_ports[0]) + raise gen.Return((api_server_ports[0], + ['appscale-api-server@{}-{}' + .format(project_id, str(api_server_ports[0]))])) elif len(api_server_ports) > 1: - raise gen.Return(api_server_ports[1]) + raise gen.Return((api_server_ports[1], + ['appscale-api-server@{}-{}' + .format(project_id, str(api_server_ports[0])), + 'appscale-api-server@1_{}-{}' + .format(project_id, str(api_server_ports[1]))])) server_port = MAX_API_SERVER_PORT for ports in self._api_servers.values(): @@ -359,82 +290,48 @@ def _ensure_api_server(self, project_id, runtime): if port <= server_port: server_port = port - 1 - full_watch = None + api_services = [] if not project_id in self._api_servers: watch = ''.join([API_SERVER_PREFIX, project_id]) - full_watch = '-'.join([watch, str(server_port)]) - pidfile = os.path.join(VAR_DIR, '{}.pid'.format(full_watch)) zk_locations = appscale_info.get_zk_node_ips() start_cmd = ' '.join([API_SERVER_LOCATION, '--port', str(server_port), '--project-id', project_id, '--zookeeper-locations', ' '.join(zk_locations)]) - monit_app_configuration.create_config_file( - watch, - start_cmd, - pidfile, - server_port, - max_memory=DEFAULT_MAX_APPSERVER_MEMORY, - check_port=True, - check_host='127.0.0.1') + + api_command_file_path = ('/run/appscale/apps/api_command_{}-{}' + .format(project_id, str(server_port))) + api_command_content = 'exec {}'.format(start_cmd) + file_io.write(api_command_file_path, api_command_content) + api_server_port = server_port else: api_server_port = self._api_servers[project_id][0] + api_services.append('appscale-api-server@{}-{}' + .format(project_id, str(api_server_port))) - full_watch_app = None if ensure_app_server_api: # Start an Python 27 runtime API server if api_server_port==server_port: server_port -= 1 - watch = ''.join([API_SERVER_PREFIX, '1_', project_id]) - full_watch_app = '-'.join([watch, str(server_port)]) - pidfile = os.path.join(VAR_DIR, '{}.pid'.format(full_watch_app)) start_cmd = create_python_api_start_cmd(project_id, self._login_server, server_port, - pidfile, api_server_port) - monit_app_configuration.create_config_file( - watch, - start_cmd, - pidfile, - server_port, - max_memory=DEFAULT_MAX_APPSERVER_MEMORY, - check_port=True, - check_host='127.0.0.1', - group='api-server') - self._api_servers[project_id] = [api_server_port, server_port] - else: - self._api_servers[project_id] = [server_port] - yield self._monit_operator.reload(self._thread_pool) - if full_watch: - yield self._monit_operator.send_command_retry_process(full_watch, 'start') - if full_watch_app: - yield self._monit_operator.send_command_retry_process(full_watch_app, 'start') + api_command_file_path = ('/run/appscale/apps/api_command_1_{}-{}' + .format(project_id, str(server_port))) + api_command_content = 'exec {}'.format(start_cmd) + file_io.write(api_command_file_path, api_command_content) - raise gen.Return(server_port) + api_services.append('appscale-api-server@{}-{}' + .format(project_id, str(server_port))) - @gen.coroutine - def _unmonitor_and_terminate(self, watch): - """ Unmonitors an instance and terminates it. - - Args: - watch: A string specifying the Monit entry. 
- """ - try: - monit_retry = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES) - send_w_retries = monit_retry(self._monit_operator.send_command_sync) - send_w_retries(watch, 'unmonitor') - except ProcessNotFound: - # If Monit does not know about a process, assume it is already stopped. - return - - # Now that the AppServer is stopped, remove its monit config file so that - # monit doesn't pick it up and restart it. - self._monit_operator.remove_configuration(watch) + self._api_servers[project_id] = [api_server_port, server_port] + else: + self._api_servers[project_id] = [server_port] - stop_instance(watch, MAX_INSTANCE_RESPONSE_TIME) + raise gen.Return((server_port, api_services)) def _instance_healthy(self, port): """ Determines the health of an instance with an HTTP request. @@ -476,6 +373,10 @@ def _wait_for_app(self, port): raise gen.Return(False) + def _instance_service_name(self, instance): + return ''.join(['appscale-instance-run@', instance.revision_key, '-', + str(instance.port)]) + @gen.coroutine def _add_routing(self, instance): """ Tells the AppController to begin routing traffic to an AppServer. @@ -486,42 +387,22 @@ def _add_routing(self, instance): logger.info('Waiting for {}'.format(instance)) start_successful = yield self._wait_for_app(instance.port) if not start_successful: - monit_watch = ''.join( - [MONIT_INSTANCE_PREFIX, instance.revision_key, '-', - str(instance.port)]) - yield self._unmonitor_and_terminate(monit_watch) - yield self._monit_operator.reload(self._thread_pool) + instance_service = self._instance_service_name(instance) + yield self._service_operator.stop_async(instance_service) logger.warning('{} did not come up in time'.format(instance)) return self._routing_client.register_instance(instance) self._running_instances.add(instance) - @gen.coroutine - def _stop_api_server(self, project_id): - """ Make sure there are no running API servers for a project. - - Args: - project_id: A string specifying the project ID. - """ - if project_id not in self._api_servers: - return - - ports = self._api_servers[project_id] - for index, port in enumerate(ports): - index_str = '' if index==0 else '{}_'.format(index) - watch = '{}{}{}-{}'.format(API_SERVER_PREFIX, index_str, project_id, port) - yield self._unmonitor_and_terminate(watch) - del self._api_servers[project_id] - @gen.coroutine def _clean_old_sources(self): """ Removes source code for obsolete revisions. 
""" - monit_entries = yield self._monit_operator.get_entries() + service_entries = yield self._service_operator.list_async() active_revisions = { - entry[len(MONIT_INSTANCE_PREFIX):].rsplit('-', 1)[0] - for entry in monit_entries - if entry.startswith(MONIT_INSTANCE_PREFIX)} + entry[len(SERVICE_INSTANCE_PREFIX):].rsplit('-', 1)[0] + for entry in service_entries + if entry.startswith(SERVICE_INSTANCE_PREFIX)} for project_id, project_manager in self._projects_manager.items(): for service_id, service_manager in project_manager.items(): @@ -543,8 +424,7 @@ def _stop_app_instance(self, instance): """ logger.info('Stopping {}'.format(instance)) - monit_watch = ''.join( - [MONIT_INSTANCE_PREFIX, instance.revision_key, '-', str(instance.port)]) + instance_service = self._instance_service_name(instance) self._routing_client.unregister_instance(instance) try: @@ -553,15 +433,13 @@ def _stop_app_instance(self, instance): logger.info( 'unregister_instance: non-existent instance {}'.format(instance)) - yield self._unmonitor_and_terminate(monit_watch) + yield self._service_operator.stop_async(instance_service) project_instances = [instance_ for instance_ in self._running_instances if instance_.project_id == instance.project_id] if not project_instances: - yield self._stop_api_server(instance.project_id) remove_logrotate(instance.project_id) - yield self._monit_operator.reload(self._thread_pool) yield self._clean_old_sources() def _get_lowest_port(self): diff --git a/AdminServer/appscale/admin/instance_manager/server.py b/AdminServer/appscale/admin/instance_manager/server.py index c27580ec4b..ab5d39acbb 100644 --- a/AdminServer/appscale/admin/instance_manager/server.py +++ b/AdminServer/appscale/admin/instance_manager/server.py @@ -14,7 +14,7 @@ from appscale.admin.instance_manager.source_manager import SourceManager from appscale.common import appscale_info, file_io from appscale.common.deployment_config import DeploymentConfig -from appscale.common.monit_interface import MonitOperator +from appscale.common.service_helper import ServiceOperator logger = logging.getLogger(__name__) @@ -32,7 +32,7 @@ def main(): thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS) source_manager = SourceManager(zk_client, thread_pool) source_manager.configure_automatic_fetch(projects_manager) - monit_operator = MonitOperator() + service_operator = ServiceOperator() options.define('private_ip', appscale_info.get_private_ip()) options.define('syslog_server', appscale_info.get_headnode_ip()) @@ -43,7 +43,7 @@ def main(): routing_client = RoutingClient(zk_client, options.private_ip, options.secret) instance_manager = InstanceManager( - zk_client, monit_operator, routing_client, projects_manager, + zk_client, service_operator, routing_client, projects_manager, deployment_config, source_manager, options.syslog_server, thread_pool, options.private_ip) instance_manager.start() diff --git a/AdminServer/appscale/admin/instance_manager/stop_instance.py b/AdminServer/appscale/admin/instance_manager/stop_instance.py deleted file mode 100644 index 1904ad334d..0000000000 --- a/AdminServer/appscale/admin/instance_manager/stop_instance.py +++ /dev/null @@ -1,68 +0,0 @@ -""" Stops an AppServer instance. """ -import argparse -import errno -import os -import psutil -import signal - -from appscale.common.constants import VAR_DIR - -# The number of seconds to wait for an instance to terminate. -DEFAULT_WAIT_TIME = 20 - - -def stop_instance(watch, timeout, force=False): - """ Stops an AppServer process. 
- - Args: - watch: A string specifying the Monit watch entry. - timeout: An integer specifying the time to wait for requests to finish. - force: A boolean indicating that the instance should be killed immediately - instead of being allowed to finish ongoing requests. - Raises: - IOError if the pidfile does not exist. - OSError if the process does not exist. - """ - pidfile_location = os.path.join(VAR_DIR, '{}.pid'.format(watch)) - with open(pidfile_location) as pidfile: - pid = int(pidfile.read().strip()) - - group = os.getpgid(pid) - if force: - os.killpg(group, signal.SIGKILL) - os.remove(pidfile_location) - return - - process = psutil.Process(pid) - process.terminate() - try: - process.wait(timeout) - except psutil.TimeoutExpired: - process.kill() - - try: - os.killpg(group, signal.SIGKILL) - except OSError: - # In most cases, the group will already be gone. - pass - - try: - os.remove(pidfile_location) - except OSError as e: - # In case the pidfile has already been removed. - if e.errno == errno.ENOENT: - pass - else: - raise - - -def main(): - """ Stops an AppServer instance. """ - parser = argparse.ArgumentParser(description='Stops an AppServer instance') - parser.add_argument('--watch', required=True, help='The Monit watch entry') - parser.add_argument('--timeout', default=20, - help='The seconds to wait before killing the instance') - parser.add_argument('--force', action='store_true', - help='Stop the process immediately') - args = parser.parse_args() - stop_instance(args.watch, args.timeout, args.force) diff --git a/AdminServer/appscale/admin/instance_manager/utils.py b/AdminServer/appscale/admin/instance_manager/utils.py index 84951d2e8a..3bf1008b9c 100644 --- a/AdminServer/appscale/admin/instance_manager/utils.py +++ b/AdminServer/appscale/admin/instance_manager/utils.py @@ -9,7 +9,7 @@ from appscale.admin.constants import InvalidSource from appscale.admin.instance_manager.constants import ( - CONFLICTING_JARS, LOGROTATE_CONFIG_DIR, MODIFIED_JARS, MONIT_INSTANCE_PREFIX) + CONFLICTING_JARS, LOGROTATE_CONFIG_DIR, MODIFIED_JARS) from appscale.common.constants import CONFIG_DIR logger = logging.getLogger(__name__) @@ -135,7 +135,7 @@ def setup_logrotate(app_name, log_size): app_logrotate_script = "{0}/appscale-{1}".\ format(LOGROTATE_CONFIG_DIR, app_name) - log_prefix = ''.join([MONIT_INSTANCE_PREFIX, app_name]) + log_prefix = 'app___{}'.format(app_name) # Application logrotate script content. contents = """/var/log/appscale/{log_prefix}*.log {{ diff --git a/AdminServer/appscale/admin/push_worker_manager.py b/AdminServer/appscale/admin/push_worker_manager.py index 908bc03bca..26fd17dac9 100644 --- a/AdminServer/appscale/admin/push_worker_manager.py +++ b/AdminServer/appscale/admin/push_worker_manager.py @@ -10,13 +10,11 @@ from tornado.ioloop import IOLoop from tornado.options import options +from appscale.common import file_io from appscale.common.async_retrying import ( retry_children_watch_coroutine, retry_coroutine, retry_data_watch_coroutine ) -from appscale.common.constants import (CONFIG_DIR, LOG_DIR, MonitStates, - VAR_DIR) -from appscale.common.monit_app_configuration import create_config_file -from appscale.common.monit_app_configuration import MONIT_CONFIG_DIR +from appscale.common.constants import (CONFIG_DIR, LOG_DIR, VAR_DIR) from .utils import ensure_path @@ -57,20 +55,20 @@ class ProjectPushWorkerManager(object): """ Manages the Celery worker for a single project. 
""" - def __init__(self, zk_client, monit_operator, project_id): + def __init__(self, zk_client, service_operator, project_id): """ Creates a new ProjectPushWorkerManager. Args: zk_client: A KazooClient. - monit_operator: A MonitOperator. + service_operator: A ServiceOperator. project_id: A string specifying a project ID. """ self.zk_client = zk_client self.project_id = project_id - self.monit_operator = monit_operator + self.service_operator = service_operator self.queues_node = '/appscale/projects/{}/queues'.format(project_id) self.watch = zk_client.DataWatch(self.queues_node, self._update_worker) - self.monit_watch = 'celery-{}'.format(project_id) + self.service_name = 'appscale-celery@{0}'.format(project_id) self._stopped = False @gen.coroutine @@ -81,87 +79,19 @@ def update_worker(self, queue_config): queue_config: A JSON string specifying queue configuration. """ self._write_worker_configuration(queue_config) - status = yield self._wait_for_stable_state() - - pid_location = os.path.join(VAR_DIR, 'celery-{}.pid'.format(self.project_id)) - try: - with open(pid_location) as pidfile: - old_pid = int(pidfile.read().strip()) - except IOError: - old_pid = None # Start the worker if it doesn't exist. Restart it if it does. - if status == MonitStates.MISSING: - command = self.celery_command() - env_vars = {'APP_ID': self.project_id, 'HOST': options.load_balancers[0], - 'C_FORCE_ROOT': True} - create_config_file(self.monit_watch, command, pid_location, - env_vars=env_vars, max_memory=CELERY_SAFE_MEMORY) - logger.info('Starting push worker for {}'.format(self.project_id)) - yield self.monit_operator.reload() - else: - logger.info('Restarting push worker for {}'.format(self.project_id)) - yield self.monit_operator.send_command(self.monit_watch, 'restart') - - start_future = self.monit_operator.ensure_running(self.monit_watch) - yield gen.with_timeout(timedelta(seconds=60), start_future, - IOLoop.current()) - - try: - yield self.ensure_pid_changed(old_pid, pid_location) - except AssertionError: - # Occasionally, Monit will get interrupted during a restart. Retry the - # restart if the Celery worker PID is the same. - logger.warning( - '{} worker PID did not change. Restarting it.'.format(self.project_id)) - yield self.update_worker(queue_config) - - @staticmethod - @retry_coroutine(retrying_timeout=10, retry_on_exception=[AssertionError]) - def ensure_pid_changed(old_pid, pid_location): - try: - with open(pid_location) as pidfile: - new_pid = int(pidfile.read().strip()) - except IOError: - new_pid = None - - if new_pid == old_pid: - raise AssertionError + logger.info('(Re)starting push worker for {}'.format(self.project_id)) + file_io.write('/run/appscale/appscale-celery.env', + 'HOST={}\n'.format(options.load_balancers[0])) + yield self.service_operator.restart_async(self.service_name) + yield self.service_operator.start_async(self.service_name) @gen.coroutine def stop_worker(self): - """ Removes the monit configuration for the project's push worker. """ - status = yield self._wait_for_stable_state() - if status == MonitStates.RUNNING: - logger.info('Stopping push worker for {}.'.format(self.project_id)) - yield self.monit_operator.send_command(self.monit_watch, 'stop') - watch_file = '{}/appscale-{}.cfg'.format(MONIT_CONFIG_DIR, self.monit_watch) - os.remove(watch_file) - else: - logger.debug('Not stopping push worker for {} since it is not running.'.format(self.project_id)) - - def celery_command(self): - """ Generates the Celery command for a project's push worker. 
""" - log_file = os.path.join(CELERY_WORKER_LOG_DIR, - '{}.log'.format(self.project_id)) - pidfile = os.path.join(VAR_DIR, 'celery-{}.pid'.format(self.project_id)) - state_db = os.path.join(CELERY_STATE_DIR, - 'worker___{}.db'.format(self.project_id)) - - return ' '.join([ - CELERY_TQ_DIR, 'worker', - '--app', WORKER_MODULE, - '--pool=eventlet', - '--concurrency={}'.format(CELERY_CONCURRENCY), - '--hostname', self.project_id, - '--workdir', CELERY_WORKER_DIR, - '--logfile', log_file, - '--pidfile', pidfile, - '--time-limit', str(HARD_TIME_LIMIT), - '--soft-time-limit', str(TASK_SOFT_TIME_LIMIT), - '--statedb', state_db, - '-Ofair' - ]) + """ Stop the project's push worker. """ + logger.info('Stopping push worker for {}.'.format(self.project_id)) + yield self.service_operator.stop(self.service_name) def ensure_watch(self): """ Restart the watch if it has been cancelled. """ @@ -170,17 +100,6 @@ def ensure_watch(self): self.watch = self.zk_client.DataWatch(self.queues_node, self._update_worker) - @gen.coroutine - def _wait_for_stable_state(self): - """ Waits until the worker's state is not pending. """ - stable_states = (MonitStates.MISSING, MonitStates.RUNNING, - MonitStates.UNMONITORED) - status_future = self.monit_operator.wait_for_status( - self.monit_watch, stable_states) - status = yield gen.with_timeout(timedelta(seconds=60), status_future, - IOLoop.current()) - raise gen.Return(status) - def _write_worker_configuration(self, queue_config): """ Writes a worker's configuration file. @@ -231,10 +150,10 @@ def _update_worker(self, queue_config, _): class GlobalPushWorkerManager(object): """ Manages the Celery workers for all projects. """ - def __init__(self, zk_client, monit_operator): + def __init__(self, zk_client, service_operator): """ Creates a new GlobalPushWorkerManager. """ self.zk_client = zk_client - self.monit_operator = monit_operator + self.service_operator = service_operator self.projects = {} ensure_path(CELERY_CONFIG_DIR) ensure_path(CELERY_WORKER_DIR) @@ -260,7 +179,7 @@ def update_projects(self, new_project_list): for new_project_id in new_project_list: if new_project_id not in self.projects: self.projects[new_project_id] = ProjectPushWorkerManager( - self.zk_client, self.monit_operator, new_project_id) + self.zk_client, self.service_operator, new_project_id) # Handle changes that happen between watches. self.projects[new_project_id].ensure_watch() diff --git a/AdminServer/appscale/admin/routing/haproxy.py b/AdminServer/appscale/admin/routing/haproxy.py index aac2dff79f..28c1b20f6f 100644 --- a/AdminServer/appscale/admin/routing/haproxy.py +++ b/AdminServer/appscale/admin/routing/haproxy.py @@ -78,7 +78,7 @@ class HAProxy(object): APP_CONFIG = os.path.join(CONFIG_DIR, 'app-haproxy.cfg') # The location of the pidfile for instance-related HAProxy processes. - APP_PID = os.path.join('/', 'var', 'run', 'appscale', 'app-haproxy.pid') + APP_PID = os.path.join('/', 'run', 'appscale', 'app-haproxy.pid') # The location of the unix socket used for reporting stats. APP_STATS_SOCKET = os.path.join(CONFIG_DIR, 'stats') diff --git a/AdminServer/appscale/admin/stop_services.py b/AdminServer/appscale/admin/stop_services.py index f4d850fea6..187bacbfb5 100644 --- a/AdminServer/appscale/admin/stop_services.py +++ b/AdminServer/appscale/admin/stop_services.py @@ -1,150 +1,70 @@ -""" Tries to stop all Monit services until they are stopped. """ +""" Tries to stop all services until they are stopped. 
""" import argparse import logging -import socket -import sys -import time -from appscale.common.async_retrying import retry_coroutine from tornado import gen, ioloop +from appscale.common import service_helper +from appscale.common.async_retrying import retry_coroutine from appscale.common.constants import LOG_FORMAT -from appscale.common.monit_interface import (DEFAULT_RETRIES, MonitOperator, - MonitStates, MonitUnavailable, - ProcessNotFound) from appscale.common.retrying import retry +from appscale.common.service_helper import ServiceOperator logger = logging.getLogger(__name__) -def order_services(running_services): - """ Arranges a list of running services in the order they should be stopped. - - Args: - running_services: A list of strings specifying running services. - Returns: - A tuple with two items. The first is a list of ordered services. The second - is a list of remaining services that are not recognized. - """ - service_order = [ - # First, stop the services that manage other services. - ['controller'], - ['admin_server'], - ['appmanagerserver'], - - # Next, stop routing requests to running instances. - ['nginx'], - ['app_haproxy'], - - # Next, stop application runtime instances. - ['app___'], - ['api-server_'], - - # Next, stop services that depend on other services. - ['service_haproxy'], - ['blobstore', 'celery-', 'flower', 'groomer_service', 'hermes', - 'iaas_manager', 'log_service', 'taskqueue-', 'transaction_groomer', - 'uaserver'], - - # Finally, stop the underlying backend services. - ['cassandra', 'ejabberd', 'memcached', 'rabbitmq', 'zookeeper'] - ] - - ordered_services = [] - for service_types in service_order: - parallel_group = [] - relevant_entries = [ - service - for service in running_services - for service_type in service_types - if service.startswith(service_type) - ] - for entry in relevant_entries: - index = running_services.index(entry) - parallel_group.append(running_services.pop(index)) - ordered_services.append(parallel_group) - - return ordered_services, running_services - - def start_service(): - """ Starts a service using the Monit HTTP API. """ + """ Starts a service. """ parser = argparse.ArgumentParser() parser.add_argument('service', help='The service to start') args = parser.parse_args() - monit_operator = MonitOperator() - monit_retry = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES) - send_w_retries = monit_retry(monit_operator.send_command_sync) - send_w_retries(args.service, 'start') + service_helper.start(args.service) def stop_service(): - """ Stops a service using the Monit HTTP API. """ + """ Stops a service. """ parser = argparse.ArgumentParser() parser.add_argument('service', help='The service to stop') args = parser.parse_args() - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - try: - monit_operator = MonitOperator() - monit_retry = retry(max_retries=5, retry_on_exception=DEFAULT_RETRIES) - send_w_retries = monit_retry(monit_operator.send_command_sync) - send_w_retries(args.service, 'stop') - except ProcessNotFound as e: - logger.info(str(e)) - sys.exit(1) + service_helper.stop(args.service) @gen.coroutine def main_async(): - """ Tries to stop all Monit services until they are stopped. """ - monit_operator = MonitOperator() - hostname = socket.gethostname() - - logger.info('Waiting for monit to stop services') - logged_service_warning = False + """ Tries to stop all appscale services until they are stopped. 
""" + @retry_coroutine(max_retries=3) + def stop_with_retries(): + logger.debug('Stopping AppScale services') + yield service_operator.start_async('appscale-down.target', enable=False) + + logger.info('Waiting for services to stop') + service_operator = ServiceOperator() + stop_requested = False + original_services_count = None stopped_count = 0 while True: - entries = yield monit_operator.get_entries() - services = {service: state for service, state in entries.items() - if 'cron' not in service and service != hostname} + services = yield service_operator.list_async() + + if original_services_count is None: + original_services_count = len(services) + running = {service: state for service, state in services.items() - if state not in (MonitStates.STOPPED, MonitStates.UNMONITORED)} + if state not in ('stopped')} + if not running: logger.info('Finished stopping services') break - if len(services) - len(running) != stopped_count: - stopped_count = len(services) - len(running) + if original_services_count - len(running) != stopped_count: + stopped_count = original_services_count - len(running) logger.info( - 'Stopped {}/{} services'.format(stopped_count, len(services))) - - try: - ordered_services, unrecognized_services = order_services(running.keys()) - if unrecognized_services and not logged_service_warning: - logger.warning( - 'Unrecognized running services: {}'.format(unrecognized_services)) - logged_service_warning = True - - ordered_services.append(unrecognized_services) - for parallel_group in ordered_services: - running = [process for process in parallel_group - if services[process] != MonitStates.PENDING] - if running: - break - else: - continue - - @retry_coroutine(max_retries=5, retry_on_exception=DEFAULT_RETRIES) - def stop_with_retries(process_name): - logger.debug('Sending command to stop "{}"..'.format(process_name)) - yield monit_operator.send_command(process_name, 'stop') - - yield [stop_with_retries(process) for process in running] - except StopIteration: - # If all running services are pending, just wait until they are not. - pass + 'Stopped {}/{} services'.format(stopped_count, original_services_count)) + + if not stop_requested: + yield stop_with_retries() + stop_requested = True yield gen.sleep(min(0.3 * len(running), 5)) diff --git a/AdminServer/appscale/admin/summary.py b/AdminServer/appscale/admin/summary.py index cf303ef719..a11980f405 100644 --- a/AdminServer/appscale/admin/summary.py +++ b/AdminServer/appscale/admin/summary.py @@ -2,20 +2,14 @@ from tornado.httpclient import HTTPClient from appscale.admin.service_manager import ServiceManager -from appscale.common.monit_interface import MonitOperator, parse_entries -def get_combined_services(): - """ Merge list of services from Monit and ServiceManager. +def get_services(): + """ Get dictionary of services from ServiceManager. Returns: A dictionary mapping service name to service state. 
""" - http_client = HTTPClient() - status_url = '{}/_status?format=xml'.format(MonitOperator.LOCATION) - response = http_client.fetch(status_url) - servers = parse_entries(response.body) - - servers.update({'-'.join([server.type, str(server.port)]): server.state - for server in ServiceManager.get_state()}) + servers = {'-'.join([server.type, str(server.port)]): server.state + for server in ServiceManager.get_state()} return servers diff --git a/AdminServer/setup.py b/AdminServer/setup.py index ecc5f08bd0..4e3347c49f 100644 --- a/AdminServer/setup.py +++ b/AdminServer/setup.py @@ -48,7 +48,6 @@ entry_points={'console_scripts': [ 'appscale-admin=appscale.admin:main', 'appscale-instance-manager=appscale.admin.instance_manager.server:main', - 'appscale-stop-instance=appscale.admin.instance_manager.stop_instance:main', 'appscale-stop-services=appscale.admin.stop_services:main', 'appscale-stop-service=appscale.admin.stop_services:stop_service', 'appscale-start-service=appscale.admin.stop_services:start_service' diff --git a/AdminServer/tests/test_instance_manager.py b/AdminServer/tests/test_instance_manager.py index d890b6bf3d..31ac99e173 100644 --- a/AdminServer/tests/test_instance_manager.py +++ b/AdminServer/tests/test_instance_manager.py @@ -25,11 +25,10 @@ file_io, appscale_info, misc, - monit_interface, + service_helper, testing ) -from appscale.common import monit_app_configuration -from appscale.common.monit_interface import MonitOperator +from appscale.common.service_helper import ServiceOperator options.define('login_ip', '127.0.0.1') options.define('syslog_server', '127.0.0.1') @@ -71,23 +70,16 @@ def test_start_app_goodconfig_python(self): source_manager, None, None, None) instance_manager._login_server = '192.168.33.10' - flexmock(monit_app_configuration).should_receive('create_config_file').\ - and_return('fakeconfig') - response = Future() - response.set_result(None) + response.set_result((19999, [])) flexmock(instance_manager).should_receive('_ensure_api_server').\ and_return(response) - response = Future() - response.set_result(None) - flexmock(MonitOperator).should_receive('reload').\ - and_return(response) + flexmock(file_io).should_receive('write').and_return() response = Future() response.set_result(None) - flexmock(MonitOperator).should_receive('send_command_retry_process').\ - with_args('app___test_default_v1_1-20000', 'start').\ + flexmock(ServiceOperator).should_receive('start_async').\ and_return(response) response = Future() @@ -106,9 +98,8 @@ def test_start_app_goodconfig_python(self): instance_manager._zk_client = flexmock() instance_manager._zk_client.should_receive('ensure_path') - instance_manager._monit_operator = flexmock( - reload=lambda x: response, - send_command_retry_process=lambda watch, cmd: response) + instance_manager._service_operator = flexmock( + start_async=lambda service, wants, properties: response) yield instance_manager._start_instance(version_manager, 20000) @@ -145,24 +136,17 @@ def test_start_app_goodconfig_java(self): flexmock(instance).should_receive('create_java_start_cmd').\ and_return(start_cmd) - flexmock(monit_app_configuration).should_receive('create_config_file').\ - and_return('fakeconfig') - response = Future() - response.set_result(None) + response.set_result((19999, [])) flexmock(instance_manager).should_receive('_ensure_api_server').\ and_return(response) - response = Future() - response.set_result(None) - flexmock(MonitOperator).should_receive('reload').\ - and_return(response) + 
flexmock(file_io).should_receive('write').and_return() response = Future() response.set_result(None) - flexmock(MonitOperator).should_receive('send_command_retry_process').\ - with_args('app___test_default_v1_1-20000', 'start').\ - and_return(response) + flexmock(ServiceOperator).should_receive('start_async').\ + and_return(response) response = Future() response.set_result(None) @@ -188,9 +172,8 @@ def test_start_app_goodconfig_java(self): response = Future() response.set_result(None) - instance_manager._monit_operator = flexmock( - reload=lambda x: response, - send_command_retry_process=lambda watch, cmd: response) + instance_manager._service_operator = flexmock( + start_async=lambda service, wants, properties: response) yield instance_manager._start_instance(version_manager, 20000) @@ -220,10 +203,15 @@ def test_start_app_failed_copy_java(self): and_return(response) instance_manager._source_manager = source_manager - flexmock(utils).should_receive('find_web_inf').\ - and_return('/path/to/dir/WEB-INF') - flexmock(monit_app_configuration).should_receive('create_config_file').\ - and_raise(IOError) + flexmock(instance).should_receive('find_web_inf'). \ + and_return('/path/to/dir/WEB-INF') + + response = Future() + response.set_result((19999, [])) + flexmock(instance_manager).should_receive('_ensure_api_server'). \ + and_return(response) + + flexmock(file_io).should_receive('write').and_raise(IOError) with self.assertRaises(IOError): yield instance_manager._start_instance(version_manager, 20000) @@ -267,33 +255,8 @@ def test_stop_app_instance(self): response.set_result(None) instance_manager._routing_client = flexmock( unregister_instance=lambda instance: response) - flexmock(MonitOperator).should_receive('send_command_sync').\ - with_args('app___test_default_v1-20000', 'unmonitor').\ - and_raise(HTTPError) - - unmonitor_future = Future() - unmonitor_future.set_exception(HTTPError(500)) - flexmock(instance_manager).should_receive('_unmonitor_and_terminate').\ - and_return(unmonitor_future) - - entries_response = Future() - entries_response.set_result(['app___test_default_v1_revid-20000']) - instance_manager._monit_operator = flexmock( - get_entries=lambda: entries_response, - reload=lambda x: response) - - with self.assertRaises(HTTPError): - yield instance_manager._stop_app_instance( - instance.Instance('_'.join([version_key, 'revid']), port)) - - flexmock(MonitOperator).should_receive('send_command_sync').\ - with_args('app___test_default_v1-20000', 'unmonitor') - flexmock(os).should_receive('remove') - flexmock(monit_interface).should_receive('safe_monit_run') - - response = Future() - response.set_result(None) - flexmock(MonitOperator).should_receive('reload').\ + flexmock(ServiceOperator).should_receive('stop_async').\ + with_args('appscale-instance-run@test_default_v1-20000').\ and_return(response) response = Future() @@ -301,10 +264,8 @@ def test_stop_app_instance(self): flexmock(instance_manager).should_receive('_clean_old_sources').\ and_return(response) - response = Future() - response.set_result(None) - flexmock(instance_manager).should_receive('_unmonitor_and_terminate').\ - and_return(response) + instance_manager._service_operator = flexmock( + stop_async=lambda service: response) yield instance_manager._stop_app_instance( instance.Instance('_'.join([version_key, 'revid']), port)) From 5fa9ba130ebd18355b3578fa25850e4ecd5f2073 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:30:07 -0700 Subject: [PATCH 087/221] Updates for systemd, replace app controller monit 
use --- AppController/djinn.rb | 133 +++++-------- AppController/lib/blobstore.rb | 26 ++- AppController/lib/cassandra_helper.rb | 28 +-- AppController/lib/ejabberd.rb | 31 +-- AppController/lib/groomer_service.rb | 33 ++-- AppController/lib/haproxy.rb | 26 +-- AppController/lib/helperfunctions.rb | 3 + AppController/lib/monit_interface.rb | 273 -------------------------- AppController/lib/nginx.rb | 45 ++--- AppController/lib/search.rb | 27 +-- AppController/lib/service_helper.rb | 83 ++++++++ AppController/lib/taskqueue.rb | 99 +++------- AppController/lib/zookeeper_helper.rb | 17 +- AppController/terminate.rb | 27 ++- AppController/test/tc_djinn.rb | 10 +- 15 files changed, 267 insertions(+), 594 deletions(-) delete mode 100644 AppController/lib/monit_interface.rb create mode 100644 AppController/lib/service_helper.rb diff --git a/AppController/djinn.rb b/AppController/djinn.rb index e3685f99a8..54a0fd66c5 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -37,7 +37,7 @@ require 'helperfunctions' require 'hermes_client' require 'infrastructure_manager_client' -require 'monit_interface' +require 'service_helper' require 'nginx' require 'search' require 'taskqueue' @@ -207,7 +207,6 @@ class Djinn # "used" => 886620160 # }, # "services" => { - # # For each Process monitored by monit # "cassandra" => "Running", # ... # }, @@ -572,9 +571,6 @@ def initialize @nodes = [] @options = {} @last_decision = {} - - # Make sure monit is started. - MonitInterface.start_monit end # A SOAP-exposed method that callers can use to determine if this node @@ -1837,12 +1833,6 @@ def job_start(secret) # Mark the beginning of the duty cycle. start_work_time = Time.now.to_i - # We want to ensure monit stays up all the time, since we rely on - # it for services and AppServers. - unless MonitInterface.start_monit - Djinn.log_warn('Monit was not running: restarted it.') - end - write_database_info update_port_files update_firewall @@ -2106,19 +2096,18 @@ def receive_server_message(timeout, secret) # a SOAP interface by which we can dynamically add and remove nodes in this # AppScale deployment. def start_infrastructure_manager - script = `which appscale-infrastructure`.chomp - service_port = 17444 - start_cmd = "#{script} -p #{service_port}" - start_cmd << ' --autoscaler' if my_node.is_shadow? - start_cmd << ' --verbose' if @options['verbose'].downcase == 'true' + service_env = {} + service_env[:APPSCALE_OPTION_VERBOSE] = '--verbose' if @options['verbose'].downcase == 'true' + ServiceHelper.write_environment('appscale-infrastructure', service_env) - MonitInterface.start(:iaas_manager, start_cmd) + instance_name = if my_node.is_shadow? then 'shadow' else 'basic' end + ServiceHelper.start("appscale-infrastructure@#{instance_name}") Djinn.log_info("Started InfrastructureManager successfully!") end def stop_infrastructure_manager Djinn.log_info("Stopping InfrastructureManager") - MonitInterface.stop(:iaas_manager) + ServiceHelper.stop('appscale-infrastructure@') end def get_online_users_list(secret) @@ -3562,19 +3551,19 @@ def start_taskqueue_slave # starting and stopping applications. def start_app_manager_server @state = "Starting up AppManager" - app_manager_script = `which appscale-instance-manager`.chomp - start_cmd = "#{PYTHON27} #{app_manager_script}" - MonitInterface.start(:appmanagerserver, start_cmd) + ServiceHelper.start('appscale-instance-manager') end # Starts the Hermes service on this node. 
def start_hermes @state = "Starting Hermes" Djinn.log_info("Starting Hermes service.") - script = `which appscale-hermes`.chomp - start_cmd = "/usr/bin/python2 #{script}" - start_cmd << ' --verbose' if @options['verbose'].downcase == 'true' - MonitInterface.start(:hermes, start_cmd) + + service_env = {} + service_env[:APPSCALE_OPTION_VERBOSE] = '--verbose' if @options['verbose'].downcase == 'true' + ServiceHelper.write_environment('appscale-hermes', service_env) + + ServiceHelper.start('appscale-hermes') if my_node.is_shadow? nginx_port = 17441 service_port = 4378 @@ -3603,24 +3592,12 @@ def start_soap_server } HelperFunctions.log_and_crash("db master ip was nil") if db_master_ip.nil? - db_local_ip = my_node.private_ip - - table = @options['table'] - - env_vars = {} - - env_vars['APPSCALE_HOME'] = APPSCALE_HOME - env_vars['MASTER_IP'] = db_master_ip - env_vars['LOCAL_DB_IP'] = db_local_ip - - if table == "simpledb" - env_vars['SIMPLEDB_ACCESS_KEY'] = @options['SIMPLEDB_ACCESS_KEY'] - env_vars['SIMPLEDB_SECRET_KEY'] = @options['SIMPLEDB_SECRET_KEY'] - end - - soap_script = `which appscale-uaserver`.chomp - start_cmd = "#{soap_script} -t #{table}" - MonitInterface.start(:uaserver, start_cmd, nil, env_vars) + service_env = { + MASTER_IP: db_master_ip, + LOCAL_DB_IP: my_node.private_ip + } + ServiceHelper.write_environment('appscale-uaserver', service_env) + ServiceHelper.start('appscale-uaserver') end def assign_datastore_processes @@ -3679,31 +3656,13 @@ def assign_search2_processes # Starts the Log Server service on this machine def start_log_server - log_server_pid = '/run/appscale/log_service.pid' - log_server_file = '/var/log/appscale/log_service.log' - twistd = `which twistd`.chomp - env = `which env`.chomp - bash = `which bash`.chomp - - env_vars = { - 'APPSCALE_HOME' => APPSCALE_HOME, - 'PYTHONPATH' => "#{APPSCALE_HOME}/LogService/" - } - start_cmd = [env, env_vars.map{ |k, v| "#{k}=#{v}" }.join(' '), - twistd, - '--pidfile', log_server_pid, - '--logfile', log_server_file, - 'appscale-logserver'].join(' ') - stop_cmd = "#{bash} -c 'kill $(cat #{log_server_pid})'" - - MonitInterface.start_daemon(:log_service, start_cmd, stop_cmd, - log_server_pid) + ServiceHelper.start('appscale-logserver') Djinn.log_info("Started Log Server successfully!") end def stop_log_server Djinn.log_info("Stopping Log Server") - MonitInterface.stop(:log_service) + ServiceHelper.stop('appscale-logserver') end # Stops the blobstore server. @@ -3713,12 +3672,12 @@ def stop_blobstore_server # Stops the User/Apps soap server. def stop_soap_server - MonitInterface.stop(:uaserver) + ServiceHelper.stop('appscale-uaserver') end # Stops the AppManager service def stop_app_manager_server - MonitInterface.stop(:appmanagerserver) + ServiceHelper.stop('appscale-instance-manager') end # Stops the groomer service. @@ -4490,13 +4449,13 @@ def start_appcontroller(node) def start_admin_server Djinn.log_info('Starting AdminServer') - script = `which appscale-admin`.chomp nginx_port = 17441 service_port = 17442 - start_cmd = "#{script} serve -p #{service_port}" - start_cmd << ' --verbose' if @options['verbose'].downcase == 'true' - MonitInterface.start(:admin_server, start_cmd, nil, - {'PATH' => ENV['PATH']}) + + service_env = {} + service_env[:APPSCALE_OPTION_VERBOSE] = '--verbose' if @options['verbose'].downcase == 'true' + ServiceHelper.write_environment('appscale-admin', service_env) + ServiceHelper.start('appscale-admin') if my_node.is_load_balancer? 
Nginx.add_service_location('appscale-administration', my_node.private_ip, service_port, nginx_port, '/') @@ -4506,26 +4465,18 @@ def start_admin_server def start_memcache @state = "Starting up memcache" Djinn.log_info("Starting up memcache") - port = 11211 - start_cmd = "/usr/bin/memcached -m 64 -p #{port} -u root" - MonitInterface.start(:memcached, start_cmd) + ServiceHelper.start('appscale-memcached') end def stop_memcache - MonitInterface.stop(:memcached) if MonitInterface.is_running?(:memcached) + ServiceHelper.stop('appscale-memcached') end def start_ejabberd @state = "Starting up XMPP server" Djinn.log_run("rm -f /var/lib/ejabberd/*") Ejabberd.write_config_file(@options['login'], my_node.private_ip) - Ejabberd.update_ctl_config - - # Monit does not have an entry for ejabberd yet. This allows a restart - # with the new configuration if it is already running. - `systemctl stop ejabberd` - - Ejabberd.start + Ejabberd.start(true) end def stop_ejabberd @@ -4749,8 +4700,8 @@ def check_stopped_apps # Load balancers have to adjust nginx and haproxy to remove the # application routings. if my_node.is_load_balancer? - MonitInterface.running_xmpp.each { |xmpp_app| - match = xmpp_app.match(/xmpp-(.*)/) + ServiceHelper.running('appscale-xmpp@').each { |xmpp_app| + match = xmpp_app.match(/appscale-xmpp@(.*)/) next if match.nil? project_id = match.captures.first @@ -5823,10 +5774,10 @@ def fetch_revision(revision_key) # This function creates the xmpp account for 'app', as app@login_ip. def start_xmpp_for_app(app) - watch_name = "xmpp-#{app}" + service_name = "appscale-xmpp@#{app}" # If we have it already running, nothing to do - if MonitInterface.is_running?(watch_name) + if ServiceHelper.is_running?(service_name) Djinn.log_debug("xmpp already running for application #{app}") return end @@ -5862,11 +5813,13 @@ def start_xmpp_for_app(app) "[#{@@secret}] and hashed password [#{xmpp_pass}]") if Ejabberd.does_app_need_receive?(app) - start_cmd = "#{PYTHON27} #{APPSCALE_HOME}/XMPPReceiver/" \ + Djinn.log_debug("App #{app} does need xmpp receive functionality") + xmpp_command_content = "exec #{PYTHON27} #{APPSCALE_HOME}/XMPPReceiver/" \ "xmpp_receiver.py #{app} #{login_ip} " \ "#{get_load_balancer.private_ip} #{@@secret}" - MonitInterface.start(watch_name, start_cmd) - Djinn.log_debug("App #{app} does need xmpp receive functionality") + xmpp_command_path = "/run/appscale/apps/xmpp_command_#{app}" + HelperFunctions.write_file(xmpp_command_path, xmpp_command_content) + ServiceHelper.start("appscale-xmpp@#{app}") else Djinn.log_debug("App #{app} does not need xmpp receive functionality") end @@ -5878,7 +5831,9 @@ def start_xmpp_for_app(app) # app: The application ID whose XMPPReceiver we should shut down. 
def stop_xmpp_for_app(app) Djinn.log_info("Shutting down xmpp receiver for app: #{app}") - MonitInterface.stop("xmpp-#{app}") if MonitInterface.is_running?("xmpp-#{app}") + ServiceHelper.stop("appscale-xmpp@#{app}") + xmpp_command_path = "/run/appscale/apps/xmpp_command_#{app}" + FileUtils.rm_rf(xmpp_command_path) Djinn.log_info("Done shutting down xmpp receiver for app: #{app}") end diff --git a/AppController/lib/blobstore.rb b/AppController/lib/blobstore.rb index 91fb3acd89..a4ab8c2b43 100644 --- a/AppController/lib/blobstore.rb +++ b/AppController/lib/blobstore.rb @@ -2,7 +2,7 @@ $:.unshift File.join(File.dirname(__FILE__)) require 'helperfunctions' -require 'monit_interface' +require 'service_helper' # To support the Google App Engine Blobstore API, we have a custom server that # handles Blobstore API requests, known as the Blobstore Server. This module @@ -17,30 +17,28 @@ module BlobServer # engine node. HAPROXY_PORT = 6106 + # Service name for use with helper + SERVICE_NAME = 'appscale-blobstore'.freeze + # The server name used for HAProxy configuration. NAME = 'as_blob_server'.freeze def self.start(db_local_ip, db_local_port) - start_cmd = [ - scriptname.to_s, - "-d #{db_local_ip}:#{db_local_port}", - "-p #{self::SERVER_PORT}" - ].join(' ') - - MonitInterface.start(:blobstore, start_cmd) + service_env = { + APPSCALE_BLOBSTORE_PORT: SERVER_PORT, + APPSCALE_DATASTORE_SERVICE: "#{db_local_ip}:#{db_local_port}" + } + ServiceHelper.write_environment(SERVICE_NAME, service_env) + ServiceHelper.start(SERVICE_NAME) end def self.stop - MonitInterface.stop(:blobstore) + ServiceHelper.stop(SERVICE_NAME) end def self.is_running? - output = MonitInterface.is_running?(:blobstore) + output = ServiceHelper.is_running?(SERVICE_NAME) Djinn.log_debug("Checking if blobstore is already monitored: #{output}") output end - - def self.scriptname - `which appscale-blobstore-server`.chomp - end end diff --git a/AppController/lib/cassandra_helper.rb b/AppController/lib/cassandra_helper.rb index cc0303f571..da37ccd74c 100644 --- a/AppController/lib/cassandra_helper.rb +++ b/AppController/lib/cassandra_helper.rb @@ -2,7 +2,7 @@ require 'djinn' require 'node_info' require 'helperfunctions' -require 'monit_interface' +require 'service_helper' require 'set' # A String that indicates where we write the process ID that Cassandra runs @@ -12,9 +12,8 @@ # A String that indicates where we install Cassandra on this machine. CASSANDRA_DIR = '/opt/cassandra'.freeze -# A String that indicates where the Cassandra binary is located on this -# machine. -CASSANDRA_EXECUTABLE = "#{CASSANDRA_DIR}/cassandra/bin/cassandra".freeze +# Name for service as per helper. +CASSANDRA_SERVICE_NAME = "appscale-cassandra".freeze # The location of the script that sets up Cassandra's config files. SETUP_CONFIG_SCRIPT = "#{APPSCALE_HOME}/scripts/setup_cassandra_config_files.py".freeze @@ -25,7 +24,7 @@ # The location of the script that creates the initial tables. PRIME_SCRIPT = `which appscale-prime-cassandra`.chomp -# The number of seconds Monit should allow Cassandra to take while starting up. +# The number of seconds to allow Cassandra to take while starting up. START_TIMEOUT = 60 # The location of the Cassandra data directory. @@ -129,19 +128,12 @@ def start_cassandra(clear_datastore, needed, desired, heap_reduction) Djinn.log_run("rm -rf #{CASSANDRA_DATA_DIR}") end - # Create Cassandra data directory. 
- Djinn.log_run("mkdir -p #{CASSANDRA_DATA_DIR}") - Djinn.log_run("chown -R cassandra #{CASSANDRA_DATA_DIR}") - - su = `which su`.chomp - cmd = "#{CASSANDRA_EXECUTABLE} -p #{PID_FILE}" + service_env = {} if heap_reduction > 0 - cmd = "HEAP_REDUCTION=#{heap_reduction} #{cmd}" + service_env[:HEAP_REDUCTION] = heap_reduction end - - start_cmd = "#{su} -c '#{cmd}' cassandra" - stop_cmd = "/bin/bash -c 'kill $(cat #{PID_FILE})'" - MonitInterface.start_daemon(:cassandra, start_cmd, stop_cmd, PID_FILE) + ServiceHelper.write_environment(CASSANDRA_SERVICE_NAME, service_env) + ServiceHelper.start(CASSANDRA_SERVICE_NAME) # Ensure enough Cassandra nodes are available. Djinn.log_info('Waiting for Cassandra to start') @@ -151,13 +143,13 @@ def start_cassandra(clear_datastore, needed, desired, heap_reduction) # Kills Cassandra on this machine. def stop_db_master Djinn.log_info('Stopping Cassandra master') - MonitInterface.stop(:cassandra) + ServiceHelper.stop(CASSANDRA_SERVICE_NAME) end # Kills Cassandra on this machine. def stop_db_slave Djinn.log_info('Stopping Cassandra slave') - MonitInterface.stop(:cassandra) + ServiceHelper.stop(CASSANDRA_SERVICE_NAME) end # Calculates the number of nodes needed for a quorum for every token. diff --git a/AppController/lib/ejabberd.rb b/AppController/lib/ejabberd.rb index 5515e963a7..6656716964 100644 --- a/AppController/lib/ejabberd.rb +++ b/AppController/lib/ejabberd.rb @@ -5,7 +5,7 @@ $:.unshift File.join(File.dirname(__FILE__)) require 'node_info' require 'helperfunctions' -require 'monit_interface' +require 'service_helper' # Our implementation of the Google App Engine XMPP and Channel APIs uses the # open source ejabberd server. This module provides convenience methods to @@ -22,18 +22,18 @@ class UnknownVersion < StandardError; end ONLINE_USERS_FILE = '/etc/appscale/online_xmpp_users'.freeze - def self.start - systemctl = `which systemctl`.chomp - start_cmd = "#{systemctl} start ejabberd" - stop_cmd = "#{systemctl} stop ejabberd" - pidfile = '/run/ejabberd/ejabberd.pid' + SERVICE_NAME = 'appscale-ejabberd.target'.freeze - self.ensure_correct_epmd - MonitInterface.start_daemon(:ejabberd, start_cmd, stop_cmd, pidfile) + def self.start(reload = false) + if reload + ServiceHelper.reload(SERVICE_NAME, true) + else + ServiceHelper.start(SERVICE_NAME) + end end def self.stop - MonitInterface.stop(:ejabberd) if MonitInterface.is_running?(:ejabberd) + ServiceHelper.stop(SERVICE_NAME) end def self.clear_online_users @@ -67,7 +67,7 @@ def self.ensure_correct_epmd() next end } - `systemctl start epmd` + `systemctl start epmd.service` end rescue Errno::ENOENT # Distros without systemd don't have systemctl, and they do not exhibit @@ -110,17 +110,6 @@ def self.get_ejabberd_version major_version end - def self.update_ctl_config - # Make sure ejabberd writes a pidfile. 
- begin - config = File.read(CONFIG_FILE) - config.gsub!('#EJABBERD_PID_PATH=', 'EJABBERD_PID_PATH=') - File.open(CONFIG_FILE, 'w') { |file| file.write(config) } - rescue Errno::ENOENT - Djinn.log_debug("#{CONFIG_FILE} does not exist") - end - end - def self.write_config_file(domain, my_private_ip) config_file = 'ejabberd.yml' begin diff --git a/AppController/lib/groomer_service.rb b/AppController/lib/groomer_service.rb index c64d1e4092..273f9a768f 100644 --- a/AppController/lib/groomer_service.rb +++ b/AppController/lib/groomer_service.rb @@ -3,39 +3,36 @@ $:.unshift File.join(File.dirname(__FILE__)) require 'helperfunctions' -require 'monit_interface' +require 'service_helper' # Starts and stops the datastore groomer service. module GroomerService - # This variable is the maximum memory allowed for the groomer process. - MAX_MEM = 512 + # Groomer service name for use with helper + SERVICE_NAME_GROOMER = 'appscale-groomer'.freeze - # Starts the Groomer Service on this machine. We don't want to monitor - # it ourselves, so just tell monit to start it and watch it. + # Transaction groomer service name for use with helper + SERVICE_NAME_TX_GROOMER = 'appscale-transaction-groomer'.freeze + + # Starts the Groomer Service on this machine. def self.start() - start_cmd = self.scriptname - MonitInterface.start(:groomer_service, start_cmd, nil, nil, MAX_MEM) + ServiceHelper.start(SERVICE_NAME_GROOMER) end - # Stops the groomer service running on this machine. Since it's - # managed by monit, just tell monit to shut it down. + # Stops the groomer service running on this machine. def self.stop() - MonitInterface.stop(:groomer_service) - end - - def self.scriptname() - return `which appscale-groomer-service`.chomp + ServiceHelper.stop(SERVICE_NAME_GROOMER) end def self.start_transaction_groomer(verbose) - start_cmd = `which appscale-transaction-groomer`.chomp - start_cmd << ' --verbose' if verbose - MonitInterface.start(:transaction_groomer, start_cmd, nil, nil, MAX_MEM) + service_env = {} + service_env[:APPSCALE_OPTION_VERBOSE] = '--verbose' if verbose + ServiceHelper.write_environment(SERVICE_NAME_TX_GROOMER, service_env) + ServiceHelper.start(SERVICE_NAME_TX_GROOMER) end def self.stop_transaction_groomer - MonitInterface.stop(:transaction_groomer) + ServiceHelper.stop(SERVICE_NAME_TX_GROOMER) end end diff --git a/AppController/lib/haproxy.rb b/AppController/lib/haproxy.rb index 8fdbe40fad..d9e64a9e70 100644 --- a/AppController/lib/haproxy.rb +++ b/AppController/lib/haproxy.rb @@ -6,7 +6,7 @@ $:.unshift File.join(File.dirname(__FILE__)) require 'helperfunctions' require 'app_dashboard' -require 'monit_interface' +require 'service_helper' require 'user_app_client' require 'datastore_server' require 'taskqueue' @@ -32,7 +32,6 @@ module HAProxy SERVICE_SITES_PATH = File.join(HAPROXY_PATH, 'service-sites-enabled') SERVICE_MAIN_FILE = File.join(HAPROXY_PATH, "service-haproxy.#{CONFIG_EXTENSION}") SERVICE_BASE_FILE = File.join(HAPROXY_PATH, "service-base.#{CONFIG_EXTENSION}") - SERVICE_PIDFILE = '/run/appscale/service-haproxy.pid'.freeze # Maximum AppServer threaded connections MAX_APPSERVER_CONN = 7 @@ -63,6 +62,9 @@ module HAProxy # the specific server is specified. SERVER_STATUS_INDEX = 17 + # Name for service as per helper. + SERVICE_NAME = 'appscale-haproxy'.freeze + # The position in the haproxy profiling information where the total # number of requests seen for a given app is specified. 
TOTAL_REQUEST_RATE_INDEX = 48 @@ -83,14 +85,7 @@ def self.services_start Djinn.log_warn('Invalid configuration for HAProxy services.') return end - return if MonitInterface.is_running?(:service_haproxy) - - start_cmd = "#{HAPROXY_BIN} -f #{SERVICE_MAIN_FILE} -D " \ - "-p #{SERVICE_PIDFILE}" - stop_cmd = "#{BASH_BIN} -c 'kill $(cat #{SERVICE_PIDFILE})'" - restart_cmd = "#{BASH_BIN} -c '#{start_cmd} -sf $(cat #{SERVICE_PIDFILE})'" - MonitInterface.start_daemon( - :service_haproxy, start_cmd, stop_cmd, SERVICE_PIDFILE, nil, restart_cmd) + ServiceHelper.start(SERVICE_NAME) end # Create the config file for UserAppServer. @@ -220,15 +215,8 @@ def self.regenerate_config if regenerate_config_file(SERVICE_SITES_PATH, SERVICE_BASE_FILE, SERVICE_MAIN_FILE) - # Ensure the service is monitored and running. - services_start - Djinn::RETRIES.downto(0) { - break if MonitInterface.is_running?(:service_haproxy) - sleep(Djinn::SMALL_WAIT) - } - - # Reload with the new configuration file. - MonitInterface.restart(:service_haproxy) + # Reload with the new configuration file, start if not running + ServiceHelper.reload(SERVICE_NAME, true) end end diff --git a/AppController/lib/helperfunctions.rb b/AppController/lib/helperfunctions.rb index 7dfdd98fd4..9010f7986e 100644 --- a/AppController/lib/helperfunctions.rb +++ b/AppController/lib/helperfunctions.rb @@ -42,6 +42,9 @@ module HelperFunctions APPSCALE_KEYS_DIR = "#{APPSCALE_CONFIG_DIR}/keys/cloud1".freeze + # Temporary location for runtime files + APPSCALE_RUN_DIR = '/run/appscale'.freeze + # Generic sleep time to take while waiting for remote operation to # complete. SLEEP_TIME = 10 diff --git a/AppController/lib/monit_interface.rb b/AppController/lib/monit_interface.rb deleted file mode 100644 index 4beca9628c..0000000000 --- a/AppController/lib/monit_interface.rb +++ /dev/null @@ -1,273 +0,0 @@ -#!/usr/bin/ruby -w - -# Imports within Ruby's standard libraries. -require 'monitor' -require 'tmpdir' - -# Imports AppScale's libraries. -require 'helperfunctions' - -# Where we save the configuration file. -MONIT_CONFIG = '/run/appscale/monit.conf.d'.freeze - -# Monit is finicky when it comes to multiple commands at the same time. -# Let's make sure we serialize access. -MONIT_LOCK = Monitor.new - -# AppScale uses monit to start processes, restart them if they die, or kill and -# restart them if they take up too much CPU or memory. This module abstracts -# away interfacing with monit directly. -module MonitInterface - # The location on the local filesystem of the monit executable. - MONIT = '/usr/bin/monit'.freeze - - def self.start_monit - ret = system('systemctl is-active monit > /dev/null') - run_cmd('systemctl start monit') unless ret - ret - end - - # Starts a basic service. The start_cmd should be designed to run in the - # foreground, and it should not create its own pidfile. - def self.start(watch, start_cmd, ports = nil, env_vars = nil, mem = nil) - reload_monit = false - ports = [nil] if ports.nil? - ports.each { |port| - # Convert symbol to string. - process_name = watch.to_s - full_start_cmd = start_cmd - unless port.nil? 
- full_start_cmd += " -p #{port}" - process_name += "-#{port}" - end - - new_config = service_config(process_name, watch, full_start_cmd, - env_vars, mem) - - monit_file = "#{MONIT_CONFIG}/appscale-#{process_name}.cfg" - reload_required = update_config(monit_file, new_config) - reload_monit = true if reload_required - - Djinn.log_info("Starting #{process_name} with command #{full_start_cmd}") - } - - run_cmd("#{MONIT} reload", true) if reload_monit - ports.each { |port| - process_name = if port.nil? then watch.to_s else "#{watch.to_s}-#{port}" end - run_cmd("appscale-start-service #{process_name}") - } - end - - # Starts a daemonized service. The start_cmd should be designed to start a - # background process, and it should create its own pidfile. - def self.start_daemon(watch, start_cmd, stop_cmd, pidfile, - start_timeout = nil, restart_cmd = nil) - timeout_suffix = "with timeout #{start_timeout} seconds" if start_timeout - if restart_cmd - config = < 1 - Djinn.log_info("Found multiple monit config matches for #{watch}:" \ - " #{config}.") - end - FileUtils.rm_rf(config) - run_cmd("#{MONIT} reload", true) - end - - def self.service_config(process_name, group, start_cmd, env_vars, mem) - # Monit doesn't support environment variables in its DSL, so if the caller - # wants environment variables passed to the app, we have to collect them and - # prepend it to the executable string. - env_vars_str = '' - unless env_vars.nil? || env_vars.empty? - env_vars.each { |key, value| - env_vars_str += "#{key}=#{value} " - } - end - - # Use start-stop-daemon to handle pidfiles and start process in background. - start_stop_daemon = `which start-stop-daemon`.chomp - - # Use bash to redirect the process's output to a log file. - bash = `which bash`.chomp - rm = `which rm`.chomp - - pidfile = "/run/appscale/#{process_name}.pid" - logfile = "/var/log/appscale/#{process_name}.log" - bash_exec = "exec env #{env_vars_str} #{start_cmd} >> #{logfile} 2>&1" - - start_args = ['--start', - '--background', - '--make-pidfile', - '--pidfile', pidfile, - '--startas', "#{bash} -- -c 'unset \"${!MONIT_@}\"; #{bash_exec}'"] - - stop_cmd = "#{start_stop_daemon} --stop --pidfile #{pidfile} " \ - "--retry=TERM/20/KILL/5 && #{rm} #{pidfile}" - - contents = < error - Djinn.log_warn("Error while getting rabbitmq version: #{error.message}") - end - - Djinn.log_run("mkdir -p #{CELERY_STATE_DIR}") - systemctl = `which systemctl`.chomp - start_cmd = "#{systemctl} start rabbitmq-server" - stop_cmd = "#{systemctl} stop rabbitmq-server" - Ejabberd.ensure_correct_epmd - MonitInterface.start_daemon(:rabbitmq, start_cmd, stop_cmd, pidfile, - MAX_WAIT_FOR_RABBITMQ) + ServiceHelper.start(SERVICE_NAME_RABBITMQ) end # Starts a service that we refer to as a "taskqueue_master", a RabbitMQ @@ -225,13 +197,12 @@ def self.start_slave(master_ip, clear_data, verbose) # Starts the AppScale TaskQueue server. 
def self.start_taskqueue_server(verbose) + service_env = {} + service_env[:APPSCALE_OPTION_VERBOSE] = '--verbose' if verbose + ServiceHelper.write_environment('appscale-taskqueue', service_env) Djinn.log_debug('Starting taskqueue servers on this node') ports = get_server_ports - - start_cmd = TASKQUEUE_SERVER_SCRIPT - start_cmd << ' --verbose' if verbose - env_vars = { PATH: '$PATH:/usr/local/bin' } - MonitInterface.start(:taskqueue, start_cmd, ports, env_vars) + ServiceHelper.start(SERVICE_NAME_TASKQUEUE, ports) Djinn.log_debug('Done starting taskqueue servers on this node') end @@ -246,17 +217,19 @@ def self.stop " celery.control.broadcast('shutdown', connection=conn)" stop_cmd = %Q(/usr/bin/python2 -c "#{stop_script}") Djinn.log_run(stop_cmd) - Djinn.log_debug('Shutting down RabbitMQ') - MonitInterface.stop(:rabbitmq) + stop_rabbitmq stop_taskqueue_server end + def self.stop_rabbitmq + Djinn.log_debug('Shutting down RabbitMQ') + ServiceHelper.stop(SERVICE_NAME_RABBITMQ) + end + # Stops the AppScale TaskQueue server. def self.stop_taskqueue_server Djinn.log_debug('Stopping taskqueue servers on this node') - self.get_server_ports.each do |port| - MonitInterface.stop("taskqueue-#{port}") - end + ServiceHelper.stop(SERVICE_NAME_TASKQUEUE) Djinn.log_debug('Done stopping taskqueue servers on this node') end @@ -290,18 +263,16 @@ def self.start_flower(flower_password) return end - flower_cmd = `which flower`.chomp - if flower_cmd.empty? - Djinn.log_warn('Couldn\'t find flower executable.') - return - end - start_cmd = "#{flower_cmd} --basic_auth=appscale:#{flower_password}" - MonitInterface.start(:flower, start_cmd) + service_env = { + APPSCALE_FLOWER_OPTION_AUTH: "--basic_auth=appscale:#{flower_password}" + } + ServiceHelper.write_environment(SERVICE_NAME_FLOWER, service_env) + ServiceHelper.start(SERVICE_NAME_FLOWER) end # Stops the Flower Server on this machine. def self.stop_flower - MonitInterface.stop(:flower) + ServiceHelper.stop(SERVICE_NAME_FLOWER) end # Number of servers is based on the number of CPUs. @@ -322,18 +293,4 @@ def self.get_server_ports } server_ports end - - def self.get_rabbitmq_version - version_re = /Version: (.*)-/ - - begin - rabbitmq_info = `dpkg -s rabbitmq-server` - rescue Errno::ENOENT - raise TaskQueue::UnknownVersion.new('The dpkg command was not found') - end - - match = version_re.match(rabbitmq_info) - raise TaskQueue::UnknownVersion.new('Unable to find version') if match.nil? - match[1] - end end diff --git a/AppController/lib/zookeeper_helper.rb b/AppController/lib/zookeeper_helper.rb index 39806128b8..e4077ba36c 100644 --- a/AppController/lib/zookeeper_helper.rb +++ b/AppController/lib/zookeeper_helper.rb @@ -6,6 +6,9 @@ # The path in ZooKeeper where the deployment ID is stored. DEPLOYMENT_ID_PATH = '/appscale/deployment_id'.freeze +# Name for service as per helper. +ZOOKEEPER_SERVICE_NAME = "appscale-zookeeper.target".freeze + def configure_zookeeper(nodes, my_index) # TODO: create multi node configuration zoocfg = < /dev/null") + if system("systemctl --quiet is-active appscale-cassandra.service") `/opt/cassandra/cassandra/bin/nodetool -h #{ip} -p 7199 drain` end - # Next, stop ZooKeeper politely: we stop it with both new and old - # script to be sure. 
- `systemctl stop zookeeper-server` - `systemctl stop zookeeper` + # Next, stop ZooKeeper politely + `systemctl stop zookeeper.service` end end diff --git a/AppController/test/tc_djinn.rb b/AppController/test/tc_djinn.rb index 515e1f30b2..590dd764de 100644 --- a/AppController/test/tc_djinn.rb +++ b/AppController/test/tc_djinn.rb @@ -24,7 +24,6 @@ def setup djinn = flexmock(Djinn) djinn.should_receive(:log_run).with("").and_return() - djinn.should_receive(:log_run).with("systemctl start monit").and_return() flexmock(HelperFunctions).should_receive(:shell).with("").and_return() flexmock(HelperFunctions).should_receive(:log_and_crash).and_raise( @@ -232,8 +231,8 @@ def test_taskqueue_master # the block actually contains helperfunctions = flexmock(HelperFunctions) helperfunctions.should_receive(:get_secret).and_return(@secret) - flexmock(MonitInterface).should_receive(:start_daemon).and_return() - flexmock(MonitInterface).should_receive(:start).and_return() + flexmock(ServiceHelper).should_receive(:start).and_return() + flexmock(ServiceHelper).should_receive(:start).and_return() file = flexmock(File) file.should_receive(:open).and_return() @@ -285,12 +284,13 @@ def test_taskqueue_slave file = flexmock(File) file.should_receive(:open).with(TaskQueue::COOKIE_FILE, "w+", Proc).and_return() + file.should_receive(:open).with('/run/appscale/appscale-taskqueue.env', "w+", Proc).and_return() # mock out and commands flexmock(Djinn).should_receive(:log_run).and_return() flexmock(HAProxy).should_receive(:create_tq_server_config).and_return() - flexmock(MonitInterface).should_receive(:start_daemon).and_return() - flexmock(MonitInterface).should_receive(:start).and_return() + flexmock(ServiceHelper).should_receive(:start).and_return() + flexmock(ServiceHelper).should_receive(:start).and_return() flexmock(Addrinfo).should_receive('ip.getnameinfo').and_return(["hostname-ip1"]) flexmock(HelperFunctions).should_receive(:sleep_until_port_is_open).and_return() From 8ef89cec761a1d1b92148ed28f1b6f7e6e06bc9d Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:30:17 -0700 Subject: [PATCH 088/221] Updates for systemd, replace common monit use --- common/appscale/common/constants.py | 8 - .../common/monit_app_configuration.py | 163 ------- common/appscale/common/monit_interface.py | 407 ------------------ common/appscale/common/service_helper.py | 318 ++++++++++++++ .../common/templates/monit_template.conf | 4 - .../test/unit/test_monit_app_configuration.py | 18 - common/test/unit/test_monit_interface.py | 53 --- 7 files changed, 318 insertions(+), 653 deletions(-) delete mode 100644 common/appscale/common/monit_app_configuration.py delete mode 100644 common/appscale/common/monit_interface.py create mode 100644 common/appscale/common/service_helper.py delete mode 100644 common/appscale/common/templates/monit_template.conf delete mode 100644 common/test/unit/test_monit_app_configuration.py delete mode 100644 common/test/unit/test_monit_interface.py diff --git a/common/appscale/common/constants.py b/common/appscale/common/constants.py index f4ed315ad6..f06548324f 100644 --- a/common/appscale/common/constants.py +++ b/common/appscale/common/constants.py @@ -20,14 +20,6 @@ class InvalidIndexConfiguration(Exception): pass -class MonitStates(object): - MISSING = 'missing' - PENDING = 'pending' # Monit is trying to either start or stop the process. - RUNNING = 'running' - STOPPED = 'stopped' # Monit will likely try to start the process soon. 
- UNMONITORED = 'unmonitored' - - def non_negative_int(value): """ Checks if an integer value is greater or equal than 0. """ return isinstance(value, int) and value >= 0 diff --git a/common/appscale/common/monit_app_configuration.py b/common/appscale/common/monit_app_configuration.py deleted file mode 100644 index 04c7b331d4..0000000000 --- a/common/appscale/common/monit_app_configuration.py +++ /dev/null @@ -1,163 +0,0 @@ -import os -import sys - -from appscale.common import appscale_info -from distutils.spawn import find_executable -from . import file_io -from .constants import VERSION_PATH_SEPARATOR - -# Directory with the task templates. -TEMPLATE_DIR = os.path.join( - os.path.dirname(sys.modules['appscale.common'].__file__), 'templates') - -# Template used for monit configuration files. -TEMPLATE_LOCATION = os.path.join(TEMPLATE_DIR, 'monit_template.conf') - -# The directory used when storing a service's config file. -MONIT_CONFIG_DIR = '/run/appscale/monit.conf.d' - - -def create_config_file(watch, start_cmd, pidfile, port=None, env_vars=None, - max_memory=None, syslog_server=None, check_port=False, - check_host=None, kill_exceeded_memory=False, - log_tag=None, group=None): - """ Writes a monit configuration file for a service. - - Args: - watch: A string which identifies this process with monit. - start_cmd: The start command to start the process. - pidfile: The location of the pidfile that the process creates. - port: An integer specifying the port for the process. - env_vars: A dictionary specifying environment variables. - max_memory: An integer that specifies the maximum amount of memory in - megabytes that the process should use. - syslog_server: The IP address of the remote syslog server to use. - check_port: A boolean specifying that monit should check host and port. - check_host: Optional host to use with check_port, defaults to private ip - kill_exceeded_memory: A boolean indicating that a process should be killed - (instead of terminated). This is used when the process exceeds its memory - limit. - log_tag: The tag to use with logging. Default is to derive from watch. - group: The monit group for the process, defaults to group derived from - version. - """ - if check_port: - assert port is not None, 'When using check_port, port must be defined' - - process_name = watch - watch_group = watch.rsplit(VERSION_PATH_SEPARATOR, 1)[0] - version_group = group if group is not None else watch_group - if port is not None: - process_name += '-{}'.format(port) - - env_vars_str = '' - if env_vars is not None: - for key in env_vars: - env_vars_str += '{}="{}" '.format(key, env_vars[key]) - - bash = find_executable('bash') - start_stop_daemon = find_executable('start-stop-daemon') - stop_instance = find_executable('appscale-stop-instance') - - # /usr/local/bin is not on the path in Trusty. 
- stop_instance_script = os.path.join('/', 'usr', 'local', 'bin', - 'appscale-stop-instance') - if stop_instance is None and os.path.isfile(stop_instance_script): - stop_instance = stop_instance_script - - assert stop_instance is not None, 'Unable to find appscale-stop-instance' - - logfile = os.path.join( - '/', 'var', 'log', 'appscale', '{}.log'.format(process_name)) - - if not log_tag: - log_tag = version_group - - if syslog_server is None: - bash_exec = 'exec env {vars} {start_cmd} >> {log} 2>&1'.format( - vars=env_vars_str, start_cmd=start_cmd, log=logfile) - else: - bash_exec = ( - 'exec env {vars} {start_cmd} 2>&1 | tee -a {log} | ' - 'logger -t {log_tag} -u /tmp/ignored -n {syslog_server} -P 514' - ).format(vars=env_vars_str, start_cmd=start_cmd, log=logfile, - log_tag=log_tag, syslog_server=syslog_server) - - start_line = ' '.join([ - start_stop_daemon, - '--start', - '--background', - '--pidfile', pidfile, - '--startas', "{} -- -c 'unset \"${{!MONIT_@}}\"; {}'".format(bash, bash_exec) - ]) - stop_line = '{} --watch {}'.format(stop_instance, process_name) - - with open(TEMPLATE_LOCATION) as template: - output = template.read() - output = output.format( - process_name=process_name, match_clause='PIDFILE "{}"'.format(pidfile), - group=version_group, start_line=start_line, stop_line=stop_line) - - if max_memory is not None: - if kill_exceeded_memory: - action = 'exec "{} --watch {} --force"'.format(stop_instance, process_name) - else: - action = 'restart' - - output += ' if totalmem > {} MB for 10 cycles then {}\n'.format( - max_memory, action) - - if check_port: - check_host = check_host or appscale_info.get_private_ip() - output += ' if failed host {} port {} for 3 cycles then restart\n'.format( - check_host, port) - - config_file = os.path.join(MONIT_CONFIG_DIR, - 'appscale-{}.cfg'.format(process_name)) - file_io.write(config_file, output) - - return - - -def create_daemon_config(watch, start_cmd, stop_cmd, pidfile, max_memory=None): - """ Writes a monit configuration file for a daemonized service. - - Args: - watch: A string which identifies this process with monit. - start_cmd: A string specifying the command to start the service. - stop_cmd: A string specifying the command to stop the service. - pidfile: A string specifying the location of the service's pidfile. - max_memory: An integer that specifies the maximum amount of memory in - megabytes that the process should use. - """ - with open(TEMPLATE_LOCATION) as template: - output = template.read() - output = output.format( - process_name=watch, match_clause='PIDFILE "{}"'.format(pidfile), - group=watch, start_line=start_cmd, stop_line=stop_cmd) - - if max_memory is not None: - output += ' if totalmem > {} MB for 10 cycles then restart\n'.format( - max_memory) - - config_file = os.path.join(MONIT_CONFIG_DIR, 'appscale-{}.cfg'.format(watch)) - file_io.write(config_file, output) - - -def create_custom_config(watch, start_cmd, stop_cmd, match_cmd): - """ Writes a monit configuration for a service without a pidfile. - - Args: - watch: A string which identifies this process with monit. - start_cmd: A string specifying the command to start the service. - stop_cmd: A string specifying the command to stop the service. - match_cmd: The string monit should use to check if the process is running. 
- """ - with open(TEMPLATE_LOCATION) as template: - output = template.read() - output = output.format( - process_name=watch, match_clause='MATCHING {}'.format(match_cmd), - group=watch, start_line=start_cmd, stop_line=stop_cmd) - - config_file = os.path.join(MONIT_CONFIG_DIR, 'appscale-{}.cfg'.format(watch)) - file_io.write(config_file, output) diff --git a/common/appscale/common/monit_interface.py b/common/appscale/common/monit_interface.py deleted file mode 100644 index 61acc79ea2..0000000000 --- a/common/appscale/common/monit_interface.py +++ /dev/null @@ -1,407 +0,0 @@ -import errno -import httplib -import logging -import monotonic -import os -import socket -import subprocess -import urllib -import uuid -from datetime import timedelta -from xml.etree import ElementTree - -from tornado import gen -from tornado.httpclient import AsyncHTTPClient -from tornado.httpclient import HTTPClient -from tornado.httpclient import HTTPError -from tornado.ioloop import IOLoop - -from appscale.common.async_retrying import retry_coroutine -from appscale.common.monit_app_configuration import MONIT_CONFIG_DIR -from appscale.common.retrying import retry -from . import constants -from . import misc -from .constants import MonitStates - -""" -This file contains top level functions for starting and stopping -monitoring of processes using monit. Each component is in -charge of creating configuration files for the process they want started. -""" - -MONIT = "/usr/bin/monit" - -NUM_RETRIES = 10 -DEFAULT_RETRIES = lambda err: not isinstance(err, ProcessNotFound) - -SMALL_WAIT = 3 -RETRYING_TIMEOUT = 60 - -logger = logging.getLogger(__name__) - - -class ProcessNotFound(Exception): - """ Indicates that Monit has no entry for a process. """ - pass - - -class MonitUnavailable(Exception): - """ Indicates that Monit is not currently accepting commands. """ - pass - - -class NotMonitCommand(Exception): - """ Indicates that wrong command was asked to be run """ - pass - - -class NonZeroReturnStatus(Exception): - """ Indicates that command returned non-zero return status """ - pass - - -@retry(retrying_timeout=RETRYING_TIMEOUT, backoff_multiplier=0.5, - retry_on_exception=lambda err: not isinstance(err, NotMonitCommand)) -def monit_run(args): - """ Runs the given monit command, retrying it if it fails (which can occur if - monit is busy servicing other requests). - - Args: - args: A list of strs, where each str is a command-line argument for monit. - Raises: - NonZeroReturnStatus if command returned status different from 0. - """ - return_status = subprocess.call([MONIT] + args) - if return_status != 0: - raise NonZeroReturnStatus("Command `{}` return non-zero status: {}" - .format(' '.join(args), return_status)) - - -def safe_monit_run(args): - """ Runs the given monit command, retrying it if it fails. - - Args: - args: A list of strs, where each str is a command-line argument for monit. - Returns: - True if command succeeded, False otherwise. - """ - try: - monit_run(args) - return True - except NonZeroReturnStatus as err: - logger.error(err) - return False - - -def start(watch, is_group=True): - """ Instructs monit to start the given program, assuming that a configuration - file has already been written for it. - - Args: - watch: A str representing the name of the program to start up and monitor. - is_group: A bool that indicates if we want to stop a group of programs, or - only a single program. 
- Returns: - True if the program was started, or False if (1) the named program is not a - valid program name, (2) if monit could not be reloaded to read the new - configuration file, or (3) monit could not start the new program. - """ - if not misc.is_string_secure(watch): - logger.error("Watch string [{0}] is a possible security violation".format( - watch)) - return False - - logger.info("Reloading monit.") - if not safe_monit_run(['reload']): - return False - - logger.info("Starting watch {0}".format(watch)) - if is_group: - safe_monit_run(['monitor', '-g', watch]) - return safe_monit_run(['start', '-g', watch]) - else: - safe_monit_run(['monitor', watch]) - return safe_monit_run(['start', watch]) - - -def stop(watch, is_group=True): - """ Shut down the named programs monit is watching, and stop monitoring it. - - Args: - watch: The name of the group of programs that monit is watching, that should - no longer be watched. - is_group: A bool that indicates if we want to stop a group of programs, or - only a single program. - Returns: - True if the named programs were stopped and no longer monitored, and False - if either (1) the named watch is not valid, (2) the programs could not be - stopped, or (3) the programs could not be unmonitored. - """ - if not misc.is_string_secure(watch): - logger.error("Watch string (%s) is a possible security violation" % watch) - return False - - logger.info("Stopping watch {0}".format(watch)) - if is_group: - stop_command = ['stop', '-g', watch] - else: - stop_command = ['stop', watch] - - return safe_monit_run(stop_command) - - -def restart(watch): - """ Instructs monit to restart all processes hosting the given watch. - - Args: - watch: A str representing the name of the programs to restart. - Returns: - True if the programs were restarted, or False if (1) the watch is not a - valid program name, (2) monit could not restart the new program. - """ - if not misc.is_string_secure(watch): - logger.error("Watch string [{0}] is a possible security violation".format( - watch)) - return False - - logger.info("Restarting watch {0}".format(watch)) - return safe_monit_run(['restart', '-g', watch]) - - -def parse_entries(response): - """ Extracts each watch's status from a Monit response. - - Args: - response: An XML string. - Returns: - A dictionary mapping Monit entries to their state. - """ - root = ElementTree.XML(response) - entries = {} - for service in root.iter('service'): - name = service.find('name').text - monitored = int(service.find('monitor').text) - status = int(service.find('status').text) - if monitored == 0: - entries[name] = MonitStates.UNMONITORED - elif monitored == 1: - if status == 0: - entries[name] = MonitStates.RUNNING - else: - entries[name] = MonitStates.STOPPED - else: - entries[name] = MonitStates.PENDING - - return entries - - -class MonitOperator(object): - """ Handles Monit operations. """ - - # The location of Monit's XML API. - LOCATION = 'http://localhost:2812' - - # The number of seconds to wait between each reload operation. - RELOAD_COOLDOWN = 1 - - # Monit's endpoint for fetching the status of each service. - STATUS_URL = '{}/_status?format=xml'.format(LOCATION) - - def __init__(self): - """ Creates a new MonitOperator. There should only be one. """ - self.reload_future = None - self._last_reload = monotonic.monotonic() - self._async_client = AsyncHTTPClient() - self._client = HTTPClient() - - @gen.coroutine - def reload(self, thread_pool=None): - """ Groups closely-timed reload operations. 
""" - if self.reload_future is None or self.reload_future.done(): - self.reload_future = self._reload(thread_pool) - else: - logger.info('Using future of active monit reload') - - yield self.reload_future - - @staticmethod - def reload_sync(): - """ Reloads Monit. """ - subprocess.check_call([MONIT, 'reload']) - - @retry_coroutine(retrying_timeout=RETRYING_TIMEOUT) - def get_entries(self): - """ Retrieves the status for each Monit entry. - - Returns: - A dictionary mapping Monit entries to their state. - """ - response = yield self._async_client.fetch(self.STATUS_URL) - monit_entries = parse_entries(response.body) - raise gen.Return(monit_entries) - - def get_entries_sync(self): - """ Retrieves the status for each Monit entry. - - Returns: - A dictionary mapping Monit entries to their state. - """ - response = self._client.fetch(self.STATUS_URL) - monit_entries = parse_entries(response.body) - return monit_entries - - @retry_coroutine( - retrying_timeout=RETRYING_TIMEOUT) - def send_command_retry_process(self, process_name, command): - """ Sends a command to the Monit API. - - Args: - process_name: A string specifying a monit watch. - command: A string specifying the command to send. - """ - yield self._send_command(process_name, command) - - - @retry_coroutine( - retrying_timeout=RETRYING_TIMEOUT, - retry_on_exception=DEFAULT_RETRIES) - def send_command(self, process_name, command): - """ Sends a command to the Monit API. - - Args: - process_name: A string specifying a monit watch. - command: A string specifying the command to send. - """ - yield self._send_command(process_name, command) - - @gen.coroutine - def _send_command(self, process_name, command): - process_url = '{}/{}'.format(self.LOCATION, process_name) - csrf_token = str(uuid.uuid4()) - headers = {'Cookie': 'securitytoken={}'.format(csrf_token)} - payload = urllib.urlencode({'action': command, - 'securitytoken': csrf_token}) - try: - yield self._async_client.fetch(process_url, method='POST', - headers=headers, body=payload) - except HTTPError as error: - if error.code == httplib.NOT_FOUND: - raise ProcessNotFound('{} is not monitored'.format(process_name)) - raise - - def send_command_sync(self, process_name, command): - """ Sends a command to the Monit API. - - Args: - process_name: A string specifying a monit watch. - command: A string specifying the command to send. - Raises: - ProcessNotFound if Monit cannot find the specified process_name. - MonitUnavailable if Monit is not accepting commands. - """ - process_url = '/'.join([self.LOCATION, process_name]) - csrf_token = str(uuid.uuid4()) - headers = {'Cookie': 'securitytoken={}'.format(csrf_token)} - payload = urllib.urlencode({'action': command, - 'securitytoken': csrf_token}) - - try: - self._client.fetch(process_url, method='POST', - headers=headers, body=payload) - except HTTPError as error: - if error.code == httplib.NOT_FOUND: - raise ProcessNotFound('{} is not monitored'.format(process_name)) - - if error.code == httplib.SERVICE_UNAVAILABLE: - raise MonitUnavailable('Monit is not currently available') - - raise - except socket.error: - raise MonitUnavailable('Monit is not currently available') - - @gen.coroutine - def wait_for_status(self, process_name, acceptable_states): - """ Waits until a process is in a desired state. - - Args: - process_name: A string specifying a monit watch. - acceptable_states: An iterable of strings specifying states. 
- """ - logger.info( - "Waiting until process '{}' gets to one of acceptable states: {}" - .format(process_name, acceptable_states) - ) - start_time = monotonic.monotonic() - backoff = 0.1 - - while True: - entries = yield self.get_entries() - status = entries.get(process_name, MonitStates.MISSING) - elapsed = monotonic.monotonic() - start_time - - if status in acceptable_states: - logger.info("Status of '{}' became '{}' after {:0.1f}s" - .format(process_name, status, elapsed)) - raise gen.Return(status) - - if elapsed > 1: - # Keep logs informative and don't report too early - logger.info("Status of '{}' is not acceptable ('{}') after {:0.1f}s." - "Checking again in {:0.1f}s." - .format(process_name, status, elapsed, backoff)) - - yield gen.sleep(backoff) - backoff = min(backoff * 1.5, 5) # Increase backoff slowly up to 5 sec. - - @gen.coroutine - def ensure_running(self, process_name): - """ Waits for a process to finish starting. - - Args: - process_name: A string specifying a monit watch. - """ - while True: - non_missing_states = ( - MonitStates.RUNNING, MonitStates.UNMONITORED, MonitStates.PENDING, - MonitStates.STOPPED) - status_future = self.wait_for_status(process_name, non_missing_states) - status = yield gen.with_timeout(timedelta(seconds=5), status_future, - IOLoop.current()) - - if status == constants.MonitStates.RUNNING: - return - - if status == constants.MonitStates.UNMONITORED: - yield self.send_command(process_name, 'start') - - yield gen.sleep(1) - - @staticmethod - def remove_configuration(entry): - """ Removes the configuration file for an entry. - - Args: - entry: A string specifying a Monit entry. - """ - monit_config_file = '{}/appscale-{}.cfg'.format(MONIT_CONFIG_DIR, entry) - try: - os.remove(monit_config_file) - except OSError as error: - if error.errno != errno.ENOENT: - raise - - logger.error('Error deleting {}'.format(monit_config_file)) - - @retry_coroutine( - retrying_timeout=RETRYING_TIMEOUT, - retry_on_exception=[subprocess.CalledProcessError]) - def _reload(self, thread_pool): - """ Reloads Monit. """ - time_since_reload = monotonic.monotonic() - self._last_reload - wait_time = max(self.RELOAD_COOLDOWN - time_since_reload, 0) - yield gen.sleep(wait_time) - self._last_reload = monotonic.monotonic() - if thread_pool: - yield thread_pool.submit(subprocess.check_call, [MONIT, 'reload']) - else: - subprocess.check_call([MONIT, 'reload']) diff --git a/common/appscale/common/service_helper.py b/common/appscale/common/service_helper.py new file mode 100644 index 0000000000..455ad49510 --- /dev/null +++ b/common/appscale/common/service_helper.py @@ -0,0 +1,318 @@ +import logging +import subprocess + +from tornado import gen + +""" +This file contains top level functions for starting and stopping +services using systemctl. Service names can be prefixes for template +services or else are the unit names but without the type suffix. +""" + + +SYSTEMCTL = '/bin/systemctl' + + +STATUS_MAP = { + 'active': 'running', + 'activating': 'pending', + 'deactivating' : 'pending', + 'reloading': 'running', +} + +logger = logging.getLogger(__name__) + +class NonZeroReturnStatus(Exception): + """ Indicates that command returned non-zero return status """ + pass + + +def systemctl_run(args): + """ Runs the given systemctl command. + + Args: + args: A list of strs, where each str is an argument for systemctl. + Raises: + NonZeroReturnStatus if command returned status different from 0. 
+  """
+  return_status = subprocess.call([SYSTEMCTL] + args)
+  if return_status != 0:
+    raise NonZeroReturnStatus('Command {0} returned non-zero status: {1}'
+                              .format(' '.join(args), return_status))
+
+
+def systemctl_out(args):
+  """ Runs the given systemctl command, returns output.
+
+  Args:
+    args: A list of strs, where each str is an argument for systemctl.
+  Returns:
+    The output from the systemctl command
+  Raises:
+    NonZeroReturnStatus if command returned status different from 0.
+  """
+  try:
+    return subprocess.check_output([SYSTEMCTL] + args)
+  except subprocess.CalledProcessError as err:
+    raise NonZeroReturnStatus('Command {0} returned non-zero status: {1}'
+                              .format(' '.join(args), err.returncode))
+
+def safe_systemctl_run(args):
+  """ Runs the given systemctl command, logging any error.
+
+  Args:
+    args: A list of strs, where each str is an argument for systemctl.
+  Returns:
+    True if command succeeded, False otherwise.
+  """
+  try:
+    systemctl_run(args)
+    return True
+  except NonZeroReturnStatus as err:
+    logger.error(err)
+    return False
+
+
+def start(name, background=False, enable=None, wants=None, properties=None):
+  """ Start the given service.
+
+  Args:
+    name: A str representing the name of the service to start.
+    background: True to start without blocking
+    enable: True to enable, False to start only, None for default.
+    wants: services required by this service
+    properties: properties to set for the service
+  Returns:
+    True if the service was started, else False.
+  """
+  logger.info('Starting service {0}'.format(name))
+  expanded_name = __expand_name(name)
+
+  if wants:
+    logger.info('Service {0} wants {1}'.format(name, ' '.join(wants)))
+    wants_args = ['--runtime', 'add-wants', expanded_name]
+    wants_args.extend([__expand_name(want) for want in wants])
+    safe_systemctl_run(wants_args)
+
+  if properties:
+    logger.info('Service {0} properties {1}'.format(
+      name, ' '.join('='.join(item) for item in properties.items())))
+    properties_args = ['--runtime', 'set-property', expanded_name]
+    properties_args.extend(['='.join(item) for item in properties.items()])
+    safe_systemctl_run(properties_args)
+
+  return safe_systemctl_run(__build_command('start',
+                                            expanded_name,
+                                            background=background,
+                                            enable=enable))
+
+
+def stop(name, background=False):
+  """ Stop the given service(s).
+
+  Args:
+    name: A str representing the name of the service(s) to stop.
+    background: True to stop without blocking
+  Returns:
+    True if the named services were stopped.
+  """
+  logger.info('Stopping service(s) {0}'.format(name))
+  return safe_systemctl_run(__build_command('stop',
+                                            __name_match(name),
+                                            background=background))
+
+
+def restart(name, background=False, start=True):
+  """ Restart the given service(s).
+
+  Args:
+    name: A str representing the name of the service(s) to restart.
+    background: True to restart without blocking
+    start: True to start services if not already running (use False with name pattern)
+  Returns:
+    True if services were restarted.
+  """
+  logger.info('Restarting service(s) {0}'.format(name))
+  command = 'try-restart'
+  if start:
+    command = 'restart'
+  return safe_systemctl_run(__build_command(command,
+                                            __name_match(name),
+                                            background=background))
+
+
+def list(running=False):
+  """ List appscale service(s).
+
+  Args:
+    running: True to only report active services
+  Returns:
+    Dict of services and their status (pending|running|stopped)
+  """
+  args = ['--plain', '--no-pager', '--no-legend']
+  if running:
+    args.append('--state=active')
+  args.extend(['list-units', 'appscale-*.service'])
+
+  try:
+    services = {}
+    output = systemctl_out(args)
+    for output_line in output.split('\n'):
+      if not output_line:
+        continue
+      service, loaded, active, remain = output_line.split(None, 3)
+      if not service.endswith('.service'):
+        continue
+      services[service[:-8]] = STATUS_MAP.get(active, 'stopped')
+    return services
+  except NonZeroReturnStatus:
+    return {}
+
+
+def __expand_name(name):
+  """ Expand the given name by appending .service if there is no type suffix.
+
+  Args:
+    name: The unit name
+  Returns:
+    The name with type suffix
+  """
+  expanded_name = name
+  if not '.' in name:
+    expanded_name = '{0}.service'.format(name)
+  return expanded_name
+
+
+def __build_command(command, name, background=None, enable=None):
+  """ Construct args for systemctl command.
+
+  Args:
+    command: The systemctl command
+    name: The unit name or name pattern
+    background: True to have systemctl perform the command in the background
+    enable: True to enable/disable, False to start/stop only, None for default.
+  Returns:
+    The list of arguments for the systemctl command
+  """
+  args = ['--quiet']
+  if background:
+    args.append('--no-block')
+  if ((enable or name.startswith('appscale-'))
+      and not enable==False
+      and command in ('start', 'stop')):
+    args.append('--now')
+    args.append('--runtime')
+    if command == 'start':
+      args.append('enable')
+    else:
+      args.append('disable')
+  else:
+    args.append(command)
+  args.append(__expand_name(name))
+  return args
+
+
+def __name_match(name):
+  """ Convert a template name to a pattern matching all instances of the
+  service.
+
+  Args:
+    name: A unit name without type suffix
+  Returns:
+    The name, possibly modified for matching
+  """
+  service_name_match = name
+  if name.endswith('@'):
+    service_name_match = '{0}*'.format(name)
+  return service_name_match
+
+
+class ServiceOperator(object):
+  """ Handles Service operations. """
+
+  @gen.coroutine
+  def list_async(self):
+    """ Retrieves the status for each service.
+
+    Returns:
+      A dictionary mapping services to their state.
+    """
+    raise gen.Return(self.list())
+
+  def list(self):
+    """ Retrieves the status for each service.
+
+    Returns:
+      A dictionary mapping services to their state.
+    """
+    return list()
+
+  @gen.coroutine
+  def start_async(self, name, enable=None, wants=None, properties=None):
+    """ Start the given service asynchronously.
+
+    Args:
+      name: A str representing the name of the service to start.
+      enable: True to enable, False to start only, None for default.
+      wants: services required by this service
+      properties: properties to set for the service
+    Returns:
+      True if the service was started, else False.
+    """
+    raise gen.Return(self.start(name, enable=enable, wants=wants,
+                                properties=properties))
+
+  def start(self, name, enable=None, wants=None, properties=None):
+    """ Start the given service.
+
+    Args:
+      name: A str representing the name of the service to start.
+      enable: True to enable, False to start only, None for default.
+      wants: services required by this service
+      properties: properties to set for the service
+    Returns:
+      True if the service was started, else False.
+ """ + return start(name, enable=enable, wants=wants, properties=properties) + + @gen.coroutine + def stop_async(self, name): + """ Stop the given service(s) asynchronously. + + Args: + name: A str representing the name of the service(s) to stop. + Returns: + True if the named services were stopped. + """ + raise gen.Return(self.stop(name)) + + def stop(self, name): + """ Stop the given service(s). + + Args: + name: A str representing the name of the service(s) to stop. + Returns: + True if the named services were stopped. + """ + return stop(name) + + @gen.coroutine + def restart_async(self, name): + """ Restart the given service(s) asynchronously. + + Args: + name: A str representing the name of the service(s) to restart. + Returns: + True if services were restarted. + """ + raise gen.Return(self.restart(name)) + + def restart(self, name): + """ Restart the given service(s). + + Args: + name: A str representing the name of the service(s) to restart. + Returns: + True if services were restarted. + """ + return restart(name) \ No newline at end of file diff --git a/common/appscale/common/templates/monit_template.conf b/common/appscale/common/templates/monit_template.conf deleted file mode 100644 index 705c860af5..0000000000 --- a/common/appscale/common/templates/monit_template.conf +++ /dev/null @@ -1,4 +0,0 @@ -CHECK PROCESS {process_name} {match_clause} - group {group} - start program = "{start_line}" - stop program = "{stop_line}" diff --git a/common/test/unit/test_monit_app_configuration.py b/common/test/unit/test_monit_app_configuration.py deleted file mode 100644 index a2c1a728bc..0000000000 --- a/common/test/unit/test_monit_app_configuration.py +++ /dev/null @@ -1,18 +0,0 @@ -# Programmer: Navraj Chohan - -import unittest -from flexmock import flexmock - -from appscale.common import file_io -from appscale.common import monit_app_configuration - - -class TestGodAppInterface(unittest.TestCase): - def test_create_config_file(self): - flexmock(file_io).should_receive('write') - monit_app_configuration.create_config_file( - 'mywatch', 'start_cmd', 'pidfile', 4000, - {'ENV1': 'VALUE1', 'ENV2': 'VALUE2'}) - -if __name__ == "__main__": - unittest.main() diff --git a/common/test/unit/test_monit_interface.py b/common/test/unit/test_monit_interface.py deleted file mode 100644 index 58efcdc499..0000000000 --- a/common/test/unit/test_monit_interface.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -import sys -import subprocess -import time -import unittest -from flexmock import flexmock - -sys.path.append(os.path.join(os.path.dirname(__file__), "../../")) -from appscale.common import ( - file_io, - monit_interface, - testing -) - - -class TestGodInterface(unittest.TestCase): - def setUp(self): - flexmock(time).should_receive('sleep').and_return() - - def test_start(self): - testing.disable_logging() - - flexmock(file_io)\ - .should_receive('delete')\ - .and_return() - - flexmock(subprocess)\ - .should_receive('call')\ - .and_return(0) - - self.assertEqual(True, monit_interface.start("watch_name")) - - flexmock(subprocess)\ - .should_receive('call')\ - .and_return(1) - - self.assertEqual(False, monit_interface.start("watch_name")) - - def test_stop(self): - testing.disable_logging() - - flexmock(subprocess)\ - .should_receive('call')\ - .and_return(0) - self.assertEqual(True, monit_interface.stop("watch_name")) - - flexmock(subprocess)\ - .should_receive('call')\ - .and_return(1) - self.assertEqual(False, monit_interface.stop("watch_name")) - -if __name__ == "__main__": - unittest.main() From 
1fcbbb21e16bb7d7c305e476b34ac860536ee5f8 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:30:27 -0700 Subject: [PATCH 089/221] Updates for systemd, remove monit service dependency and tmpfiles config --- system/tmpfiles.d/appscale.conf | 3 +-- system/units/appscale-controller.service | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/system/tmpfiles.d/appscale.conf b/system/tmpfiles.d/appscale.conf index 5542a13f77..5abad279d1 100644 --- a/system/tmpfiles.d/appscale.conf +++ b/system/tmpfiles.d/appscale.conf @@ -1,3 +1,2 @@ # path mode uid gid age -d /run/appscale 0755 root root - -d /run/appscale/monit.conf.d 0755 root root - \ No newline at end of file +d /run/appscale 0755 root root - \ No newline at end of file diff --git a/system/units/appscale-controller.service b/system/units/appscale-controller.service index c5612f0485..79004adde5 100644 --- a/system/units/appscale-controller.service +++ b/system/units/appscale-controller.service @@ -1,8 +1,7 @@ [Unit] Description=AppScale Controller AssertPathExists=/etc/appscale/secret.key -Wants=monit.service -After=monit.service network-online.target time-sync.target +After=network-online.target time-sync.target [Service] Type=simple From 7a91bb239e97510067f054dc6f48c0115f23b936 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:30:38 -0700 Subject: [PATCH 090/221] Updates for systemd, remove dashboard monit link --- AppDashboard/lib/app_dashboard.py | 3 +-- AppDashboard/lib/app_dashboard_data.py | 20 +------------------ .../test/functional/test_dashboard.py | 2 -- .../test/unit/test_app_dashboard_helper.py | 2 -- 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/AppDashboard/lib/app_dashboard.py b/AppDashboard/lib/app_dashboard.py index 5e24696573..8c73ca8123 100644 --- a/AppDashboard/lib/app_dashboard.py +++ b/AppDashboard/lib/app_dashboard.py @@ -81,7 +81,6 @@ def render_template(self, template_file, values=None): 'versions_user_is_admin_on': versions_user_is_admin_on, 'user_layout_pref': self.dstore.get_dash_layout_settings(), 'flower_url': self.dstore.get_flower_url(), - 'monit_url': self.dstore.get_monit_url() } for key in values.keys(): sub_vars[key] = values[key] @@ -100,7 +99,7 @@ def get_shared_navigation(self, page): show_create_account = False # These sections do not lend themselves well to having panels. - panel_blacklist = ['monit', 'taskqueue', 'datastore_viewer'] + panel_blacklist = ['taskqueue', 'datastore_viewer'] return self.render_template(template_file='shared/navigation.html', values={'show_create_account': show_create_account, diff --git a/AppDashboard/lib/app_dashboard_data.py b/AppDashboard/lib/app_dashboard_data.py index 06163242b5..e3e0becf96 100644 --- a/AppDashboard/lib/app_dashboard_data.py +++ b/AppDashboard/lib/app_dashboard_data.py @@ -85,9 +85,6 @@ class AppDashboardData(): # The port that the Celery Flower service runs on, by default. FLOWER_PORT = 5555 - # The port that the Monit Dashboard runs on, by default. - MONIT_PORT = 2812 - def __init__(self, helper=None): """ Creates a new AppDashboard, which will cache SOAP-exposed information provided to us by the AppDashboardHelper. 
@@ -139,8 +136,6 @@ def build_dict(self, user_info): "logging": {"title": "Log Viewer", "link": "/logs", "template": "logs/main.html"}, - "monit": {"title": "Monit", - "link": self.get_monit_url()}, "taskqueue": {"title": "TaskQueue", "link": self.get_flower_url()}, "pull_queue_viewer": {"title": "Pull Queue Viewer", @@ -171,7 +166,7 @@ def build_dict(self, user_info): {"manage_users": lookup_dict[ "manage_users"]}]} if user_info.owned_apps or user_info.is_user_cloud_admin: - sections = ['monit', 'taskqueue', 'pull_queue_viewer', 'logging', + sections = ['taskqueue', 'pull_queue_viewer', 'logging', 'app_console', 'cron', 'datastore_viewer'] lookup_dict["debugging_monitoring"] = { "Debugging/Monitoring": [{section: lookup_dict[section]} @@ -242,19 +237,6 @@ def get_flower_url(self): """ return "http://{0}:{1}".format(self.get_head_node_ip(), self.FLOWER_PORT) - def get_monit_url(self): - """ Retrieves the URL where the Monit Dashboard web service can be found in - this AppScale deployment. - - Note that although a Monit Dashboard runs on each node, we will send users - to the one on the login node. - - Returns: - A str that names the URL where the services on the login node can be - viewed, started, and stopped. - """ - return "http://{0}:{1}".format(self.get_head_node_ip(), self.MONIT_PORT) - def get_head_node_ip(self): """ Retrieves the IP address or FQDN where the machine running the shadow service can be found, via the Datastore. diff --git a/AppDashboard/test/functional/test_dashboard.py b/AppDashboard/test/functional/test_dashboard.py index fd0215b139..45d3ce835a 100644 --- a/AppDashboard/test/functional/test_dashboard.py +++ b/AppDashboard/test/functional/test_dashboard.py @@ -68,7 +68,6 @@ def setUp(self): "used": 886620160 }, "services": { - # For each Process monitored by monit "cassandra": "Running", }, "loadavg": { @@ -121,7 +120,6 @@ def setUp(self): "used": 0 }, "services": { - # For each Process monitored by monit # TODO }, "loadavg": { diff --git a/AppDashboard/test/unit/test_app_dashboard_helper.py b/AppDashboard/test/unit/test_app_dashboard_helper.py index 416e42398a..c8f33acc9b 100644 --- a/AppDashboard/test/unit/test_app_dashboard_helper.py +++ b/AppDashboard/test/unit/test_app_dashboard_helper.py @@ -77,7 +77,6 @@ def setUpClusterStats(self): "used": 886620160 }, "services": { - # For each Process monitored by monit "cassandra": "Running", }, "loadavg": { @@ -143,7 +142,6 @@ def setUpClusterStats(self): "used": 0 }, "services": { - # For each Process monitored by monit }, "loadavg": { "last_1min": 0.08, From 41281fb3348bb4acf10dc503e46442365c751108 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:30:48 -0700 Subject: [PATCH 091/221] Updates for systemd, remove search server kill script --- SearchService/kill_search_server.sh | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 SearchService/kill_search_server.sh diff --git a/SearchService/kill_search_server.sh b/SearchService/kill_search_server.sh deleted file mode 100644 index f6b704292e..0000000000 --- a/SearchService/kill_search_server.sh +++ /dev/null @@ -1,2 +0,0 @@ -# Monit watches the Search Server, so it will restart after being killed. 
-ps aux | grep search_server | grep -v grep | awk '{print $2}' | xargs kill -9 From 60f70cb5b63071af04930ed50623680a015ec9d3 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:30:58 -0700 Subject: [PATCH 092/221] Updates for systemd, update java app server comment --- .../google/appengine/tools/development/DevAppServerMain.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppServer_Java/src/com/google/appengine/tools/development/DevAppServerMain.java b/AppServer_Java/src/com/google/appengine/tools/development/DevAppServerMain.java index bc8c45ff82..a5d4393bc7 100644 --- a/AppServer_Java/src/com/google/appengine/tools/development/DevAppServerMain.java +++ b/AppServer_Java/src/com/google/appengine/tools/development/DevAppServerMain.java @@ -243,7 +243,7 @@ public void apply() { updateCheck.checkJavaVersion(System.err); - // AppScale: Write a pidfile for Monit. + // AppScale: Write pidfile String pidfile = System.getProperty("PIDFILE"); if (pidfile != null) { String pidString = ManagementFactory.getRuntimeMXBean().getName().split("@")[0]; From 3413ff57244e7cfac949afc69950061e2307abb5 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:31:08 -0700 Subject: [PATCH 093/221] Updates for systemd, unit files for services --- system/tmpfiles.d/appscale.conf | 3 ++- .../appscale-.d/10-appscale-common.conf | 19 ++++++++++++++ .../10-appscale-infrastructure-shadow.conf | 4 +++ .../10-appscale-ejabberd.conf | 5 ++++ .../nginx.service.d/10-appscale-nginx.conf | 10 +++++++ .../10-appscale-rabbitmq-server.conf | 11 ++++++++ .../10-appscale-zookeeper.conf | 13 ++++++++++ system/units/appscale-admin.service | 12 +++++++++ system/units/appscale-api-server@.service | 10 +++++++ system/units/appscale-backend.target | 4 +++ system/units/appscale-blobstore.service | 12 +++++++++ system/units/appscale-cassandra.service | 26 +++++++++++++++++++ system/units/appscale-celery@.service | 25 ++++++++++++++++++ system/units/appscale-control.target | 5 ++++ system/units/appscale-controller.service | 19 +++++--------- system/units/appscale-down.target | 2 ++ system/units/appscale-ejabberd.target | 9 +++++++ system/units/appscale-flower.service | 12 +++++++++ system/units/appscale-groomer.service | 13 ++++++++++ system/units/appscale-haproxy.service | 18 +++++++++++++ system/units/appscale-hermes.service | 11 ++++++++ system/units/appscale-infrastructure@.service | 13 ++++++++++ .../units/appscale-instance-manager.service | 11 ++++++++ system/units/appscale-instance-run@.service | 14 ++++++++++ system/units/appscale-instance.target | 5 ++++ system/units/appscale-logserver.service | 12 +++++++++ system/units/appscale-memcached.service | 12 +++++++++ system/units/appscale-nginx.target | 9 +++++++ system/units/appscale-rabbitmq-server.target | 9 +++++++ system/units/appscale-routing.target | 5 ++++ system/units/appscale-search.service | 11 ++++++++ system/units/appscale-service.target | 5 ++++ system/units/appscale-solr.service | 16 ++++++++++++ system/units/appscale-taskqueue@.service | 11 ++++++++ .../appscale-transaction-groomer.service | 13 ++++++++++ system/units/appscale-uaserver.service | 11 ++++++++ system/units/appscale-xmpp@.service | 13 ++++++++++ system/units/appscale-zookeeper.target | 9 +++++++ system/units/appscale.target | 5 ++++ 39 files changed, 413 insertions(+), 14 deletions(-) create mode 100644 system/units.d/appscale-.d/10-appscale-common.conf create mode 100644 
system/units.d/appscale-infrastructure@shadow.service.d/10-appscale-infrastructure-shadow.conf create mode 100644 system/units.d/ejabberd.service.d/10-appscale-ejabberd.conf create mode 100644 system/units.d/nginx.service.d/10-appscale-nginx.conf create mode 100644 system/units.d/rabbitmq-server.service.d/10-appscale-rabbitmq-server.conf create mode 100644 system/units.d/zookeeper.service.d/10-appscale-zookeeper.conf create mode 100644 system/units/appscale-admin.service create mode 100644 system/units/appscale-api-server@.service create mode 100644 system/units/appscale-backend.target create mode 100644 system/units/appscale-blobstore.service create mode 100644 system/units/appscale-cassandra.service create mode 100644 system/units/appscale-celery@.service create mode 100644 system/units/appscale-control.target create mode 100644 system/units/appscale-down.target create mode 100644 system/units/appscale-ejabberd.target create mode 100644 system/units/appscale-flower.service create mode 100644 system/units/appscale-groomer.service create mode 100644 system/units/appscale-haproxy.service create mode 100644 system/units/appscale-hermes.service create mode 100644 system/units/appscale-infrastructure@.service create mode 100644 system/units/appscale-instance-manager.service create mode 100644 system/units/appscale-instance-run@.service create mode 100644 system/units/appscale-instance.target create mode 100644 system/units/appscale-logserver.service create mode 100644 system/units/appscale-memcached.service create mode 100644 system/units/appscale-nginx.target create mode 100644 system/units/appscale-rabbitmq-server.target create mode 100644 system/units/appscale-routing.target create mode 100644 system/units/appscale-search.service create mode 100644 system/units/appscale-service.target create mode 100644 system/units/appscale-solr.service create mode 100644 system/units/appscale-taskqueue@.service create mode 100644 system/units/appscale-transaction-groomer.service create mode 100644 system/units/appscale-uaserver.service create mode 100644 system/units/appscale-xmpp@.service create mode 100644 system/units/appscale-zookeeper.target create mode 100644 system/units/appscale.target diff --git a/system/tmpfiles.d/appscale.conf b/system/tmpfiles.d/appscale.conf index 5abad279d1..809bdba97d 100644 --- a/system/tmpfiles.d/appscale.conf +++ b/system/tmpfiles.d/appscale.conf @@ -1,2 +1,3 @@ # path mode uid gid age -d /run/appscale 0755 root root - \ No newline at end of file +d /run/appscale 0755 root root - +d /run/appscale/apps 0755 root root - \ No newline at end of file diff --git a/system/units.d/appscale-.d/10-appscale-common.conf b/system/units.d/appscale-.d/10-appscale-common.conf new file mode 100644 index 0000000000..f8f2c40509 --- /dev/null +++ b/system/units.d/appscale-.d/10-appscale-common.conf @@ -0,0 +1,19 @@ +# Common drop-in for AppScale services [e.g. 
systemd v239 style] + +[Unit] +AssertPathExists=/etc/appscale/secret.key +After=network-online.target time-sync.target +StopWhenUnneeded=true + +[Service] +Environment=APPSCALE_HOME=/root/appscale APPSCALE_SERVICE_BIN=/usr/local/bin +EnvironmentFile=-/etc/default/appscale +EnvironmentFile=-/run/appscale/%p.env +TimeoutSec=30 +Restart=on-failure +RestartSec=10 +# Limits +LimitNOFILE=200000 +# Security +PrivateTmp=true +ProtectSystem=true diff --git a/system/units.d/appscale-infrastructure@shadow.service.d/10-appscale-infrastructure-shadow.conf b/system/units.d/appscale-infrastructure@shadow.service.d/10-appscale-infrastructure-shadow.conf new file mode 100644 index 0000000000..adb5cf3529 --- /dev/null +++ b/system/units.d/appscale-infrastructure@shadow.service.d/10-appscale-infrastructure-shadow.conf @@ -0,0 +1,4 @@ +# Unit configuration for infrastructure manager shadow instance, /lib/systemd/system/appscale-infrastructure@shadow.service.d/appscale-infrastructure-shadow.conf + +[Service] +Environment=APPSCALE_IM_OPTIONS=--autoscaler \ No newline at end of file diff --git a/system/units.d/ejabberd.service.d/10-appscale-ejabberd.conf b/system/units.d/ejabberd.service.d/10-appscale-ejabberd.conf new file mode 100644 index 0000000000..7dd4315a83 --- /dev/null +++ b/system/units.d/ejabberd.service.d/10-appscale-ejabberd.conf @@ -0,0 +1,5 @@ +# Unit configuration for ejabberd, /lib/systemd/system/ejabberd.service.d/appscale-ejabberd.conf + +[Unit] +PartOf=appscale-ejabberd.target +ReloadPropagatedFrom=appscale-ejabberd.target \ No newline at end of file diff --git a/system/units.d/nginx.service.d/10-appscale-nginx.conf b/system/units.d/nginx.service.d/10-appscale-nginx.conf new file mode 100644 index 0000000000..e9cce46bb1 --- /dev/null +++ b/system/units.d/nginx.service.d/10-appscale-nginx.conf @@ -0,0 +1,10 @@ +# Unit configuration for nginx, /lib/systemd/system/nginx.service.d/appscale-nginx.conf + +[Unit] +PartOf=appscale-nginx.target +ReloadPropagatedFrom=appscale-nginx.target + +[Service] +LimitNOFILE=200000 +RestartSec=10 +Restart=on-failure \ No newline at end of file diff --git a/system/units.d/rabbitmq-server.service.d/10-appscale-rabbitmq-server.conf b/system/units.d/rabbitmq-server.service.d/10-appscale-rabbitmq-server.conf new file mode 100644 index 0000000000..2161e44b62 --- /dev/null +++ b/system/units.d/rabbitmq-server.service.d/10-appscale-rabbitmq-server.conf @@ -0,0 +1,11 @@ +# Unit configuration for rabbitmq-server, /lib/systemd/system/rabbitmq-server.service.d/appscale-rabbitmq-server.conf + +[Unit] +PartOf=appscale-rabbitmq-server.target +ReloadPropagatedFrom=appscale-rabbitmq-server.target +After=epmd.service +Requires=epmd.service + +[Service] +RestartSec=10 +Restart=on-failure \ No newline at end of file diff --git a/system/units.d/zookeeper.service.d/10-appscale-zookeeper.conf b/system/units.d/zookeeper.service.d/10-appscale-zookeeper.conf new file mode 100644 index 0000000000..4e70286264 --- /dev/null +++ b/system/units.d/zookeeper.service.d/10-appscale-zookeeper.conf @@ -0,0 +1,13 @@ +# Unit configuration for zookeeper, /lib/systemd/system/zookeeper.service.d/appscale-zookeeper.conf + +[Unit] +PartOf=appscale-zookeeper.target +ReloadPropagatedFrom=appscale-zookeeper.target + +[Service] +GuessMainPID=yes +KillMode=mixed +SuccessExitStatus=0 143 +RemainAfterExit=no +RestartSec=10 +Restart=on-failure \ No newline at end of file diff --git a/system/units/appscale-admin.service b/system/units/appscale-admin.service new file mode 100644 index 0000000000..7d523bc863 --- 
/dev/null +++ b/system/units/appscale-admin.service @@ -0,0 +1,12 @@ +[Unit] +Description=AppScale Admin API +Before=appscale-control.target +PartOf=appscale-control.target + +[Service] +Environment=APPSCALE_ADMIN_OPTION_PORT=17442 +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-admin serve -p ${APPSCALE_ADMIN_OPTION_PORT} $APPSCALE_OPTION_VERBOSE +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-control.target \ No newline at end of file diff --git a/system/units/appscale-api-server@.service b/system/units/appscale-api-server@.service new file mode 100644 index 0000000000..29b2393a59 --- /dev/null +++ b/system/units/appscale-api-server@.service @@ -0,0 +1,10 @@ +[Unit] +Description=AppScale API server for %i +Before=appscale-instance-run@.service +AssertPathExists=/run/appscale/apps/api_command_%i + +[Service] +ExecStart=/bin/bash /run/appscale/apps/api_command_%i +SyslogIdentifier=%p-%i +# Resource controls +MemoryLimit=400M diff --git a/system/units/appscale-backend.target b/system/units/appscale-backend.target new file mode 100644 index 0000000000..2df06f33f7 --- /dev/null +++ b/system/units/appscale-backend.target @@ -0,0 +1,4 @@ +[Unit] +Description=AppScale GTS backend/foundational services +Before=appscale-down.target +Conflicts=appscale-down.target \ No newline at end of file diff --git a/system/units/appscale-blobstore.service b/system/units/appscale-blobstore.service new file mode 100644 index 0000000000..83433caff4 --- /dev/null +++ b/system/units/appscale-blobstore.service @@ -0,0 +1,12 @@ +[Unit] +Description=AppScale Blobstore +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +Environment=APPSCALE_BS_OPTION_PORT=6107 APPSCALE_DATASTORE_SERVICE=127.0.0.1:8888 +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-blobstore-server -p ${APPSCALE_BS_OPTION_PORT} -d ${APPSCALE_DATASTORE_SERVICE} +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-cassandra.service b/system/units/appscale-cassandra.service new file mode 100644 index 0000000000..a21d031c39 --- /dev/null +++ b/system/units/appscale-cassandra.service @@ -0,0 +1,26 @@ +[Unit] +Description=AppScale Cassandra +Before=appscale-backend.target +PartOf=appscale-backend.target +After=network-online.target time-sync.target + +[Service] +Type=forking +User=cassandra +PermissionsStartOnly=true +ExecStartPre=/bin/mkdir -p /opt/appscale/cassandra +ExecStartPre=/bin/chown -R cassandra /opt/appscale/cassandra +ExecStart=/opt/cassandra/cassandra/bin/cassandra +GuessMainPID=yes +SuccessExitStatus=0 143 +KillMode=mixed +TimeoutStartSec=60 +TimeoutStopSec=60 +SyslogIdentifier=cassandra +# Limits +LimitAS=infinity +LimitMEMLOCK=infinity +LimitNPROC=32768 + +[Install] +WantedBy=appscale-backend.target \ No newline at end of file diff --git a/system/units/appscale-celery@.service b/system/units/appscale-celery@.service new file mode 100644 index 0000000000..81c63d9554 --- /dev/null +++ b/system/units/appscale-celery@.service @@ -0,0 +1,25 @@ +[Unit] +Description=AppScale Celery Worker for %i +Before=appscale-instance.target +After=appscale-instance-run@.service +PartOf=appscale-instance.target + +[Service] +Environment=C_FORCE_ROOT=True APP_ID=%i APPSCALE_TQ_OPTION_CONCURRENCY=1000 HOST=127.0.0.1 +ExecStart=/opt/appscale_venvs/appscale_taskqueue/bin/celery worker \ + --app=appscale.taskqueue.push_worker \ + --concurrency=${APPSCALE_TQ_OPTION_CONCURRENCY} \ + --pool=eventlet \ + --hostname=%i \ + --time-limit=610 
\ + --soft-time-limit=600 \ + --logfile=/var/log/appscale/celery_workers/%i.log \ + --statedb=/opt/appscale/celery/worker___%i.db \ + --workdir=/etc/appscale/celery/workers \ + -Ofair +SyslogIdentifier=%p-%i +# Resource controls +MemoryLimit=1000M + +[Install] +WantedBy=appscale-instance.target \ No newline at end of file diff --git a/system/units/appscale-control.target b/system/units/appscale-control.target new file mode 100644 index 0000000000..d1019332df --- /dev/null +++ b/system/units/appscale-control.target @@ -0,0 +1,5 @@ +[Unit] +Description=AppScale Management/Controllers +Requires=appscale-routing.target +After=appscale-routing.target +Conflicts=appscale-down.target \ No newline at end of file diff --git a/system/units/appscale-controller.service b/system/units/appscale-controller.service index 79004adde5..d12d06508b 100644 --- a/system/units/appscale-controller.service +++ b/system/units/appscale-controller.service @@ -1,24 +1,17 @@ [Unit] Description=AppScale Controller AssertPathExists=/etc/appscale/secret.key +PartOf=appscale-control.target +Wants=appscale.target +Before=appscale-control.target After=network-online.target time-sync.target [Service] -Type=simple -Environment=APPSCALE_HOME=/root/appscale HOME=/root +Environment=HOME=/root ExecStart=/usr/bin/ruby -w /root/appscale/AppController/djinnServer.rb -TimeoutStopSec=30 -KillMode=process -Restart=on-failure -RestartSec=10 -SyslogIdentifier=controller -# Limits -LimitNOFILE=200000 -LimitNPROC=infinity +SyslogIdentifier=%p # Security -PrivateTmp=true -ProtectSystem=true CapabilityBoundingSet=CAP_DAC_OVERRIDE CAP_SETGID CAP_SETUID CAP_CHOWN CAP_SYS_MODULE CAP_AUDIT_WRITE CAP_NET_ADMIN CAP_NET_RAW [Install] -WantedBy=multi-user.target \ No newline at end of file +WantedBy=appscale-control.target multi-user.target \ No newline at end of file diff --git a/system/units/appscale-down.target b/system/units/appscale-down.target new file mode 100644 index 0000000000..394322601c --- /dev/null +++ b/system/units/appscale-down.target @@ -0,0 +1,2 @@ +[Unit] +Description=Shutdown AppScale GTS services \ No newline at end of file diff --git a/system/units/appscale-ejabberd.target b/system/units/appscale-ejabberd.target new file mode 100644 index 0000000000..ed5d4d5c64 --- /dev/null +++ b/system/units/appscale-ejabberd.target @@ -0,0 +1,9 @@ +[Unit] +Description=AppScale ejabberd +PartOf=appscale-backend.target +Before=appscale-backend.target +After=ejabberd.service +Requires=ejabberd.service + +[Install] +WantedBy=appscale-backend.target \ No newline at end of file diff --git a/system/units/appscale-flower.service b/system/units/appscale-flower.service new file mode 100644 index 0000000000..88b895bc2f --- /dev/null +++ b/system/units/appscale-flower.service @@ -0,0 +1,12 @@ +[Unit] +Description=AppScale Flower +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +Environment=APPSCALE_FLOWER_OPTION_AUTH=--basic_auth=appscale:appscale +ExecStart=/usr/bin/flower $APPSCALE_FLOWER_OPTION_AUTH +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-groomer.service b/system/units/appscale-groomer.service new file mode 100644 index 0000000000..cd38eb62e7 --- /dev/null +++ b/system/units/appscale-groomer.service @@ -0,0 +1,13 @@ +[Unit] +Description=AppScale Datastore Groomer +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-groomer-service +SyslogIdentifier=%p 
+# Resource controls +MemoryLimit=512M + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-haproxy.service b/system/units/appscale-haproxy.service new file mode 100644 index 0000000000..e018ab2be4 --- /dev/null +++ b/system/units/appscale-haproxy.service @@ -0,0 +1,18 @@ +[Unit] +Description=AppScale Services HAProxy Load Balancer +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +Type=notify +Environment=CONFIG=/etc/haproxy/service-haproxy.cfg +ExecStartPre=/usr/sbin/haproxy -f ${CONFIG} -c -q +ExecStart=/usr/sbin/haproxy -Ws -f ${CONFIG} -p /run/appscale/service-haproxy.pid $EXTRAOPTS +ExecReload=/usr/sbin/haproxy -f ${CONFIG} -c -q +ExecReload=/bin/kill -USR2 $MAINPID +SuccessExitStatus=0 143 +KillMode=mixed +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-hermes.service b/system/units/appscale-hermes.service new file mode 100644 index 0000000000..d0e553982c --- /dev/null +++ b/system/units/appscale-hermes.service @@ -0,0 +1,11 @@ +[Unit] +Description=AppScale Hermes +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-hermes $APPSCALE_OPTION_VERBOSE +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-infrastructure@.service b/system/units/appscale-infrastructure@.service new file mode 100644 index 0000000000..9e1a77b012 --- /dev/null +++ b/system/units/appscale-infrastructure@.service @@ -0,0 +1,13 @@ +[Unit] +Description=AppScale Infrastructure Manager +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +Environment=APPSCALE_IM_OPTION_PORT=17444 +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-infrastructure -p ${APPSCALE_IM_OPTION_PORT} $APPSCALE_OPTION_VERBOSE $APPSCALE_IM_OPTIONS +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target +DefaultInstance=basic \ No newline at end of file diff --git a/system/units/appscale-instance-manager.service b/system/units/appscale-instance-manager.service new file mode 100644 index 0000000000..40538b93b9 --- /dev/null +++ b/system/units/appscale-instance-manager.service @@ -0,0 +1,11 @@ +[Unit] +Description=AppScale Application Instance Manager +Before=appscale-control.target +PartOf=appscale-control.target + +[Service] +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-instance-manager +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-control.target \ No newline at end of file diff --git a/system/units/appscale-instance-run@.service b/system/units/appscale-instance-run@.service new file mode 100644 index 0000000000..f5e90480dc --- /dev/null +++ b/system/units/appscale-instance-run@.service @@ -0,0 +1,14 @@ +[Unit] +Description=AppScale Instance for %i +Before=appscale-instance.target +PartOf=appscale-instance.target +AssertPathExists=/run/appscale/apps/command_%i + +[Service] +ExecStart=/bin/bash /run/appscale/apps/command_%i +SyslogIdentifier=%p-%i +# Resource controls +MemoryLimit=2G + +[Install] +WantedBy=appscale-instance.target \ No newline at end of file diff --git a/system/units/appscale-instance.target b/system/units/appscale-instance.target new file mode 100644 index 0000000000..b17c9f5057 --- /dev/null +++ b/system/units/appscale-instance.target @@ -0,0 +1,5 @@ +[Unit] +Description=AppScale Application Instances +Requires=appscale-service.target 
+After=appscale-service.target +Conflicts=appscale-down.target \ No newline at end of file diff --git a/system/units/appscale-logserver.service b/system/units/appscale-logserver.service new file mode 100644 index 0000000000..e1500fcdf5 --- /dev/null +++ b/system/units/appscale-logserver.service @@ -0,0 +1,12 @@ +[Unit] +Description=AppScale LogServer +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +Environment=PYTHONPATH=/root/appscale/LogService/ +ExecStart=/usr/bin/python2 /usr/bin/twistd --nodaemon --pidfile= --syslog %p +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-memcached.service b/system/units/appscale-memcached.service new file mode 100644 index 0000000000..93fc6e59f3 --- /dev/null +++ b/system/units/appscale-memcached.service @@ -0,0 +1,12 @@ +[Unit] +Description=AppScale Memcached +Before=appscale-backend.target +PartOf=appscale-backend.target +After=network-online.target time-sync.target + +[Service] +ExecStart=/usr/bin/memcached -m 64 -p 11211 -u root +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-backend.target \ No newline at end of file diff --git a/system/units/appscale-nginx.target b/system/units/appscale-nginx.target new file mode 100644 index 0000000000..857700fd60 --- /dev/null +++ b/system/units/appscale-nginx.target @@ -0,0 +1,9 @@ +[Unit] +Description=AppScale NGINX +PartOf=appscale-routing.target +Before=appscale-routing.target +After=nginx.service +Requires=nginx.service + +[Install] +WantedBy=appscale-routing.target \ No newline at end of file diff --git a/system/units/appscale-rabbitmq-server.target b/system/units/appscale-rabbitmq-server.target new file mode 100644 index 0000000000..ea3fba7b95 --- /dev/null +++ b/system/units/appscale-rabbitmq-server.target @@ -0,0 +1,9 @@ +[Unit] +Description=AppScale RabbitMQ Server +PartOf=appscale-backend.target +Before=appscale-backend.target +After=rabbitmq-server.service +Requires=rabbitmq-server.service + +[Install] +WantedBy=appscale-backend.target \ No newline at end of file diff --git a/system/units/appscale-routing.target b/system/units/appscale-routing.target new file mode 100644 index 0000000000..aa7757f8f2 --- /dev/null +++ b/system/units/appscale-routing.target @@ -0,0 +1,5 @@ +[Unit] +Description=AppScale routing +Requires=appscale-instance.target +After=appscale-instance.target +Conflicts=appscale-down.target \ No newline at end of file diff --git a/system/units/appscale-search.service b/system/units/appscale-search.service new file mode 100644 index 0000000000..51097f67ea --- /dev/null +++ b/system/units/appscale-search.service @@ -0,0 +1,11 @@ +[Unit] +Description=AppScale Search +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +ExecStart=/usr/bin/python2 ${APPSCALE_HOME}/SearchService/search_server.py $APPSCALE_OPTION_VERBOSE +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-service.target b/system/units/appscale-service.target new file mode 100644 index 0000000000..73eab11d44 --- /dev/null +++ b/system/units/appscale-service.target @@ -0,0 +1,5 @@ +[Unit] +Description=AppScale Intermediate Services +Requires=appscale-backend.target +After=appscale-backend.target +Conflicts=appscale-down.target \ No newline at end of file diff --git a/system/units/appscale-solr.service b/system/units/appscale-solr.service new file mode 100644 index 0000000000..31696789ae --- /dev/null +++ 
b/system/units/appscale-solr.service @@ -0,0 +1,16 @@ +[Unit] +Description=AppScale Solr +Before=appscale-backend.target +PartOf=appscale-backend.target + +[Service] +Type=forking +ExecStartPre=/bin/mkdir -p /opt/appscale/solr/data +ExecStartPre=/bin/cp -r ${APPSCALE_HOME}/SearchService/templates/schemaless-appscale /opt/appscale/solr/ +ExecStart=/bin/bash ${APPSCALE_HOME}/SearchService/solr/solr/bin/solr start -noprompt -solr.home /opt/appscale/solr/schemaless-appscale/solr/ +ExecStop=/bin/bash ${APPSCALE_HOME}/SearchService/solr/solr/bin/solr stop +KillMode=mixed +SyslogIdentifier=solr + +[Install] +WantedBy=appscale-backend.target \ No newline at end of file diff --git a/system/units/appscale-taskqueue@.service b/system/units/appscale-taskqueue@.service new file mode 100644 index 0000000000..7b914e986e --- /dev/null +++ b/system/units/appscale-taskqueue@.service @@ -0,0 +1,11 @@ +[Unit] +Description=AppScale Task Queue on Port %i +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +ExecStart=/opt/appscale_venvs/appscale_taskqueue/bin/python2 /opt/appscale_venvs/appscale_taskqueue/bin/appscale-taskqueue -p %i $APPSCALE_OPTION_VERBOSE +SyslogIdentifier=%p-%i + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-transaction-groomer.service b/system/units/appscale-transaction-groomer.service new file mode 100644 index 0000000000..f652806850 --- /dev/null +++ b/system/units/appscale-transaction-groomer.service @@ -0,0 +1,13 @@ +[Unit] +Description=AppScale Datastore Transaction Groomer +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-transaction-groomer $APPSCALE_OPTION_VERBOSE +SyslogIdentifier=%p +# Resource controls +MemoryLimit=512M + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-uaserver.service b/system/units/appscale-uaserver.service new file mode 100644 index 0000000000..ee122e39d8 --- /dev/null +++ b/system/units/appscale-uaserver.service @@ -0,0 +1,11 @@ +[Unit] +Description=AppScale UAServer +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-uaserver $APPSCALE_UASERVER_OPTIONS +SyslogIdentifier=%p + +[Install] +WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-xmpp@.service b/system/units/appscale-xmpp@.service new file mode 100644 index 0000000000..ee913d4ec5 --- /dev/null +++ b/system/units/appscale-xmpp@.service @@ -0,0 +1,13 @@ +[Unit] +Description=AppScale XMPP Receiver for %i +After=appscale-instance-run@.service +Before=appscale-instance.target +PartOf=appscale-instance.target +AssertPathExists=/run/appscale/apps/xmpp_command_%i + +[Service] +ExecStart=/bin/bash /run/appscale/apps/xmpp_command_%i +SyslogIdentifier=%p-%i + +[Install] +WantedBy=appscale-instance.target \ No newline at end of file diff --git a/system/units/appscale-zookeeper.target b/system/units/appscale-zookeeper.target new file mode 100644 index 0000000000..58ce676c76 --- /dev/null +++ b/system/units/appscale-zookeeper.target @@ -0,0 +1,9 @@ +[Unit] +Description=AppScale Zookeeper +PartOf=appscale-backend.target +Before=appscale-backend.target +After=zookeeper.service +Requires=zookeeper.service + +[Install] +WantedBy=appscale-backend.target \ No newline at end of file diff --git a/system/units/appscale.target b/system/units/appscale.target new file 
mode 100644 index 0000000000..6287f756b5 --- /dev/null +++ b/system/units/appscale.target @@ -0,0 +1,5 @@ +[Unit] +Description=AppScale GTS +Wants=appscale-control.target +After=appscale-control.target +Conflicts=appscale-down.target From 1259e77e9c38af54086d907166854fe500f693fd Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:31:18 -0700 Subject: [PATCH 094/221] Updates for systemd, logserver move unix socket under /run/appscale --- .../google/appengine/api/logservice/logservice_stub.py | 2 -- LogService/logquery.py | 9 ++++++--- LogService/twisted/plugins/appscale_logserver_plugin.py | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/AppServer/google/appengine/api/logservice/logservice_stub.py b/AppServer/google/appengine/api/logservice/logservice_stub.py index 765e3aa279..7dc6858a2c 100644 --- a/AppServer/google/appengine/api/logservice/logservice_stub.py +++ b/AppServer/google/appengine/api/logservice/logservice_stub.py @@ -83,8 +83,6 @@ def _fill_request_log(requestLog, log, include_app_logs): class LogServiceStub(apiproxy_stub.APIProxyStub): """Python stub for Log Service service.""" - _LOGSERVER_PATH = '/tmp/.appscale_logserver' - THREADSAFE = True _ACCEPTS_REQUEST_ID = True diff --git a/LogService/logquery.py b/LogService/logquery.py index 497fc297be..62b9530c3c 100755 --- a/LogService/logquery.py +++ b/LogService/logquery.py @@ -139,13 +139,16 @@ def query_or_follow(args): parser = argparse.ArgumentParser(description='Query AppScale logserver.') parser.add_argument('--app_id', type=str, required=True, help='app_id') parser.add_argument('--version', type=str, required=True, help='app version') - parser.add_argument('--con', type=str, nargs='?', default='unix:///tmp/.appscale_logserver', help='Connection eg tcp://10.10.10.10:1010. (Default local log server)') + parser.add_argument('--con', type=str, nargs='?', default='unix:///run/appscale/logserver.sock', + help='Connection eg tcp://10.10.10.10:1010. 
(Default local log server)') parser.add_argument('--start', type=int, nargs='?', help='start epoch timestamp') parser.add_argument('--end', type=int, nargs='?', help='end epoch timestamp') parser.add_argument('--ids', type=str, nargs='+', help='requestIds') parser.add_argument('--count', type=int, nargs='?', help='count', default=10) - parser.add_argument('--format', type=str, choices=['http', 'appengine', 'plain'], nargs='?', help='output format', default='appengine') - parser.add_argument('--mode', type=str, choices=['query', 'follow', 'log'], nargs='?', help='mode', default='query') + parser.add_argument('--format', type=str, choices=['http', 'appengine', 'plain'], nargs='?', + help='output format', default='appengine') + parser.add_argument('--mode', type=str, choices=['query', 'follow', 'log'], nargs='?', + help='mode', default='query') parser.add_argument('--reverse', action='store_true', help='reverse log order', default=False) args = parser.parse_args() #import pdb; pdb.set_trace() diff --git a/LogService/twisted/plugins/appscale_logserver_plugin.py b/LogService/twisted/plugins/appscale_logserver_plugin.py index 60255e90a5..bc48ae82e7 100644 --- a/LogService/twisted/plugins/appscale_logserver_plugin.py +++ b/LogService/twisted/plugins/appscale_logserver_plugin.py @@ -13,7 +13,7 @@ class Options(usage.Options): optParameters = [["port", "p", 7422, "The port number to listen on."], ["path", "a", "/opt/appscale/logserver", "Path where logs are stored."], ["size", "s", 2, "Size in GiB of retention of logs."], - ["unix_socket", "u", "/tmp/.appscale_logserver", "Path for unix socket to logserver."]] + ["unix_socket", "u", "/run/appscale/logserver.sock", "Path for unix socket to logserver."]] @implementer(IServiceMaker, IPlugin) @@ -36,9 +36,9 @@ def makeService(self, options): unix_news_server = internet.UNIXServer(options["unix_socket"], logserver_factory) unix_news_server.setServiceParent(application) log.startLogging(sys.stdout) - log.msg("Log Service started with parameters: port: {} path:{} " + log.msg("Log Service started with parameters: port:{} path:{} " "size:{} unix_socket:{}".format(options.get("port"), - options.get("port"), + options.get("path"), options.get("size"), options.get("unix_socket"))) return application From 32dd2cab50824b80f46ee7104a53c991ace3d837 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:31:28 -0700 Subject: [PATCH 095/221] Updates for systemd, hermes stats use systemctl not monit --- .../hermes/producers/process_stats.py | 54 +++++---- .../hermes/producers/tests/test_process.py | 86 ++++---------- .../tests/test_unified_service_names.py | 71 +++++------- .../appscale/hermes/unified_service_names.py | 107 ++++++++++-------- 4 files changed, 141 insertions(+), 177 deletions(-) diff --git a/Hermes/appscale/hermes/producers/process_stats.py b/Hermes/appscale/hermes/producers/process_stats.py index 36d81aaa9e..47e54647e9 100644 --- a/Hermes/appscale/hermes/producers/process_stats.py +++ b/Hermes/appscale/hermes/producers/process_stats.py @@ -10,8 +10,8 @@ from appscale.common import appscale_info from appscale.hermes.converter import Meta, include_list_name -from appscale.hermes.unified_service_names import \ - find_service_by_monit_name +from appscale.hermes.unified_service_names import ( + find_service_by_external_name, systemd_mapper) logger = logging.getLogger(__name__) @@ -67,17 +67,17 @@ class ProcessStats(object): Every Hermes node collects its processes statistics, but Master node also requests this statistics of all nodes in 
cluster. - All processes started by monit should be profiled. + AppSsale services started by systemd should be profiled. """ pid = attr.ib() - monit_name = attr.ib() + monit_name = attr.ib() # Monit / external name unified_service_name = attr.ib() #| These 4 fields are primary key application_id = attr.ib() #| for an instance of appscale service private_ip = attr.ib() #| - Application ID can be None if port = attr.ib() #| process is not related to specific app #| - port can be missed if it is not - #| mentioned in monit process name + #| mentioned in the external name cmdline = attr.ib() cpu = attr.ib(metadata={Meta.ENTITY: ProcessCPU}) memory = attr.ib(metadata={Meta.ENTITY: ProcessMemory}) @@ -94,10 +94,13 @@ class ProcessesStatsSnapshot(object): processes_stats = attr.ib(metadata={Meta.ENTITY_LIST: ProcessStats}) -MONIT_PROCESS_PATTERN = re.compile( - r"^Process \'(?P[^']+)\' *\n" - r"(^ .*\n)*?" - r"^ pid +(?P\d+)\n", +SYSTEMCTL_SHOW = ( + 'systemctl', '--type=service', '--state=active', '--property=Id,MainPID', + 'show', '*.service' +) +SYSTEMCTL_SHOW_PATTERN = re.compile( + r"^MainPID=(?P\d+)\n" + r"^Id=(?P[a-zA-Z0-9@_-]+\.service)\n", re.MULTILINE ) PROCESS_ATTRS = ( @@ -111,30 +114,33 @@ class ProcessesStatsSource(object): @staticmethod def get_current(): """ Method for building a list of ProcessStats. - It parses output of `monit status` and generates ProcessStats object - for each monitored service. + It parses output of `systemctl show` and generates ProcessStats object + for each service of interest. Returns: An instance ofProcessesStatsSnapshot. """ start = time.time() - monit_status = subprocess.check_output('monit status', shell=True) + systemctl_show = subprocess.check_output(SYSTEMCTL_SHOW) processes_stats = [] private_ip = appscale_info.get_private_ip() - for match in MONIT_PROCESS_PATTERN.finditer(monit_status): - monit_name = match.group('name') + for match in SYSTEMCTL_SHOW_PATTERN.finditer(systemctl_show): + systemd_name = match.group('name') pid = int(match.group('pid')) - service = find_service_by_monit_name(monit_name) + service = find_service_by_external_name(systemd_name, + default_mapper=systemd_mapper) + if service is None: + continue try: - stats = _process_stats(pid, service, monit_name, private_ip) + stats = _process_stats(pid, service, systemd_name, private_ip) processes_stats.append(stats) except psutil.Error as err: - logger.warn(u"Unable to get process stats for {monit_name} ({err})" - .format(monit_name=monit_name, err=err)) + logger.warn(u"Unable to get process stats for {name} ({err})" + .format(name=service.name, err=err)) # Add processes managed by the ServiceManager. for server in ServiceManager.get_state(): - service = find_service_by_monit_name(server.monit_name) + service = find_service_by_external_name(server.monit_name) try: stats = _process_stats(server.process.pid, service, server.monit_name, private_ip) @@ -152,7 +158,7 @@ def get_current(): return stats -def _process_stats(pid, service, monit_name, private_ip): +def _process_stats(pid, service, ext_name, private_ip): """ Static method for building an instance of ProcessStats. It summarize stats of the specified process and its children. @@ -160,7 +166,7 @@ def _process_stats(pid, service, monit_name, private_ip): pid: A string representing Process ID to describe. service: An instance of unified_service_names.Service which corresponds to this process. - monit_name: A string, name of corresponding monit process. + ext_name: A string, name of corresponding external service/process. 
Returns: An object of ProcessStats with detailed explanation of resources used by the specified process and its children. @@ -221,9 +227,9 @@ def _process_stats(pid, service, monit_name, private_ip): ) return ProcessStats( - pid=pid, monit_name=monit_name, unified_service_name=service.name, - application_id=service.get_application_id_by_monit_name(monit_name), - port=service.get_port_by_monit_name(monit_name), private_ip=private_ip, + pid=pid, monit_name=ext_name, unified_service_name=service.name, + application_id=service.get_application_id_by_external_name(ext_name), + port=service.get_port_by_external_name(ext_name), private_ip=private_ip, cmdline=process_info['cmdline'], cpu=cpu, memory=memory, disk_io=disk_io, network=network, threads_num=threads_num, children_stats_sum=children_sum, children_num=len(children_info) diff --git a/Hermes/appscale/hermes/producers/tests/test_process.py b/Hermes/appscale/hermes/producers/tests/test_process.py index e9e57bd930..cbecf77c06 100644 --- a/Hermes/appscale/hermes/producers/tests/test_process.py +++ b/Hermes/appscale/hermes/producers/tests/test_process.py @@ -6,58 +6,12 @@ from appscale.hermes.unified_service_names import ServicesEnum from appscale.hermes.producers import process_stats -MONIT_STATUS = """ -The Monit daemon 5.6 uptime: 20h 22m +SYSTEMCTL_SHOW = """ +MainPID=8466 +Id=appscale-haproxy.service -Process 'haproxy' - status Running - monitoring status Monitored - pid 8466 - parent pid 1 - uptime 20h 21m - children 0 - memory kilobytes 8140 - memory kilobytes total 8140 - memory percent 0.2% - memory percent total 0.2% - cpu percent 0.0% - cpu percent total 0.0% - data collected Wed, 19 Apr 2017 14:15:29 - -File 'groomer_file_check' - status Accessible - monitoring status Monitored - permission 644 - -Process 'appmanagerserver' - status Not monitored - monitoring status Not monitored - data collected Wed, 19 Apr 2017 13:49:44 - -Process 'app___my-25app-20003' - status Running - monitoring status Monitored - pid 5045 - parent pid 5044 - uptime 21h 41m - children 1 - memory kilobytes 65508 - memory kilobytes total 132940 - memory percent 1.7% - memory percent total 3.5% - cpu percent 0.0% - cpu percent total 0.0% - port response time 0.000s to 10.10.9.111:20000 [DEFAULT via TCP] - data collected Wed, 19 Apr 2017 14:18:33 - -System 'appscale-image0' - status Running - monitoring status Monitored - load average [0.23] [0.40] [0.46] - cpu 2.8%us 2.4%sy 1.3%wa - memory usage 2653952 kB [70.7%] - swap usage 0 kB [0.0%] - data collected Wed, 19 Apr 2017 14:15:29 +MainPID=5045 +Id=appscale-instance-run@appscaledashboard_default_v1_1566168050028-20000.service """ @@ -66,10 +20,10 @@ class TestCurrentProcessesStats(unittest.TestCase): @patch.object(process_stats.appscale_info, 'get_private_ip') @patch.object(process_stats, '_process_stats') @patch.object(process_stats.subprocess, 'check_output') - def test_reading_monit_status(self, mock_check_output, mock_process_stats, - mock_get_private_ip): - # Mocking `monit status` output and appscale_info.get_private_ip - mock_check_output.return_value = MONIT_STATUS + def test_reading_systemd_status(self, mock_check_output, mock_process_stats, + mock_get_private_ip): + # Mocking `systemctl show` output and appscale_info.get_private_ip + mock_check_output.return_value = SYSTEMCTL_SHOW mock_get_private_ip.return_value = '1.1.1.1' # Calling method under test @@ -77,8 +31,8 @@ def test_reading_monit_status(self, mock_check_output, mock_process_stats, # Checking expectations mock_process_stats.assert_has_calls([ 
- call(8466, ServicesEnum.HAPROXY, 'haproxy', '1.1.1.1'), - call(5045, ServicesEnum.APPLICATION, 'app___my-25app-20003', '1.1.1.1') + call(8466, ServicesEnum.SERVICE_HAPROXY, 'appscale-haproxy.service', '1.1.1.1'), + call(5045, ServicesEnum.APPLICATION, 'appscale-instance-run@appscaledashboard_default_v1_1566168050028-20000.service', '1.1.1.1') ]) self.assertIsInstance(snapshot, process_stats.ProcessesStatsSnapshot) @@ -87,13 +41,13 @@ def test_reading_monit_status(self, mock_check_output, mock_process_stats, @patch.object(process_stats.logger, 'warn') def test_process_stats(self, mock_logging_warn, mock_check_output, mock_get_private_ip): - # Mocking `monit status` output and appscale_info.get_private_ip - mock_check_output.return_value = ( - "Process 'app___fakeapp-testprocess-321'\n" - " pid {mypid}\n" - "Process 'proc-with-invalid-PID'\n" - " pid 70000\n".format(mypid=os.getpid()) - ) + # Mocking `systemctl show` output and appscale_info.get_private_ip + mock_check_output.return_value = """\ +MainPID={mypid} +Id=appscale-instance-run@fakeapp-testprocess-321.service + +MainPID=70000 +Id=appscale-proc-with-invalid-PID.service\n""".format(mypid=os.getpid()) mock_get_private_ip.return_value = '10.10.11.12' # Calling method under test @@ -103,14 +57,14 @@ def test_process_stats(self, mock_logging_warn, mock_check_output, self.assertIsInstance(stats_snapshot.utc_timestamp, float) processes_stats = stats_snapshot.processes_stats mock_logging_warn.assert_called_once_with( - "Unable to get process stats for proc-with-invalid-PID " + "Unable to get process stats for proc_with_invalid_PID " "(psutil.NoSuchProcess no process found with pid 70000)" ) self.assertEqual(len(processes_stats), 1) stats = processes_stats[0] self.assertIsInstance(stats, process_stats.ProcessStats) self.assertEqual(stats.pid, os.getpid()) - self.assertEqual(stats.monit_name, 'app___fakeapp-testprocess-321') + self.assertEqual(stats.monit_name, 'appscale-instance-run@fakeapp-testprocess-321.service') self.assertEqual(stats.unified_service_name, 'application') self.assertEqual(stats.application_id, 'fakeapp-testprocess') self.assertEqual(stats.private_ip, '10.10.11.12') diff --git a/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py b/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py index e24b779551..1d3ff59c73 100644 --- a/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py +++ b/Hermes/appscale/hermes/producers/tests/test_unified_service_names.py @@ -1,68 +1,59 @@ import unittest from appscale.hermes.unified_service_names import ( - ServicesEnum, find_service_by_monit_name, find_service_by_pxname, Service + ServicesEnum, find_service_by_external_name, find_service_by_pxname, Service ) -class TestMonitNames(unittest.TestCase): +class TestExternalNames(unittest.TestCase): def test_search_for_known_service(self): - monit_name_to_expectation = { - 'uaserver': ServicesEnum.UASERVER, - 'taskqueue-17448': ServicesEnum.TASKQUEUE, + external_name_to_expectation = { + 'appscale-admin.service': ServicesEnum.ADMIN, + 'appscale-uaserver.service': ServicesEnum.UASERVER, + 'appscale-taskqueue@17448.service': ServicesEnum.TASKQUEUE, 'datastore_server-4002': ServicesEnum.DATASTORE, - 'blobstore': ServicesEnum.BLOBSTORE, - 'app___superapp-20005': ServicesEnum.APPLICATION, - 'zookeeper': ServicesEnum.ZOOKEEPER, - 'rabbitmq': ServicesEnum.RABBITMQ, - 'nginx': ServicesEnum.NGINX, - 'log_service': ServicesEnum.LOG_SERVICE, - 'iaas_manager': ServicesEnum.IAAS_MANAGER, - 'hermes': 
ServicesEnum.HERMES, - 'haproxy': ServicesEnum.HAPROXY, - 'groomer_service': ServicesEnum.GROOMER, - 'flower': ServicesEnum.FLOWER, - 'ejabberd': ServicesEnum.EJABBERD, - 'controller': ServicesEnum.CONTROLLER, - 'celery-snowmachineapp-9999': ServicesEnum.CELERY, - 'cassandra': ServicesEnum.CASSANDRA, - 'backup_recovery_service': ServicesEnum.BACKUP_RECOVERY_SERVICE, - 'memcached': ServicesEnum.MEMCACHED, - 'appmanagerserver': ServicesEnum.APPMANAGER, + 'appscale-blobstore.service': ServicesEnum.BLOBSTORE, + 'appscale-instance-run@superapp-20005.service': ServicesEnum.APPLICATION, + 'zookeeper.service': ServicesEnum.ZOOKEEPER, + 'rabbitmq-server.service': ServicesEnum.RABBITMQ, + 'nginx.service': ServicesEnum.NGINX, + 'appscale-logserver.service': ServicesEnum.LOG_SERVICE, + 'appscale-infrastructure@basic.service': ServicesEnum.IAAS_MANAGER, + 'appscale-infrastructure@shadow.service': ServicesEnum.IAAS_MANAGER, + 'appscale-haproxy.service': ServicesEnum.SERVICE_HAPROXY, + 'ejabberd.service': ServicesEnum.EJABBERD, + 'appscale-celery@snowmachineapp.service': ServicesEnum.CELERY, + 'appscale-instance-manager.service': ServicesEnum.APPMANAGER, } - for monit_name, expected in monit_name_to_expectation.iteritems(): - self.assertEqual(find_service_by_monit_name(monit_name), expected) + for external_name, expected in external_name_to_expectation.iteritems(): + self.assertEqual(find_service_by_external_name(external_name), expected) def test_search_for_unknown_service(self): - service = find_service_by_monit_name('irrelevant-monit-process') - self.assertEqual(service.name, 'irrelevant-monit-process') + service = find_service_by_external_name('irrelevant-process') + self.assertEqual(service.name, 'irrelevant-process') def test_parsing_application_id(self): # Celery service celery = ServicesEnum.CELERY - app = celery.get_application_id_by_monit_name('celery-app-ppa-9999') + app = celery.get_application_id_by_external_name('appscale-celery@app-ppa.service') self.assertEqual(app, 'app-ppa') # Application service application = ServicesEnum.APPLICATION - app = application.get_application_id_by_monit_name('app___appppa-20008') + app = application.get_application_id_by_external_name('appscale-instance-run@appppa-20008.service') self.assertEqual(app, 'appppa') def test_parsing_port(self): - # Celery service - celery = ServicesEnum.CELERY - port = celery.get_port_by_monit_name('celery-app-ppa-9999') - self.assertEqual(port, 9999) # Application service application = ServicesEnum.APPLICATION - port = application.get_port_by_monit_name('app___appppa-20008') + port = application.get_port_by_external_name('appscale-instance-run@appppa-20008.service') self.assertEqual(port, 20008) # Taskqueue service taskqueue = ServicesEnum.TASKQUEUE - port = taskqueue.get_port_by_monit_name('taskqueue-17448') + port = taskqueue.get_port_by_external_name('appscale-taskqueue@17448.service') self.assertEqual(port, 17448) # Datastore service datastore = ServicesEnum.DATASTORE - port = datastore.get_port_by_monit_name('datastore_server-4002') + port = datastore.get_port_by_external_name('datastore_server-4002') self.assertEqual(port, 4002) @@ -122,14 +113,14 @@ class TestUnknownService(unittest.TestCase): def test_unknown_service(self): service = Service(name='smth-out-of-stats-28') self.assertEqual(service.name, 'smth-out-of-stats-28') - # Application ID by unknown monit name - app = service.get_application_id_by_monit_name('smth-out-of-stats-28') + # Application ID by unknown external name + app = 
service.get_application_id_by_external_name('smth-out-of-stats-28') self.assertIsNone(app) # Application ID by unknown haproxy name app = service.get_application_id_by_pxname('smth-out-of-stats-1.1.1.1:2') self.assertIsNone(app) - # Port by unknown monit name - port = service.get_port_by_monit_name('smth-out-of-stats-28') + # Port by unknown external name + port = service.get_port_by_external_name('smth-out-of-stats-28') self.assertIsNone(port) # IP/Port by unknown haproxy ip, port = service.get_ip_port_by_svname('smth-out-of-stats-1.1.1.1:2') diff --git a/Hermes/appscale/hermes/unified_service_names.py b/Hermes/appscale/hermes/unified_service_names.py index 3a51617844..fa84104f75 100644 --- a/Hermes/appscale/hermes/unified_service_names.py +++ b/Hermes/appscale/hermes/unified_service_names.py @@ -11,18 +11,18 @@ class Service(object): An instance of this class correspond to specific family of AppScale services. e.g.: taskqueue, datastore, application (user application), cassandra, ... - It's able to recognize itself in monit name and haproxy proxy name. + It's able to recognize itself in external name and haproxy proxy name. Patterns which are used for recognition should also match application id, port and ip when possible. It's aimed to centralize parsing of service names - in monit output and haproxy stats. - It helps to define monit and haproxy name formats in a compact way. + in systemctl show output and haproxy stats. + It helps to define name formats in a compact way. """ name = attr.ib() - # monit_matcher have to contain 'app' and 'port' groups when possible - monit_matcher = attr.ib(default=UNMATCHABLE, convert=re.compile) + # name_matcher have to contain 'app' and 'port' groups when possible + name_matcher = attr.ib(default=UNMATCHABLE, convert=re.compile) # haproxy_proxy_matcher have to contain 'app' group when possible haproxy_proxy_matcher = attr.ib(default=UNMATCHABLE, convert=re.compile) @@ -30,15 +30,15 @@ class Service(object): # haproxy_server_matcher have to contain 'app', 'ip' and 'port' groups when possible haproxy_server_matcher = attr.ib(default=UNMATCHABLE, convert=re.compile) - def recognize_monit_process(self, monit_name): - """ Checks whether monit process corresponds to this service. + def recognize_external_name(self, external_name): + """ Checks whether the name corresponds to this service. Args: - monit_name: A string, name of process as it's shown in monit status. + external_name: A string, name from external namespace. Returns: - True if monit_name corresponds to this service, False otherwise. + True if external_name corresponds to this service, False otherwise. """ - return self.monit_matcher.match(monit_name) is not None + return self.name_matcher.match(external_name) is not None def recognize_haproxy_proxy(self, proxy_name): """ Checks whether haproxy proxy corresponds to this service. @@ -50,15 +50,15 @@ def recognize_haproxy_proxy(self, proxy_name): """ return self.haproxy_proxy_matcher.match(proxy_name) is not None - def get_application_id_by_monit_name(self, monit_name): - """ Parses monit_name and returns application ID if it was found. + def get_application_id_by_external_name(self, external_name): + """ Parses external_name and returns application ID if it was found. Args: - monit_name: A string, name of process as it's shown in monit status. + external_name: A string, name of external service/process. Returns: A string representing App ID, or None if it wasn't found. 
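
The name_matcher patterns above key off systemd unit names, where template instances carry the application id and port (e.g. appscale-instance-run@<app>-<port>.service). A hedged way to see those names on a running node (the unit names shown are illustrative):

    # List active AppScale units; the instance part of templated units
    # encodes the app id and port that the matchers parse.
    systemctl list-units --type=service --state=active --no-legend 'appscale-*'
    # appscale-instance-run@guestbook-20000.service loaded active running ...
    # appscale-taskqueue@17448.service              loaded active running ...
    # appscale-uaserver.service                     loaded active running ...
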
""" - match = self.monit_matcher.match(monit_name) + match = self.name_matcher.match(external_name) if not match: return None try: @@ -66,15 +66,15 @@ def get_application_id_by_monit_name(self, monit_name): except IndexError: return None - def get_port_by_monit_name(self, monit_name): - """ Parses monit_name and returns port if it was found. + def get_port_by_external_name(self, external_name): + """ Parses external_name and returns port if it was found. Args: - monit_name: A string, name of process as it's shown in monit status. + external_name: A string, name of external service/process. Returns: An integer representing port, or None if it wasn't found. """ - match = self.monit_matcher.match(monit_name) + match = self.name_matcher.match(external_name) try: port_group = match.group('port') if match else None return int(port_group) if port_group else None @@ -121,54 +121,50 @@ def get_ip_port_by_svname(self, svname): class ServicesEnum(object): - # Known by both (Monit and HAProxy) + # Known by both (Systemd and HAProxy) UASERVER = Service( - name='uaserver', monit_matcher='^uaserver$', + name='uaserver', name_matcher='^appscale-uaserver.service$', haproxy_proxy_matcher=r'^UserAppServer$', haproxy_server_matcher=r'^UserAppServer-(?P[\d.]+):(?P\d+)$' ) TASKQUEUE = Service( name='taskqueue', - monit_matcher=r'^taskqueue-(?P\d+)$', + name_matcher=r'^appscale-taskqueue@(?P\d+).service$', haproxy_proxy_matcher='^TaskQueue$', haproxy_server_matcher=r'^TaskQueue-(?P[\d.]+):(?P\d+)$' ) DATASTORE = Service( - name='datastore', monit_matcher=r'^datastore_server-(?P\d+)$', + name='datastore', name_matcher=r'^datastore_server-(?P\d+)$', haproxy_proxy_matcher='^appscale-datastore_server$', haproxy_server_matcher=r'^appscale-datastore_server-(?P[\d.]+):(?P\d+)$' ) BLOBSTORE = Service( - name='blobstore', monit_matcher='^blobstore$', + name='blobstore', name_matcher='^appscale-blobstore.service$', haproxy_proxy_matcher='^as_blob_server$', haproxy_server_matcher=r'^as_blob_server-(?P[\d.]+):(?P\d+)$' ) APPLICATION = Service( name='application', - monit_matcher=r'^app___(?P[\w_-]+)-(?P\d+)$', + name_matcher=r'^appscale-instance-run@(?P[\w_-]+)-(?P\d+).service$', haproxy_proxy_matcher=r'^gae_(?P[\w_-]+)$', haproxy_server_matcher=r'^gae_(?P[\w_-]+)-(?P[\d.]+):(?P\d+)$' ) - # Known only on Monit side - ZOOKEEPER = Service(name='zookeeper', monit_matcher='^zookeeper$') - RABBITMQ = Service(name='rabbitmq', monit_matcher='^rabbitmq$') - NGINX = Service(name='nginx', monit_matcher='^nginx$') - LOG_SERVICE = Service(name='log_service', monit_matcher='^log_service$') - IAAS_MANAGER = Service(name='iaas_manager', monit_matcher='^iaas_manager$') - HERMES = Service(name='hermes', monit_matcher='^hermes$') - HAPROXY = Service(name='haproxy', monit_matcher='^haproxy$') - GROOMER = Service(name='groomer', monit_matcher='^groomer_service$') - FLOWER = Service(name='flower', monit_matcher='^flower$') - EJABBERD = Service(name='ejabberd', monit_matcher='^ejabberd$') - CONTROLLER = Service(name='controller', monit_matcher='^controller$') + # Known only on systemd side, defaults are added for each + # appscale-XXX.service if no mapping is present + ZOOKEEPER = Service(name='zookeeper', name_matcher='^zookeeper.service$') + RABBITMQ = Service(name='rabbitmq', name_matcher='^rabbitmq-server.service$') + NGINX = Service(name='nginx', name_matcher='^nginx.service$') + LOG_SERVICE = Service(name='log_service', name_matcher='^appscale-logserver.service$') + IAAS_MANAGER = Service(name='iaas_manager', 
name_matcher='^appscale-infrastructure@(basic|shadow).service$') + EJABBERD = Service(name='ejabberd', name_matcher='^ejabberd.service$') + ADMIN = Service(name='admin_server', name_matcher='^appscale-admin.service$') CELERY = Service(name='celery', - monit_matcher=r'^celery-(?P[\w_-]+)-(?P\d+)$') - CASSANDRA = Service(name='cassandra', monit_matcher='^cassandra$') - BACKUP_RECOVERY_SERVICE = Service(name='backup_recovery_service', - monit_matcher='^backup_recovery_service$') - MEMCACHED = Service(name='memcached', monit_matcher='^memcached$') - APPMANAGER = Service(name='appmanager', monit_matcher='^appmanagerserver$') + name_matcher=r'^appscale-celery@(?P[\w_-]+).service$') + CRON = Service(name='crond', + name_matcher=r'^cron.service$') + APPMANAGER = Service(name='appmanager', name_matcher='^appscale-instance-manager.service$') + SERVICE_HAPROXY = Service(name='service_haproxy', name_matcher='^appscale-haproxy.service$') KNOWN_SERVICES = [ @@ -180,6 +176,20 @@ class ServicesEnum(object): } +def systemd_mapper(external_name): + """ Map a systemd service name to a Hermes name. + + This will ignore instance of templated services which would require + special handling for any instance parameters (e.g. port) + + This mapping can be used with `find_service_by_external_name` + """ + if (external_name.startswith('appscale-') and + external_name.endswith('.service') and + not '@' in external_name): + return external_name[9:-8].replace('-','_') + return None + def find_service_by_pxname(proxy_name): # Try to find service corresponding to the proxy_name known_service = next(( @@ -192,13 +202,16 @@ def find_service_by_pxname(proxy_name): return Service(name=proxy_name) -def find_service_by_monit_name(monit_name): - # Try to find service corresponding to the monit_name +def find_service_by_external_name(external_name, default_mapper=str): + # Try to find service corresponding to the external_name known_service = next(( service for service in KNOWN_SERVICES - if service.recognize_monit_process(monit_name) + if service.recognize_external_name(external_name) ), None) if known_service: return known_service - # Return new default dummy service if the monit_name is not recognized - return Service(name=monit_name) + # Return new default dummy service if the external_name is not recognized + mapped_name = default_mapper(external_name) + if mapped_name is None: + return None + return Service(name=mapped_name) From 6284bbca7fb9e2fda4e7eb278eb0d0c577106a61 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:31:39 -0700 Subject: [PATCH 096/221] Updates for systemd, rsyslog.d files --- .../rsyslog.d/09-appscale.conf | 0 system/rsyslog.d/10-appscale-controller.conf | 5 -- system/rsyslog.d/20-appscale-service.conf | 61 +++++++++++++++++++ 3 files changed, 61 insertions(+), 5 deletions(-) rename common/appscale/common/templates/rsyslog-template.conf => system/rsyslog.d/09-appscale.conf (100%) delete mode 100644 system/rsyslog.d/10-appscale-controller.conf create mode 100644 system/rsyslog.d/20-appscale-service.conf diff --git a/common/appscale/common/templates/rsyslog-template.conf b/system/rsyslog.d/09-appscale.conf similarity index 100% rename from common/appscale/common/templates/rsyslog-template.conf rename to system/rsyslog.d/09-appscale.conf diff --git a/system/rsyslog.d/10-appscale-controller.conf b/system/rsyslog.d/10-appscale-controller.conf deleted file mode 100644 index bffe6c418a..0000000000 --- a/system/rsyslog.d/10-appscale-controller.conf +++ /dev/null @@ -1,5 +0,0 @@ -# Log 
controller output to file -:programname, isequal, "controller" /var/log/appscale/controller.log;APPSCALE - -# The following is to prevent further processing. -& stop \ No newline at end of file diff --git a/system/rsyslog.d/20-appscale-service.conf b/system/rsyslog.d/20-appscale-service.conf new file mode 100644 index 0000000000..8444c40183 --- /dev/null +++ b/system/rsyslog.d/20-appscale-service.conf @@ -0,0 +1,61 @@ +# Output AppScale service logs to file + +template(name="APPSCALE_LOGNAME" type="list") { + constant(value="/var/log/appscale/") + property(name="programname" position.from="10") + constant(value=".log") +} + +template(name="APPSCALE_INSTANCE_LOGNAME" type="list") { + constant(value="/var/log/appscale/app___") + property(name="programname" position.from="23") + constant(value=".log") +} + +:programname, isequal, "appscale-admin" /var/log/appscale/admin_server.log;APPSCALE +& stop + +:programname, startswith, "appscale-api-server-" -?APPSCALE_LOGNAME;APPSCALE +& stop + +:programname, isequal, "appscale-instance-manager" /var/log/appscale/appmanagerserver.log;APPSCALE +& stop + +:programname, startswith, "appscale-instance-run-" -?APPSCALE_INSTANCE_LOGNAME;APPSCALE +& stop + +:programname, isequal, "appscale-blobstore" /var/log/appscale/blobstore.log;APPSCALE +& stop + +:programname, startswith, "appscale-celery-" -?APPSCALE_LOGNAME;APPSCALE +& stop + +:programname, isequal, "appscale-controller" /var/log/appscale/controller.log;APPSCALE +& stop + +:programname, isequal, "appscale-flower" /var/log/appscale/flower.log;APPSCALE +& stop + +:programname, isequal, "appscale-groomer" /var/log/appscale/groomer_service.log;APPSCALE +& stop + +:programname, isequal, "appscale-hermes" /var/log/appscale/hermes.log;APPSCALE +& stop + +:programname, isequal, "appscale-infrastructure" /var/log/appscale/iaas_manager.log;APPSCALE +& stop + +:programname, isequal, "appscale-logserver" /var/log/appscale/log_service.log;APPSCALE +& stop + +:programname, isequal, "appscale-memcached" /var/log/appscale/memcached.log;APPSCALE +& stop + +:programname, startswith, "appscale-taskqueue-" -?APPSCALE_LOGNAME;APPSCALE +& stop + +:programname, isequal, "appscale-transaction-groomer" /var/log/appscale/transaction_groomer.log;APPSCALE +& stop + +:programname, isequal, "appscale-uaserver" /var/log/appscale/uaserver.log;APPSCALE +& stop From f2cca0233e1a65ccd9263ed406ffa559a93d83fd Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:31:49 -0700 Subject: [PATCH 097/221] Updates for systemd, build and install updates --- bootstrap.sh | 18 --------- debian/appscale_build.sh | 19 ---------- debian/appscale_install.sh | 1 - debian/appscale_install_functions.sh | 55 ++++++++-------------------- debian/control.bionic | 1 - debian/control.stretch | 1 - debian/control.xenial | 1 - 7 files changed, 16 insertions(+), 80 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index 1020c0be2b..e408ae36e9 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -269,14 +269,9 @@ if [ -d /etc/appscale/certs ]; then fi # Make sure AppScale is not running. - MONIT=$(which monit) if systemctl is-active appscale-controller > /dev/null ; then echo "AppScale is still running: please stop it" [ "${FORCE_UPGRADE}" = "Y" ] || exit 1 - elif echo $MONIT | grep local > /dev/null ; then - # AppScale is not running but there is a monit - # leftover from the custom install. - $MONIT quit fi # Let's keep a copy of the old config: we need to move it to avoid @@ -290,19 +285,6 @@ if [ -d /etc/appscale/certs ]; then [ ! 
-f "/etc/init.d/appscale-progenitor" ] || rm -fv "/etc/init.d/appscale-progenitor" [ ! -f "/etc/init.d/appscale-unmonit" ] || rm -fv "/etc/init.d/appscale-unmonit" - # Remove control files we added before 1.14, and re-add the - # default ones. - if [ $APPSCALE_MAJOR -le 1 -a $APPSCALE_MINOR -le 14 ]; then - rm -f /etc/default/haproxy /etc/init.d/haproxy /etc/default/monit /etc/monitrc - if dpkg-query -l haproxy > /dev/null 2> /dev/null ; then - apt-get -o DPkg::Options::="--force-confmiss" --reinstall install haproxy - fi - if dpkg-query -l monit > /dev/null 2> /dev/null ; then - apt-get -o DPkg::Options::="--force-confmiss" --reinstall install monit - fi - fi - - if [ "${UPDATE_REPO}" = "Y" ]; then echo "Found AppScale version $APPSCALE_MAJOR.$APPSCALE_MINOR."\ "An upgrade to the latest version available will be"\ diff --git a/debian/appscale_build.sh b/debian/appscale_build.sh index 4f8137d786..dbad204304 100755 --- a/debian/appscale_build.sh +++ b/debian/appscale_build.sh @@ -121,14 +121,9 @@ if [ -d ${CONFIG_DIR}/certs ]; then echo echo "Found AppScale version $APPSCALE_MAJOR.$APPSCALE_MINOR: upgrading it." # Make sure AppScale is not running. - MONIT=$(which monit) if systemctl is-active appscale-controller > /dev/null ; then echo "AppScale is still running: please stop it" [ "$FORCE_UPGRADE" = "Y" ] || exit 1 - elif echo $MONIT |grep local > /dev/null ; then - # AppScale is not running but there is a monit - # leftover from the custom install. - $MONIT quit fi # This sleep is to allow the user to Ctrl-C in case an upgrade is @@ -146,20 +141,6 @@ if [ -d ${CONFIG_DIR}/certs ]; then [ ! -f "/etc/init.d/appscale-controller" ] || rm -fv "/etc/init.d/appscale-controller" [ ! -f "/etc/init.d/appscale-progenitor" ] || rm -fv "/etc/init.d/appscale-progenitor" [ ! -f "/etc/init.d/appscale-unmonit" ] || rm -fv "/etc/init.d/appscale-unmonit" - - # Remove control files we added before 1.14, and re-add the - # default ones. - if [ $APPSCALE_MAJOR -le 1 -a $APPSCALE_MINOR -le 14 ]; then - rm -f /etc/default/haproxy /etc/init.d/haproxy /etc/default/monit /etc/monitrc - if dpkg-query -l haproxy > /dev/null 2> /dev/null ; then - ${PKG_CMD} -o DPkg::Options::="--force-confmiss" --reinstall install haproxy - fi - if dpkg-query -l monit > /dev/null 2> /dev/null ; then - ${PKG_CMD} -o DPkg::Options::="--force-confmiss" --reinstall install monit - fi - fi - - # In version past 2.3.1 we are incompatible with ruby1.8. fi if [ $1 ]; then diff --git a/debian/appscale_install.sh b/debian/appscale_install.sh index 87fc016156..3552ec3402 100755 --- a/debian/appscale_install.sh +++ b/debian/appscale_install.sh @@ -53,7 +53,6 @@ case "$1" in installsolr7 installservice postinstallservice - postinstallmonit postinstallejabberd setulimits increaseconnections diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index cd0764b8ef..0e3ffcdaea 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -127,11 +127,6 @@ EOF # On distros with systemd, the open file limit must be adjusted for each # service. 
if which systemctl > /dev/null && [ "${IN_DOCKER}" != "yes" ]; then - mkdir -p /etc/systemd/system/monit.service.d - cat < /etc/systemd/system/monit.service.d/override.conf -[Service] -LimitNOFILE=200000 -EOF mkdir -p /etc/systemd/system/nginx.service.d cat < /etc/systemd/system/nginx.service.d/override.conf [Service] @@ -389,11 +384,20 @@ installservice() cp -v ${APPSCALE_HOME_RUNTIME}/system/tmpfiles.d/appscale.conf ${DESTDIR}/usr/lib/tmpfiles.d/ systemd-tmpfiles --create - mkdir -pv ${DESTDIR}/etc/rsyslog.d - cp -v ${APPSCALE_HOME_RUNTIME}/system/rsyslog.d/10-appscale-controller.conf ${DESTDIR}/etc/rsyslog.d/ - mkdir -pv ${DESTDIR}/lib/systemd/system - cp -v ${APPSCALE_HOME_RUNTIME}/system/units/appscale-controller.service ${DESTDIR}/lib/systemd/system/ + cp -v ${APPSCALE_HOME_RUNTIME}/system/units/appscale*.service ${DESTDIR}/lib/systemd/system/ + cp -v ${APPSCALE_HOME_RUNTIME}/system/units/appscale*.target ${DESTDIR}/lib/systemd/system/ + cp -rv ${APPSCALE_HOME_RUNTIME}/system/units.d/*.d ${DESTDIR}/lib/systemd/system/ + + SYSTEMD_VERSION=$(systemctl --version | grep '^systemd ' | grep -o '[[:digit:]]*') + if [ ${SYSTEMD_VERSION} -lt 239 ] ; then + echo "Linking appscale common systemd drop-in" + for APPSCALE_SYSTEMD_SERVICE in ${DESTDIR}/lib/systemd/system/appscale-*.service; do + mkdir "${APPSCALE_SYSTEMD_SERVICE}.d" + ln -t "${APPSCALE_SYSTEMD_SERVICE}.d" ${DESTDIR}/lib/systemd/system/appscale-.d/10-appscale-common.conf + done + fi + systemctl daemon-reload # Enable AppController on system reboots. @@ -486,41 +490,14 @@ postinstallrsyslog() sed -i 's/#module(load="imtcp")/module(load="imtcp")/' /etc/rsyslog.conf sed -i 's/#input(type="imtcp" port="514")/input(type="imtcp" port="514")/' /etc/rsyslog.conf - # Set up template for formatting combined application log messages. - cp ${APPSCALE_HOME}/common/appscale/common/templates/rsyslog-template.conf\ - /etc/rsyslog.d/09-appscale.conf + # Install rsyslog drop-ins + mkdir -pv ${DESTDIR}/etc/rsyslog.d + cp -v ${APPSCALE_HOME}/system/rsyslog.d/*.conf ${DESTDIR}/etc/rsyslog.d/ # Restart the service systemctl restart rsyslog || true } -postinstallmonit() -{ - # We need to have http connection enabled to talk to monit. - if ! 
grep -v '^#' /etc/monit/monitrc |grep httpd > /dev/null; then - cat < Date: Mon, 9 Sep 2019 17:31:59 -0700 Subject: [PATCH 098/221] Updates for systemd, update logrotate for rsyslog use --- .../common/templates/appscale-logrotate.conf | 21 --------- debian/appscale_install_functions.sh | 3 +- system/logrotate.d/appscale | 43 +++++++++++++++++++ 3 files changed, 44 insertions(+), 23 deletions(-) delete mode 100644 common/appscale/common/templates/appscale-logrotate.conf create mode 100644 system/logrotate.d/appscale diff --git a/common/appscale/common/templates/appscale-logrotate.conf b/common/appscale/common/templates/appscale-logrotate.conf deleted file mode 100644 index 01e80b6017..0000000000 --- a/common/appscale/common/templates/appscale-logrotate.conf +++ /dev/null @@ -1,21 +0,0 @@ -su root syslog - -/var/log/appscale/appmanager*.log { - size 10M - missingok - rotate 7 - compress - delaycompress - notifempty - copytruncate -} - -/var/log/appscale/[!a]*.log /var/log/appscale/*/*.log { - size 10M - missingok - rotate 7 - compress - delaycompress - notifempty - copytruncate -} diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 0e3ffcdaea..dde08cb413 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -187,8 +187,7 @@ EOF # This puts in place the logrotate rules. if [ -d /etc/logrotate.d/ ]; then - cp ${APPSCALE_HOME}/common/appscale/common/templates/appscale-logrotate.conf \ - /etc/logrotate.d/appscale + cp -v ${APPSCALE_HOME}/system/logrotate.d/* /etc/logrotate.d/ fi # Logrotate AppScale logs hourly. diff --git a/system/logrotate.d/appscale b/system/logrotate.d/appscale new file mode 100644 index 0000000000..ea08b6fc77 --- /dev/null +++ b/system/logrotate.d/appscale @@ -0,0 +1,43 @@ +su root syslog + +# rsyslog logs +/var/log/appscale/admin_server.log +/var/log/appscale/api-server-*.log +/var/log/appscale/app___*.log +/var/log/appscale/appmanagerserver.log +/var/log/appscale/blobstore.log +/var/log/appscale/celery-*.log +/var/log/appscale/controller.log +/var/log/appscale/flower.log +/var/log/appscale/groomer_service.log +/var/log/appscale/hermes.log +/var/log/appscale/iaas_manager.log +/var/log/appscale/log_service.log +/var/log/appscale/memcached.log +/var/log/appscale/taskqueue-*.log +/var/log/appscale/transaction_groomer.log +/var/log/appscale/uaserver.log +{ + size 10M + missingok + rotate 7 + compress + delaycompress + notifempty + postrotate + /usr/lib/rsyslog/rsyslog-rotate + endscript +} + +# other logs (uses copytruncate) +/var/log/appscale/datastore_server-*.log +/var/log/appscale/*/*.log +{ + size 10M + missingok + rotate 7 + compress + delaycompress + notifempty + copytruncate +} From b95bac266fe55542c860865a1b72b4c39090418c Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:32:09 -0700 Subject: [PATCH 099/221] Updates for systemd, update cassandra config script comment --- scripts/setup_cassandra_config_files.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/setup_cassandra_config_files.py b/scripts/setup_cassandra_config_files.py index add05b9156..2eacdb3b19 100755 --- a/scripts/setup_cassandra_config_files.py +++ b/scripts/setup_cassandra_config_files.py @@ -20,7 +20,7 @@ if __name__ == "__main__": parser = argparse.ArgumentParser( - description="Creates Cassandra's Monit configuration files") + description="Creates Cassandra's configuration files") parser.add_argument('--local-ip', required=True, help='The private IP address of this 
machine.') parser.add_argument('--master-ip', required=True, From 47aba824a08919dc93b5a0b01e68c8cf03786c16 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:32:20 -0700 Subject: [PATCH 100/221] Updates for systemd, remove unused cassandra monit name --- AppDB/appscale/datastore/cassandra_env/cassandra_interface.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/AppDB/appscale/datastore/cassandra_env/cassandra_interface.py b/AppDB/appscale/datastore/cassandra_env/cassandra_interface.py index a7de3295f5..e8e0a170dc 100644 --- a/AppDB/appscale/datastore/cassandra_env/cassandra_interface.py +++ b/AppDB/appscale/datastore/cassandra_env/cassandra_interface.py @@ -57,9 +57,6 @@ # The keyspace used for all tables KEYSPACE = "Keyspace1" -# Cassandra watch name. -CASSANDRA_MONIT_WATCH_NAME = "cassandra" - # The number of times to retry connecting to Cassandra. INITIAL_CONNECT_RETRIES = 20 From 1edc49eefc581550acc47adafecf2997d744fb7b Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:32:30 -0700 Subject: [PATCH 101/221] Updates for systemd, install wrapper override drop-in for haproxy pre 1.8 --- debian/appscale_install_functions.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index dde08cb413..828312a63b 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -260,6 +260,18 @@ postinstallhaproxy() # AppScale starts/stop the service. systemctl stop haproxy systemctl disable haproxy + + # Pre 1.8 uses wrapper with systemd + if [ -f "/usr/sbin/haproxy-systemd-wrapper" ] ; then + HAPROXY_UNITD_DIR="${DESTDIR}/lib/systemd/system/appscale-haproxy.service.d" + [ -d "${HAPROXY_UNITD_DIR}" ] || mkdir -p "${HAPROXY_UNITD_DIR}" + cat <<"EOF" > "${DESTDIR}/lib/systemd/system/appscale-haproxy.service.d/10-appscale-haproxy.conf" +[Service] +Type=simple +ExecStart= +ExecStart=/usr/sbin/haproxy-systemd-wrapper -f ${CONFIG} -p /run/appscale/service-haproxy.pid $EXTRAOPTS +EOF + fi } installgems() @@ -392,7 +404,7 @@ installservice() if [ ${SYSTEMD_VERSION} -lt 239 ] ; then echo "Linking appscale common systemd drop-in" for APPSCALE_SYSTEMD_SERVICE in ${DESTDIR}/lib/systemd/system/appscale-*.service; do - mkdir "${APPSCALE_SYSTEMD_SERVICE}.d" + [ -d "${APPSCALE_SYSTEMD_SERVICE}.d" ] || mkdir "${APPSCALE_SYSTEMD_SERVICE}.d" ln -t "${APPSCALE_SYSTEMD_SERVICE}.d" ${DESTDIR}/lib/systemd/system/appscale-.d/10-appscale-common.conf done fi From 281325ac318831129d076037fa731e1d7b0abd03 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:32:40 -0700 Subject: [PATCH 102/221] Updates for systemd, install unit.d cleanup and services disable --- debian/appscale_install_functions.sh | 36 ++++++++++++---------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 828312a63b..78c0d87e3b 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -123,17 +123,6 @@ root soft nofile 200000 * soft nofile 200000 * - nproc 32768 EOF - - # On distros with systemd, the open file limit must be adjusted for each - # service. 
- if which systemctl > /dev/null && [ "${IN_DOCKER}" != "yes" ]; then - mkdir -p /etc/systemd/system/nginx.service.d - cat < /etc/systemd/system/nginx.service.d/override.conf -[Service] -LimitNOFILE=200000 -EOF - systemctl daemon-reload - fi } installappscaleprofile() @@ -258,8 +247,8 @@ postinstallhaproxy() sed -i 's/^ENABLED=0/ENABLED=1/g' /etc/default/haproxy # AppScale starts/stop the service. - systemctl stop haproxy - systemctl disable haproxy + systemctl stop haproxy || true + systemctl disable haproxy || true # Pre 1.8 uses wrapper with systemd if [ -f "/usr/sbin/haproxy-systemd-wrapper" ] ; then @@ -312,6 +301,8 @@ installgems() postinstallnginx() { + systemctl stop nginx || true + systemctl disable nginx || true rm -fv /etc/nginx/sites-enabled/default } @@ -417,12 +408,15 @@ installservice() postinstallservice() { - # Stop services shouldn't run at boot, then disable them. - systemctl stop memcached - systemctl disable memcached - + # Stop/disable services that shouldn't run at boot ejabberdctl stop || true - systemctl disable ejabberd + systemctl disable ejabberd || true + systemctl stop memcached || true + systemctl disable memcached || true + systemctl stop nginx || true + systemctl disable nginx || true + systemctl stop zookeeper || true + systemctl disable zookeeper || true } installzookeeper() @@ -444,8 +438,8 @@ installurllib3() postinstallzookeeper() { - systemctl stop zookeeper - systemctl disable zookeeper + systemctl stop zookeeper || true + systemctl disable zookeeper || true if [ ! -d /etc/zookeeper/conf ]; then echo "Cannot find zookeeper configuration!" exit 1 @@ -474,7 +468,7 @@ postinstallrabbitmq() # After install it starts up, shut it down. rabbitmqctl stop || true - systemctl disable rabbitmq-server + systemctl disable rabbitmq-server || true } installVersion() From 46b354373e1048aa63212e6bae6d9200db7a128e Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:32:50 -0700 Subject: [PATCH 103/221] Search2 ensure running script fails when directory exists --- .../solr-management/ensure_solr_running.sh | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/SearchService2/solr-management/ensure_solr_running.sh b/SearchService2/solr-management/ensure_solr_running.sh index 6554e0d886..7f489056bf 100755 --- a/SearchService2/solr-management/ensure_solr_running.sh +++ b/SearchService2/solr-management/ensure_solr_running.sh @@ -8,7 +8,7 @@ SOLR_MANAGEMENT_DIR="$( realpath --strip "$( dirname "${BASH_SOURCE[0]}" )" )" # Check if Solr is installed VERSION=7.6.0 -if ! ${SOLR_EXTRACT_DIR}/solr/bin/solr -version | grep "${VERSION}" +if ! ${SOLR_EXTRACT_DIR}/solr/bin/solr -version | grep "${VERSION}" >/dev/null then echo "Can not start Solr ${VERSION} as it's not installed." exit 1 @@ -28,7 +28,7 @@ ZK_HOST="${ZK_HOST}${SOLR_ZK_ROOT}" PRIVATE_IP=$(cat /etc/appscale/my_private_ip) solr_zk="${SOLR_EXTRACT_DIR}/solr/bin/solr zk" -if ${solr_zk} ls ${SOLR_ZK_ROOT} -z "${FIRST_ZK}" +if ${solr_zk} ls ${SOLR_ZK_ROOT} -z "${FIRST_ZK}" >/dev/null then echo "Zookeeper root is already created." else @@ -49,10 +49,10 @@ SOLR_MEM_LOW=$(echo "$SOLR_MEM_MAX" | awk '{ printf "%d", $1 * 0.70 }') # Slow process down when usage is higher. 
SOLR_MEM_HIGH=$(echo "$SOLR_MEM_MAX" | awk '{ printf "%d", $1 * 0.90 }') -mkdir /var/solr7 -sudo chown solr:solr /var/solr7 +mkdir -p /var/solr7 +chown solr:solr /var/solr7 mkdir -p /var/log/appscale/solr -sudo chown -R solr:solr /var/log/appscale/solr +chown -R solr:solr /var/log/appscale/solr export SOLR_HEAP="${SOLR_MEM_HIGH}m" export MEMORY_LOW="${SOLR_MEM_LOW}M" @@ -70,17 +70,17 @@ then echo "/etc/default/solr.in.sh has no changes." echo "/etc/systemd/system/solr.service has no changes." echo "Making sure Solr is running." - sudo systemctl enable solr - sudo systemctl start solr + systemctl enable solr + systemctl start solr else echo "Copying new solr.in.sh to /etc/default/solr.in.sh" - sudo cp "/tmp/solr.in.sh" "/etc/default/solr.in.sh" + cp "/tmp/solr.in.sh" "/etc/default/solr.in.sh" echo "Copying new solr.service to /etc/systemd/system/solr.service" - sudo cp "/tmp/solr.service" "/etc/systemd/system/solr.service" + cp "/tmp/solr.service" "/etc/systemd/system/solr.service" echo "Making sure Solr is restarted." - sudo systemctl daemon-reload - sudo systemctl enable solr - sudo systemctl restart solr + systemctl daemon-reload + systemctl enable solr + systemctl restart solr fi echo "Making sure appscale-specific config set is uploaded to zookeeper." From 9f75b6569003a62e0335fe4e8cfde41fea6c5efb Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 9 Sep 2019 17:33:10 -0700 Subject: [PATCH 104/221] Updates for systemd, hack to pass broken appscale tools reboot test --- system/units.d/nginx.service.d/10-appscale-nginx.conf | 3 +++ system/units/appscale-admin.service | 1 + system/units/appscale-api-server@.service | 1 + system/units/appscale-instance-run@.service | 1 + 4 files changed, 6 insertions(+) diff --git a/system/units.d/nginx.service.d/10-appscale-nginx.conf b/system/units.d/nginx.service.d/10-appscale-nginx.conf index e9cce46bb1..a769bb6680 100644 --- a/system/units.d/nginx.service.d/10-appscale-nginx.conf +++ b/system/units.d/nginx.service.d/10-appscale-nginx.conf @@ -5,6 +5,9 @@ PartOf=appscale-nginx.target ReloadPropagatedFrom=appscale-nginx.target [Service] +ExecStop= +ExecStop=/bin/sleep 5 +ExecStop=-/sbin/start-stop-daemon --quiet --stop --retry QUIT/5 --pidfile /run/nginx.pid LimitNOFILE=200000 RestartSec=10 Restart=on-failure \ No newline at end of file diff --git a/system/units/appscale-admin.service b/system/units/appscale-admin.service index 7d523bc863..c952e82de2 100644 --- a/system/units/appscale-admin.service +++ b/system/units/appscale-admin.service @@ -6,6 +6,7 @@ PartOf=appscale-control.target [Service] Environment=APPSCALE_ADMIN_OPTION_PORT=17442 ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-admin serve -p ${APPSCALE_ADMIN_OPTION_PORT} $APPSCALE_OPTION_VERBOSE +ExecStop=/bin/sleep 5 SyslogIdentifier=%p [Install] diff --git a/system/units/appscale-api-server@.service b/system/units/appscale-api-server@.service index 29b2393a59..b623a73cee 100644 --- a/system/units/appscale-api-server@.service +++ b/system/units/appscale-api-server@.service @@ -5,6 +5,7 @@ AssertPathExists=/run/appscale/apps/api_command_%i [Service] ExecStart=/bin/bash /run/appscale/apps/api_command_%i +ExecStop=/bin/sleep 5 SyslogIdentifier=%p-%i # Resource controls MemoryLimit=400M diff --git a/system/units/appscale-instance-run@.service b/system/units/appscale-instance-run@.service index f5e90480dc..8ede5e9eff 100644 --- a/system/units/appscale-instance-run@.service +++ b/system/units/appscale-instance-run@.service @@ -7,6 +7,7 @@ AssertPathExists=/run/appscale/apps/command_%i 
[Service] ExecStart=/bin/bash /run/appscale/apps/command_%i SyslogIdentifier=%p-%i +ExecStop=/bin/sleep 5 # Resource controls MemoryLimit=2G From 1e6ca4372f6ea15401f9c9cc01debd64d522c371 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 10 Sep 2019 18:34:59 -0700 Subject: [PATCH 105/221] Rework stats module to support all stat kinds --- AppDB/appscale/datastore/fdb/fdb_datastore.py | 31 +- AppDB/appscale/datastore/fdb/indexes.py | 20 +- AppDB/appscale/datastore/fdb/stats.py | 795 ------------------ .../appscale/datastore/fdb/stats/__init__.py | 0 AppDB/appscale/datastore/fdb/stats/buffer.py | 169 ++++ .../datastore/fdb/stats/containers.py | 419 +++++++++ .../appscale/datastore/fdb/stats/entities.py | 477 +++++++++++ AppDB/appscale/datastore/fdb/utils.py | 10 + AppDB/setup.py | 1 + 9 files changed, 1103 insertions(+), 819 deletions(-) delete mode 100644 AppDB/appscale/datastore/fdb/stats.py create mode 100644 AppDB/appscale/datastore/fdb/stats/__init__.py create mode 100644 AppDB/appscale/datastore/fdb/stats/buffer.py create mode 100644 AppDB/appscale/datastore/fdb/stats/containers.py create mode 100644 AppDB/appscale/datastore/fdb/stats/entities.py diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index b34e42e5b2..466a20994f 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -27,7 +27,7 @@ from appscale.datastore.fdb.gc import GarbageCollector from appscale.datastore.fdb.indexes import ( get_order_info, IndexManager, KEY_PROP) -from appscale.datastore.fdb.stats import StatsBuffer, StatsSummary +from appscale.datastore.fdb.stats.buffer import StatsBuffer from appscale.datastore.fdb.transactions import TransactionManager from appscale.datastore.fdb.utils import ( ABSENT_VERSION, fdb, FDBErrorCodes, next_entity_version, DS_ROOT, @@ -123,9 +123,11 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) - stat_diffs = [(project_id, old_entry.namespace, old_entry.path, stats) - for old_entry, _, stats in writes if stats is not None] - IOLoop.current().spawn_callback(self._stats_buffer.apply_diffs, stat_diffs) + mutations = [(old_entry, new_entry, index_stats) + for old_entry, new_entry, index_stats in writes + if index_stats is not None] + IOLoop.current().spawn_callback(self._stats_buffer.update, project_id, + mutations) for _, new_entry, _ in writes: put_response.add_key().CopyFrom(new_entry.key) @@ -214,9 +216,10 @@ def dynamic_delete(self, project_id, delete_request, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) - stat_diffs = [(project_id, old_entry.namespace, old_entry.path, stats) - for old_entry, _, stats in deletes if stats is not None] - IOLoop.current().spawn_callback(self._stats_buffer.apply_diffs, stat_diffs) + mutations = [(old_entry, None, stats) for old_entry, _, stats in deletes + if stats is not None] + IOLoop.current().spawn_callback(self._stats_buffer.update, project_id, + mutations) # TODO: Once the Cassandra backend is removed, populate a delete response. 
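Both dynamic_put and dynamic_delete now hand the stats buffer raw (old_entry, new_entry, index_stats) tuples and let it derive the entity count and byte deltas itself, which is why a delete simply passes None for the new entry. A minimal sketch of that delta logic, mirroring the EntityStats.update method introduced later in this patch (illustrative only, not the code added here):

def entity_delta(old_entry, new_entry):
    # Count/byte change implied by one mutation: a put of a brand-new key has
    # old_entry.present == False, a delete has new_entry is None.
    count_delta, byte_delta = 0, 0
    if new_entry is not None:
        count_delta += 1
        byte_delta += len(new_entry.encoded)
    if old_entry.present:
        count_delta -= 1
        byte_delta -= len(old_entry.encoded)
    return count_delta, byte_delta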
for old_entry, new_version, _ in deletes: @@ -411,16 +414,16 @@ def _upsert(self, tr, entity, old_entry_future=None): encoded_entity = entity.Encode() yield self._data_manager.put( tr, entity.key(), new_version, encoded_entity) - stats = yield self.index_manager.put_entries(tr, old_entry, entity) + index_stats = yield self.index_manager.put_entries(tr, old_entry, entity) if old_entry.present: yield self._gc.index_deleted_version(tr, old_entry) - stats -= StatsSummary.from_entity(old_entry.encoded) new_entry = VersionEntry.from_key(entity.key()) + new_entry._encoded_entity = encoded_entity + new_entry._decoded_entity = entity new_entry.version = new_version - stats += StatsSummary.from_entity(encoded_entity) - raise gen.Return((old_entry, new_entry, stats)) + raise gen.Return((old_entry, new_entry, index_stats)) @gen.coroutine def _delete(self, tr, key, old_entry_future=None): @@ -434,13 +437,13 @@ def _delete(self, tr, key, old_entry_future=None): new_version = next_entity_version(old_entry.version) yield self._data_manager.put(tr, key, new_version, b'') - stats = yield self.index_manager.put_entries(tr, old_entry, new_entity=None) + index_stats = yield self.index_manager.put_entries( + tr, old_entry, new_entity=None) if old_entry.present: yield self._gc.index_deleted_version(tr, old_entry) - stats -= StatsSummary.from_entity(old_entry.encoded) - raise gen.Return((old_entry, new_version, stats)) + raise gen.Return((old_entry, new_version, index_stats)) @gen.coroutine def _apply_mutations(self, tr, project_id, queried_groups, mutations, diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 9f48fb402b..eb50385053 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -17,7 +17,7 @@ from appscale.datastore.fdb.codecs import ( decode_str, decode_value, encode_value, encode_versionstamp_index, Path) from appscale.datastore.fdb.sdk import FindIndexToUse, ListCursor -from appscale.datastore.fdb.stats import StatsSummary +from appscale.datastore.fdb.stats.containers import IndexStatsSummary from appscale.datastore.fdb.utils import ( format_prop_val, DS_ROOT, fdb, get_scatter_val, MAX_FDB_TX_DURATION, ResultIterator, SCATTER_PROP, VERSIONSTAMP_SIZE) @@ -1008,7 +1008,7 @@ def __init__(self, db, tornado_fdb, data_manager, directory_cache): @gen.coroutine def put_entries(self, tr, old_version_entry, new_entity): - old_key_stats = StatsSummary() + old_key_stats = IndexStatsSummary() if old_version_entry.has_entity: old_keys, old_key_stats = yield self._get_index_keys( tr, old_version_entry.decoded, old_version_entry.commit_versionstamp) @@ -1017,7 +1017,7 @@ def put_entries(self, tr, old_version_entry, new_entity): tr.set_versionstamped_value( key, b'\x00' * VERSIONSTAMP_SIZE + encode_versionstamp_index(0)) - new_key_stats = StatsSummary() + new_key_stats = IndexStatsSummary() if new_entity is not None: new_keys, new_key_stats = yield self._get_index_keys( tr, new_entity) @@ -1166,17 +1166,17 @@ def _get_index_keys(self, tr, entity, commit_versionstamp=None): path = Path.flatten(entity.key().path()) kind = path[-2] - stats = StatsSummary() + stats = IndexStatsSummary() kindless_index = yield self._kindless_index(tr, project_id, namespace) kind_index = yield self._kind_index(tr, project_id, namespace, kind) composite_indexes = yield self._get_indexes( tr, project_id, namespace, kind) - kindless_keys = kindless_index.encode_key(path, commit_versionstamp) - kind_keys = kind_index.encode_key(path, 
commit_versionstamp) - stats.add_kindless_keys(kindless_keys) - stats.add_kind_keys(kindless_keys) - all_keys = [kindless_keys, kind_keys] + kindless_key = kindless_index.encode_key(path, commit_versionstamp) + kind_key = kind_index.encode_key(path, commit_versionstamp) + stats.add_kindless_key(kindless_key) + stats.add_kind_key(kind_key) + all_keys = [kindless_key, kind_key] entity_prop_names = [] for prop in entity.property_list(): prop_name = decode_str(prop.name()) @@ -1201,7 +1201,7 @@ def _get_index_keys(self, tr, entity, commit_versionstamp=None): composite_keys = index.encode_keys(entity.property_list(), path, commit_versionstamp) - stats.add_composite_keys(composite_keys) + stats.add_composite_keys(index.id, composite_keys) all_keys.extend(composite_keys) raise gen.Return((all_keys, stats)) diff --git a/AppDB/appscale/datastore/fdb/stats.py b/AppDB/appscale/datastore/fdb/stats.py deleted file mode 100644 index 1caa582665..0000000000 --- a/AppDB/appscale/datastore/fdb/stats.py +++ /dev/null @@ -1,795 +0,0 @@ -import datetime -import logging -import random -import struct -import sys -import time -from collections import defaultdict - -import six -from tornado import gen -from tornado.ioloop import IOLoop -from tornado.locks import Lock as AsyncLock - -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.datastore.fdb.codecs import decode_str -from appscale.datastore.fdb.polling_lock import PollingLock -from appscale.datastore.fdb.utils import fdb, ResultIterator - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.api import datastore -from google.appengine.datastore.entity_pb import Property as Meaning - -logger = logging.getLogger(__name__) - - -class StatsPropTypes(object): - STRING = 0x01 - BOOLEAN = 0x02 - INTEGER = 0x03 - NULL = 0x04 - FLOAT = 0x05 - KEY = 0x06 - BLOB = 0x07 - EMBEDDED_ENTITY = 0x08 - SHORT_BLOB = 0x09 - TEXT = 0x0A - USER = 0x0B - CATEGORY = 0x0C - LINK = 0x0D - EMAIL = 0x0E - DATE_TIME = 0x0F - GEO_PT = 0x10 - IM = 0x11 - PHONE_NUMBER = 0x12 - POSTAL_ADDRESS = 0x13 - RATING = 0x14 - BLOB_KEY = 0x15 - - MEANING_TYPES = { - Meaning.GD_WHEN: DATE_TIME, - Meaning.ATOM_CATEGORY: CATEGORY, - Meaning.ATOM_LINK: LINK, - Meaning.GD_EMAIL: EMAIL, - Meaning.GD_IM: IM, - Meaning.GD_PHONENUMBER: PHONE_NUMBER, - Meaning.GD_POSTALADDRESS: POSTAL_ADDRESS, - Meaning.GD_RATING: RATING, - Meaning.BLOB: BLOB, - Meaning.ENTITY_PROTO: EMBEDDED_ENTITY, - Meaning.BYTESTRING: SHORT_BLOB, - Meaning.TEXT: TEXT, - Meaning.BLOBKEY: BLOB_KEY - } - - VALUE_TYPES = { - 'string': STRING, - 'int64': INTEGER, - 'boolean': BOOLEAN, - 'double': FLOAT, - 'reference': KEY, - 'point': GEO_PT, - 'user': USER - } - - NAMES = { - STRING: u'String', - BOOLEAN: u'Boolean', - INTEGER: u'Integer', - NULL: u'NULL', - FLOAT: u'Float', - KEY: u'Key', - BLOB: u'Blob', - EMBEDDED_ENTITY: u'EmbeddedEntity', - SHORT_BLOB: u'ShortBlob', - TEXT: u'Text', - USER: u'User', - CATEGORY: u'Category', - LINK: u'Link', - EMAIL: u'Email', - DATE_TIME: u'Date/Time', - GEO_PT: u'GeoPt', - IM: u'IM', - PHONE_NUMBER: u'PhoneNumber', - POSTAL_ADDRESS: u'PostalAddress', - RATING: u'Rating', - BLOB_KEY: u'BlobKey' - } - - -def stats_prop_type(prop_pb): - """ Determines the property type for a Property object. - - Args: - prop_pb: An entity_pb.Property object. - - Returns: - A constant from PropertyTypes. 
- """ - value_type = StatsPropTypes.NULL - for type_name, type_code in six.iteritems(StatsPropTypes.VALUE_TYPES): - if getattr(prop_pb.value(), 'has_{}value'.format(type_name))(): - value_type = type_code - break - - if prop_pb.has_meaning(): - value_type = StatsPropTypes.MEANING_TYPES.get( - prop_pb.meaning(), value_type) - - return value_type - - -def fill_stat_entities(project_id, stats_by_ns_kind_isroot, - entity_bytes_by_prop, timestamp): - stats_by_ns_kind = defaultdict(lambda: defaultdict(StatsSummary)) - for namespace, kinds in six.iteritems(stats_by_ns_kind_isroot): - for kind, (root, non_root) in six.iteritems(kinds): - stats_by_ns_kind[namespace][kind] += root + non_root - - stats_by_namespace = {} - for namespace, kinds in six.iteritems(stats_by_ns_kind): - stats_by_namespace[namespace] = sum(six.itervalues(kinds), StatsSummary()) - - stats_by_kind = defaultdict(StatsSummary) - for namespace, kinds in six.iteritems(stats_by_ns_kind): - for kind, stats in six.iteritems(kinds): - stats_by_kind[kind] += stats - - entities = [] - - # TODO: Cover and test all stat entity types. - total_stats = sum(six.itervalues(stats_by_namespace), StatsSummary()) - entity = datastore.Entity( - '__Stat_Total__', _app=project_id, name='total_entity_usage') - entity['bytes'] = total_stats.total_bytes - entity['count'] = total_stats.entity_count - entity['timestamp'] = timestamp - - entity['entity_bytes'] = total_stats.entity_bytes - entity['builtin_index_bytes'] = total_stats.builtin_bytes - entity['builtin_index_count'] = total_stats.builtin_count - entity['composite_index_bytes'] = total_stats.composite_bytes - entity['composite_index_count'] = total_stats.composite_count - entities.append(entity) - - for namespace, stats in six.iteritems(stats_by_namespace): - if namespace: - entity = datastore.Entity('__Stat_Namespace__', _app=project_id, - name=namespace) - else: - entity = datastore.Entity('__Stat_Namespace__', _app=project_id, id=1) - - entity['bytes'] = stats.total_bytes - entity['count'] = stats.entity_count - entity['timestamp'] = timestamp - - entity['subject_namespace'] = namespace - entity['entity_bytes'] = stats.entity_bytes - entity['builtin_index_bytes'] = stats.builtin_bytes - entity['builtin_index_count'] = stats.builtin_count - entity['composite_index_bytes'] = stats.composite_bytes - entity['composite_index_count'] = stats.composite_count - entities.append(entity) - - for kind, stats in six.iteritems(stats_by_kind): - entity = datastore.Entity('__Stat_Kind__', _app=project_id, name=kind) - entity['bytes'] = stats.total_bytes - entity['count'] = stats.entity_count - entity['timestamp'] = timestamp - - entity['builtin_index_bytes'] = stats.builtin_bytes - entity['builtin_index_count'] = stats.builtin_count - entity['composite_index_bytes'] = stats.composite_bytes - entity['composite_index_count'] = stats.composite_count - entities.append(entity) - - stats_by_kind_root = defaultdict(StatsSummary) - stats_by_kind_nonroot = defaultdict(StatsSummary) - for namespace, kinds in six.iteritems(stats_by_ns_kind_isroot): - for kind, (root, non_root) in six.iteritems(kinds): - stats_by_kind_root[kind] += root - stats_by_kind_nonroot[kind] += non_root - - for kind, stats in six.iteritems(stats_by_kind_root): - entity = datastore.Entity('__Stat_Kind_IsRootEntity__', _app=project_id, - name=kind) - entity['bytes'] = stats.total_bytes - entity['count'] = stats.entity_count - entity['timestamp'] = timestamp - - entity['kind_name'] = kind - entity['entity_bytes'] = stats.entity_bytes - 
entities.append(entity) - - for kind, stats in six.iteritems(stats_by_kind_nonroot): - entity = datastore.Entity('__Stat_Kind_NotRootEntity__', _app=project_id, - name=kind) - entity['bytes'] = stats.total_bytes - entity['count'] = stats.entity_count - entity['timestamp'] = timestamp - - entity['kind_name'] = kind - entity['entity_bytes'] = stats.entity_bytes - entities.append(entity) - - # entity_bytes, builtin_index_bytes, builtin_index_count - stats_by_prop_type = defaultdict(lambda: [0, 0, 0]) - for namespace, kinds in six.iteritems(entity_bytes_by_prop): - for kind, prop_names in six.iteritems(kinds): - for prop_name, prop_types in six.iteritems(prop_names): - for prop_type, byte_count in six.iteritems(prop_types): - stats_by_prop_type[prop_type][0] += byte_count - - for prop_name, prop_types in six.iteritems(total_stats.prop_bytes): - for prop_type, byte_count in six.iteritems(prop_types): - stats_by_prop_type[prop_type][1] += byte_count - - for prop_name, prop_types in six.iteritems(total_stats.prop_count): - for prop_type, count in six.iteritems(prop_types): - stats_by_prop_type[prop_type][2] += count - - for prop_type, (entity_bytes, builtin_bytes, builtin_count) in \ - six.iteritems(stats_by_prop_type): - entity = datastore.Entity('__Stat_PropertyType__', _app=project_id, - name=StatsPropTypes.NAMES[prop_type]) - entity['bytes'] = entity_bytes + builtin_bytes - entity['count'] = builtin_count - entity['timestamp'] = timestamp - - entity['property_type'] = StatsPropTypes.NAMES[prop_type] - entity['entity_bytes'] = entity_bytes - entity['builtin_index_bytes'] = builtin_bytes - entity['builtin_index_count'] = builtin_count - entities.append(entity) - - # entity_bytes, builtin_index_bytes, builtin_index_count - stats_by_kind_prop_type = defaultdict(lambda: defaultdict(lambda: [0, 0, 0])) - for namespace, kinds in six.iteritems(entity_bytes_by_prop): - for kind, prop_names in six.iteritems(kinds): - for prop_name, prop_types in six.iteritems(prop_names): - for prop_type, byte_count in six.iteritems(prop_types): - stats_by_kind_prop_type[kind][prop_type][0] += byte_count - - for kind, stats in six.iteritems(stats_by_kind): - for prop_name, prop_types in six.iteritems(stats.prop_bytes): - for prop_type, byte_count in six.iteritems(prop_types): - stats_by_kind_prop_type[kind][prop_type][1] += byte_count - - for prop_name, prop_types in six.iteritems(stats.prop_count): - for prop_type, count in six.iteritems(prop_types): - stats_by_kind_prop_type[kind][prop_type][2] += count - - for kind, prop_types in six.iteritems(stats_by_kind_prop_type): - for prop_type, (entity_bytes, builtin_bytes, builtin_count) \ - in six.iteritems(prop_types): - type_name = StatsPropTypes.NAMES[prop_type] - entity = datastore.Entity('__Stat_PropertyType_Kind__', _app=project_id, - name=u'_'.join([type_name, kind])) - entity['bytes'] = entity_bytes + builtin_bytes - entity['count'] = builtin_count - entity['timestamp'] = timestamp - - entity['kind_name'] = kind - entity['entity_bytes'] = entity_bytes - - entity['property_type'] = type_name - entity['builtin_index_bytes'] = builtin_bytes - entity['builtin_index_count'] = builtin_count - entities.append(entity) - - # entity_bytes, builtin_index_bytes, builtin_index_count - stats_by_kind_prop_name = defaultdict(lambda: defaultdict(lambda: [0, 0, 0])) - for namespace, kinds in six.iteritems(entity_bytes_by_prop): - for kind, prop_names in six.iteritems(kinds): - for prop_name, prop_types in six.iteritems(prop_names): - stats_by_kind_prop_name[kind][prop_name][0] += \ 
- sum(six.itervalues(prop_types)) - - for kind, stats in six.iteritems(stats_by_kind): - for prop_name, prop_types in six.iteritems(stats.prop_bytes): - stats_by_kind_prop_name[kind][prop_name][1] += \ - sum(six.itervalues(prop_types)) - - for prop_name, prop_types in six.iteritems(stats.prop_count): - stats_by_kind_prop_name[kind][prop_name][2] += \ - sum(six.itervalues(prop_types)) - - for kind, prop_types in six.iteritems(stats_by_kind_prop_name): - for prop_name, (entity_bytes, builtin_bytes, builtin_count) \ - in six.iteritems(prop_types): - entity = datastore.Entity('__Stat_PropertyType_Kind__', _app=project_id, - name=u'_'.join([prop_name, kind])) - entity['bytes'] = entity_bytes + builtin_bytes - entity['count'] = builtin_count - entity['timestamp'] = timestamp - - entity['kind_name'] = kind - entity['entity_bytes'] = entity_bytes - - entity['property_name'] = prop_name - entity['builtin_index_bytes'] = builtin_bytes - entity['builtin_index_count'] = builtin_count - entities.append(entity) - - for namespace, kinds in six.iteritems(stats_by_ns_kind): - for kind, stats in six.iteritems(kinds): - entity = datastore.Entity( - '__Stat_Ns_Kind__', _app=project_id, name=kind, namespace=namespace) - entity['bytes'] = stats.total_bytes - entity['count'] = stats.entity_count - entity['timestamp'] = timestamp - - entity['kind_name'] = kind - entity['entity_bytes'] = stats.entity_bytes - - entity['builtin_index_bytes'] = stats.builtin_bytes - entity['builtin_index_count'] = stats.builtin_count - entity['composite_index_bytes'] = stats.composite_bytes - entity['composite_index_count'] = stats.composite_count - entities.append(entity) - - return entities - - -class ProjectStatsDir(object): - """ - A ProjectStatsDir handles the encoding and decoding details for a project's - stats entries. - - The directory path looks like (, 'stats'). 
- """ - DIR_NAME = u'stats' - - def __init__(self, directory): - self.directory = directory - - def encode_entity_count(self, namespace, kind, is_root, count): - key = self.directory.pack((u'entities', namespace, kind, is_root, u'count')) - return key, self._encode_delta(count) - - def encode_entity_bytes(self, namespace, kind, is_root, byte_count): - key = self.directory.pack((u'entities', namespace, kind, is_root, u'bytes')) - return key, self._encode_delta(byte_count) - - def encode_kindless_count(self, namespace, kind, is_root, count): - key = self.directory.pack((u'kindless', namespace, kind, is_root, u'count')) - return key, self._encode_delta(count) - - def encode_kindless_bytes(self, namespace, kind, is_root, byte_count): - key = self.directory.pack((u'kindless', namespace, kind, is_root, u'bytes')) - return key, self._encode_delta(byte_count) - - def encode_kind_count(self, namespace, kind, is_root, count): - key = self.directory.pack((u'kind', namespace, kind, is_root, u'count')) - return key, self._encode_delta(count) - - def encode_kind_bytes(self, namespace, kind, is_root, byte_count): - key = self.directory.pack((u'kind', namespace, kind, is_root, u'bytes')) - return key, self._encode_delta(byte_count) - - def encode_prop_type_count(self, namespace, kind, is_root, prop_name, - prop_type, count): - key = self.directory.pack((u'prop-type', namespace, kind, is_root, - prop_name, prop_type, u'count')) - return key, self._encode_delta(count) - - def encode_prop_type_bytes(self, namespace, kind, is_root, prop_name, - prop_type, byte_count): - key = self.directory.pack((u'prop-type', namespace, kind, is_root, - prop_name, prop_type, u'bytes')) - return key, self._encode_delta(byte_count) - - def encode_composite_count(self, namespace, kind, is_root, count): - key = self.directory.pack((u'composite', namespace, kind, is_root, u'count')) - return key, self._encode_delta(count) - - def encode_composite_bytes(self, namespace, kind, is_root, byte_count): - key = self.directory.pack((u'composite', namespace, kind, is_root, u'bytes')) - return key, self._encode_delta(byte_count) - - def encode_entity_bytes_by_prop(self, namespace, kind, prop_name, prop_type, - byte_count): - key = self.directory.pack((u'entity-bytes-by-prop', namespace, kind, - prop_name, prop_type)) - return key, self._encode_delta(byte_count) - - def encode_last_versionstamp(self): - return self.directory.pack((u'last-versionstamp',)), b'\x00' * 14 - - def encode_last_timestamp(self): - key = self.directory.pack((u'last-timestamp',)) - value = fdb.tuple.pack((int(time.time()),)) - return key, value - - def decode(self, kvs): - # By namespace/kind/[root, nonroot] - stats_by_ns_kind_isroot = defaultdict( - lambda: defaultdict(lambda: [StatsSummary(), StatsSummary()])) - - # By namespace/kind/prop_name/prop_type - entity_bytes_by_prop = defaultdict( - lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(int)))) - last_timestamp = None - for kv in kvs: - path = self.directory.unpack(kv.key) - section = path[0] - if section == u'last-versionstamp': - continue - - if section == u'last-timestamp': - last_timestamp = datetime.datetime.utcfromtimestamp( - fdb.tuple.unpack(kv.value)[0]) - continue - - namespace = path[1] - kind = path[2] - value = struct.unpack(', 'stats'). 
+ """ + DIR_NAME = u'stats' + + def __init__(self, directory): + self.directory = directory + + def encode_last_versionstamp(self): + return self.directory.pack((u'last-versionstamp',)), b'\x00' * 14 + + def encode_last_timestamp(self): + key = self.directory.pack((u'last-timestamp',)) + value = fdb.tuple.pack((int(time.time()),)) + return key, value + + def decode(self, kvs): + project_stats = ProjectStats() + last_timestamp = None + for kv in kvs: + path = self.directory.unpack(kv.key) + section = path[0] + if section == u'last-versionstamp': + continue + + if section == u'last-timestamp': + last_timestamp = datetime.datetime.utcfromtimestamp( + fdb.tuple.unpack(kv.value)[0]) + continue + + project_stats.update_from_kv(section, path[1:], kv.value) + + return project_stats, last_timestamp + + @classmethod + def directory_path(cls, project_id): + return project_id, cls.DIR_NAME + + +class StatsBuffer(object): + AVG_FLUSH_INTERVAL = 30 + + SUMMARY_INTERVAL = 120 + + _LOCK_KEY = u'stats-lock' + + def __init__(self, db, tornado_fdb, directory_cache, ds_access): + self._db = db + self._tornado_fdb = tornado_fdb + self._directory_cache = directory_cache + self._buffer_lock = AsyncLock() + self._ds_access = ds_access + + summary_lock_key = self._directory_cache.root_dir.pack((self._LOCK_KEY,)) + self._summary_lock = PollingLock( + self._db, self._tornado_fdb, summary_lock_key) + + # By project + self._last_summarized = {} + + # By project + self._buffers = defaultdict(ProjectStats) + + def start(self): + self._summary_lock.start() + IOLoop.current().spawn_callback(self._periodic_flush) + IOLoop.current().spawn_callback(self._periodic_summary) + + @gen.coroutine + def update(self, project_id, mutations): + with (yield self._buffer_lock.acquire()): + for old_entry, new_entry, index_stats in mutations: + self._buffers[project_id].update(old_entry, new_entry, index_stats) + + @gen.coroutine + def _periodic_flush(self): + while True: + try: + yield gen.sleep(random.random() * self.AVG_FLUSH_INTERVAL) + yield self._flush() + except Exception: + # TODO: Exponential backoff here. + logger.exception(u'Unexpected error while flushing stats') + yield gen.sleep(random.random() * 2) + continue + + @gen.coroutine + def _flush(self): + if all(buffer_.empty for buffer_ in six.itervalues(self._buffers)): + return + + with (yield self._buffer_lock.acquire()): + tr = self._db.create_transaction() + for project_id, buffer_ in six.iteritems(self._buffers): + stats_dir = yield self._project_stats_dir(tr, project_id) + buffer_.apply(tr, stats_dir.directory) + + vs_key, vs_value = stats_dir.encode_last_versionstamp() + tr.set_versionstamped_value(vs_key, vs_value) + ts_key, ts_value = stats_dir.encode_last_timestamp() + tr[ts_key] = ts_value + + yield self._tornado_fdb.commit(tr) + logger.debug(u'Finished flushing stats') + self._buffers.clear() + + @gen.coroutine + def _periodic_summary(self): + while True: + try: + yield self._summary_lock.acquire() + tr = self._db.create_transaction() + last_summarized = {} + + # TODO: This can be made async. 
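The per-field deltas flushed above are applied with FoundationDB's atomic ADD mutation, so concurrent flushes from different datastore servers never conflict on the same counter key. The encode_delta/decode_delta helpers come from the utils change in this patch (their definition is not shown in this excerpt); assuming they pack the signed delta as a little-endian 64-bit integer, which is the operand format fdb's add expects, the pattern looks roughly like:

import struct

def encode_delta(delta):
    # FDB interprets the ADD operand as a little-endian integer.
    return struct.pack('<q', delta)

def decode_delta(encoded):
    return struct.unpack('<q', encoded)[0]

def bump_counter(tr, key, delta):
    # The increment is applied server-side without a read, so two
    # transactions bumping the same stats key do not conflict.
    tr.add(key, encode_delta(delta))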
+ project_ids = self._directory_cache.root_dir.list(tr) + + for project_id in project_ids: + stats_dir = yield self._project_stats_dir(tr, project_id) + last_vs_key = stats_dir.encode_last_versionstamp()[0] + last_versionstamp = yield self._tornado_fdb.get(tr, last_vs_key) + if (not last_versionstamp.present() or + last_versionstamp.value == self._last_summarized.get(project_id)): + continue + + last_summarized[project_id] = last_versionstamp.value + results = yield ResultIterator( + tr, self._tornado_fdb, stats_dir.directory.range(), + snapshot=True).list() + project_stats, last_timestamp = stats_dir.decode(results) + entities = fill_entities(project_id, project_stats, last_timestamp) + yield [self._ds_access._upsert(tr, entity) for entity in entities] + + yield self._tornado_fdb.commit(tr) + self._last_summarized = last_summarized + logger.debug(u'Finished summarizing stats') + yield gen.sleep(self.SUMMARY_INTERVAL) + except Exception: + logger.exception(u'Unexpected error while summarizing stats') + yield gen.sleep(random.random() * 20) + + @gen.coroutine + def _project_stats_dir(self, tr, project_id): + path = ProjectStatsDir.directory_path(project_id) + directory = yield self._directory_cache.get(tr, path) + raise gen.Return(ProjectStatsDir(directory)) diff --git a/AppDB/appscale/datastore/fdb/stats/containers.py b/AppDB/appscale/datastore/fdb/stats/containers.py new file mode 100644 index 0000000000..fe492fffb1 --- /dev/null +++ b/AppDB/appscale/datastore/fdb/stats/containers.py @@ -0,0 +1,419 @@ +""" +There are five main stats sections. +- composite-indexes: (namespace, index_id, kind, count/bytes) +- builtin-indexes: (namespace, kind, is_root, count/bytes) +- entities: (namespace, kind, is_root, count/bytes) +- entity-properties: (namespace, kind, prop_type, prop_name, count/bytes) +- index-properties: (namespace, kind, prop_type, prop_name, count/bytes) +""" +import logging +import sys +from collections import defaultdict + +import six + +from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.datastore.fdb.codecs import decode_str +from appscale.datastore.fdb.utils import decode_delta, encode_delta + +sys.path.append(APPSCALE_PYTHON_APPSERVER) +from google.appengine.datastore.entity_pb import Property as Meaning + +logger = logging.getLogger(__name__) + + +class StatsPropTypes(object): + STRING = 0x01 + BOOLEAN = 0x02 + INTEGER = 0x03 + NULL = 0x04 + FLOAT = 0x05 + KEY = 0x06 + BLOB = 0x07 + EMBEDDED_ENTITY = 0x08 + SHORT_BLOB = 0x09 + TEXT = 0x0A + USER = 0x0B + CATEGORY = 0x0C + LINK = 0x0D + EMAIL = 0x0E + DATE_TIME = 0x0F + GEO_PT = 0x10 + IM = 0x11 + PHONE_NUMBER = 0x12 + POSTAL_ADDRESS = 0x13 + RATING = 0x14 + BLOB_KEY = 0x15 + + MEANING_TYPES = { + Meaning.GD_WHEN: DATE_TIME, + Meaning.ATOM_CATEGORY: CATEGORY, + Meaning.ATOM_LINK: LINK, + Meaning.GD_EMAIL: EMAIL, + Meaning.GD_IM: IM, + Meaning.GD_PHONENUMBER: PHONE_NUMBER, + Meaning.GD_POSTALADDRESS: POSTAL_ADDRESS, + Meaning.GD_RATING: RATING, + Meaning.BLOB: BLOB, + Meaning.ENTITY_PROTO: EMBEDDED_ENTITY, + Meaning.BYTESTRING: SHORT_BLOB, + Meaning.TEXT: TEXT, + Meaning.BLOBKEY: BLOB_KEY + } + + VALUE_TYPES = { + 'string': STRING, + 'int64': INTEGER, + 'boolean': BOOLEAN, + 'double': FLOAT, + 'reference': KEY, + 'point': GEO_PT, + 'user': USER + } + + NAMES = { + STRING: u'String', + BOOLEAN: u'Boolean', + INTEGER: u'Integer', + NULL: u'NULL', + FLOAT: u'Float', + KEY: u'Key', + BLOB: u'Blob', + EMBEDDED_ENTITY: u'EmbeddedEntity', + SHORT_BLOB: u'ShortBlob', + TEXT: u'Text', + USER: u'User', + 
CATEGORY: u'Category', + LINK: u'Link', + EMAIL: u'Email', + DATE_TIME: u'Date/Time', + GEO_PT: u'GeoPt', + IM: u'IM', + PHONE_NUMBER: u'PhoneNumber', + POSTAL_ADDRESS: u'PostalAddress', + RATING: u'Rating', + BLOB_KEY: u'BlobKey' + } + + +def stats_prop_type(prop_pb): + """ Determines the property type for a Property object. + + Args: + prop_pb: An entity_pb.Property object. + + Returns: + A constant from PropertyTypes. + """ + value_type = StatsPropTypes.NULL + for type_name, type_code in six.iteritems(StatsPropTypes.VALUE_TYPES): + if getattr(prop_pb.value(), 'has_{}value'.format(type_name))(): + value_type = type_code + break + + if prop_pb.has_meaning(): + value_type = StatsPropTypes.MEANING_TYPES.get( + prop_pb.meaning(), value_type) + + return value_type + + +class CountBytes(object): + __slots__ = ['count', 'bytes'] + + def __init__(self, count=0, bytes_=0): + self.count = count + self.bytes = bytes_ + + def __repr__(self): + return u'CountBytes({!r}, {!r})'.format(self.count, self.bytes) + + def __add__(self, other): + self.count += other.count + self.bytes += other.bytes + return self + + def __sub__(self, other): + self.count -= other.count + self.bytes -= other.bytes + return self + + +def create_apply_fields(tr, stats_dir): + def apply_fields(prefix, count_bytes): + tr.add(stats_dir.pack(prefix + (u'count',)), + encode_delta(count_bytes.count)) + tr.add(stats_dir.pack(prefix + (u'bytes',)), + encode_delta(count_bytes.bytes)) + + return apply_fields + + +def create_apply_props(entity_stats, namespace, kind): + def apply_props(prop_list, subtract=False): + for prop_pb in prop_list: + prop_type = stats_prop_type(prop_pb) + prop_name = decode_str(prop_pb.name()) + fields = entity_stats[namespace][kind][prop_type][prop_name] + delta = CountBytes(1, len(prop_pb.Encode())) + if subtract: + fields -= delta + else: + fields += delta + + return apply_props + + +class IndexStatsSummary(object): + __slots__ = ['kindless', 'kind', 'single_prop', 'composite'] + + def __init__(self): + self.kindless = CountBytes() + self.kind = CountBytes() + + # By prop_type/prop_name + self.single_prop = defaultdict(lambda: defaultdict(CountBytes)) + + # By index ID + self.composite = defaultdict(CountBytes) + + @property + def builtin(self): + return self.kindless + self.kind + sum( + (sum(six.itervalues(by_name), CountBytes()) + for by_name in six.itervalues(self.single_prop)), CountBytes()) + + def __repr__(self): + return u'IndexStatsSummary({!r}, {!r}, {!r}, {!r})'.format( + self.kindless, self.kind, + {prop_name: dict(prop_types) + for prop_name, prop_types in six.iteritems(self.single_prop)}, + dict(self.composite)) + + def add_kindless_key(self, key): + self.kindless += CountBytes(1, len(key)) + + def add_kind_key(self, key): + self.kind += CountBytes(1, len(key)) + + def add_prop_key(self, prop_pb, key): + prop_type = stats_prop_type(prop_pb) + prop_name = decode_str(prop_pb.name()) + self.single_prop[prop_type][prop_name] += CountBytes(1, len(key)) + + def add_composite_keys(self, index_id, keys): + self.composite[index_id] += CountBytes(len(keys), + sum(len(key) for key in keys)) + + def __sub__(self, other): + self.kindless -= other.kindless + self.kind -= other.kind + for prop_type, by_name in six.iteritems(other.single_prop): + for prop_name, fields in six.iteritems(by_name): + self.single_prop[prop_type][prop_name] -= fields + + for index_id, fields in six.iteritems(other.composite): + self.composite[index_id] -= fields + + return self + + +class CompositeStats(object): + __slots__ = 
['stats'] + + SECTION_ID = u'composite-indexes' + + def __init__(self): + # By namespace/(index_id, kind) + self.stats = defaultdict(lambda: defaultdict(CountBytes)) + + @property + def empty(self): + return not self.stats + + def update(self, namespace, kind, index_stats): + for index_id, count_bytes in six.iteritems(index_stats.composite): + self.stats[namespace][(index_id, kind)] += count_bytes + + def update_from_kv(self, path, encoded_value): + namespace, index_id, kind, field = path + value = decode_delta(encoded_value) + setattr(self.stats[namespace][(index_id, kind)], field, value) + + def apply(self, tr, stats_dir): + apply_fields = create_apply_fields(tr, stats_dir) + for namespace, by_index in six.iteritems(self.stats): + for (index_id, kind), fields in six.iteritems(by_index): + apply_fields((self.SECTION_ID, namespace, index_id, kind), fields) + + +class EntityStats(object): + __slots__ = ['builtin_indexes_root', 'builtin_indexes_notroot', + 'entities_root', 'entities_notroot'] + + BUILTINS_SECTION = u'builtin-indexes' + + ENTITY_SECTION = u'entities' + + def __init__(self): + # By namespace/kind + self.builtin_indexes_root = defaultdict(lambda: defaultdict(CountBytes)) + self.builtin_indexes_notroot = defaultdict(lambda: defaultdict(CountBytes)) + self.entities_root = defaultdict(lambda: defaultdict(CountBytes)) + self.entities_notroot = defaultdict(lambda: defaultdict(CountBytes)) + + @property + def empty(self): + return not any((self.builtin_indexes_root, self.builtin_indexes_notroot, + self.entities_root, self.entities_notroot)) + + def update(self, old_entry, new_entry, index_stats): + delta = CountBytes() + if new_entry is not None: + delta.count += 1 + delta.bytes += len(new_entry.encoded) + + if old_entry.present: + delta.count -= 1 + delta.bytes -= len(old_entry.encoded) + + namespace = old_entry.namespace + kind = old_entry.kind + if len(old_entry.path) == 2: + self.builtin_indexes_root[namespace][kind] += index_stats.builtin + self.entities_root[namespace][kind] += delta + else: + self.builtin_indexes_notroot[namespace][kind] += index_stats.builtin + self.entities_notroot[namespace][kind] += delta + + def update_builtins_from_kv(self, path, encoded_value): + namespace, kind, is_root, field = path + value = decode_delta(encoded_value) + if is_root: + setattr(self.builtin_indexes_root[namespace][kind], field, value) + else: + setattr(self.builtin_indexes_notroot[namespace][kind], field, value) + + def update_entities_from_kv(self, path, encoded_value): + namespace, kind, is_root, field = path + value = decode_delta(encoded_value) + if is_root: + setattr(self.entities_root[namespace][kind], field, value) + else: + setattr(self.entities_notroot[namespace][kind], field, value) + + def apply(self, tr, stats_dir): + apply_fields = create_apply_fields(tr, stats_dir) + for namespace, by_kind in six.iteritems(self.builtin_indexes_root): + for kind, fields in six.iteritems(by_kind): + apply_fields((self.BUILTINS_SECTION, namespace, kind, True), fields) + + for namespace, by_kind in six.iteritems(self.builtin_indexes_notroot): + for kind, fields in six.iteritems(by_kind): + apply_fields((self.BUILTINS_SECTION, namespace, kind, False), fields) + + for namespace, by_kind in six.iteritems(self.entities_root): + for kind, fields in six.iteritems(by_kind): + apply_fields((self.ENTITY_SECTION, namespace, kind, True), fields) + + for namespace, by_kind in six.iteritems(self.entities_notroot): + for kind, fields in six.iteritems(by_kind): + apply_fields((self.ENTITY_SECTION, 
namespace, kind, False), fields) + + +class SinglePropStats(object): + __slots__ = ['entity_stats', 'index_stats'] + + ENTITY_SECTION = u'entity-properties' + + INDEX_SECTION = u'index-properties' + + def __init__(self): + # By namespace/kind/prop_type/prop_name + self.entity_stats = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(CountBytes)))) + self.index_stats = defaultdict( + lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(CountBytes)))) + + @property + def empty(self): + return not any((self.entity_stats, self.index_stats)) + + def update(self, old_entry, new_entry, index_stats): + namespace = old_entry.namespace + kind = old_entry.kind + apply_props = create_apply_props(self.entity_stats, namespace, kind) + if old_entry.present: + apply_props(old_entry.decoded.property_list(), subtract=True) + apply_props(old_entry.decoded.raw_property_list(), subtract=True) + + if new_entry is not None: + apply_props(new_entry.decoded.property_list()) + apply_props(new_entry.decoded.raw_property_list()) + + for prop_type, by_name in six.iteritems(index_stats.single_prop): + for prop_name, fields in six.iteritems(by_name): + self.index_stats[namespace][kind][prop_type][prop_name] += fields + + def update_entity_props_from_kv(self, path, encoded_value): + namespace, kind, prop_type, prop_name, field = path + value = decode_delta(encoded_value) + setattr(self.entity_stats[namespace][kind][prop_type][prop_name], + field, value) + + def update_index_props_from_kv(self, path, encoded_value): + namespace, kind, prop_type, prop_name, field = path + value = decode_delta(encoded_value) + setattr(self.index_stats[namespace][kind][prop_type][prop_name], + field, value) + + def apply(self, tr, stats_dir): + apply_fields = create_apply_fields(tr, stats_dir) + for namespace, by_kind in six.iteritems(self.entity_stats): + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + for prop_name, fields in six.iteritems(by_name): + apply_fields((self.ENTITY_SECTION, namespace, kind, prop_type, + prop_name), fields) + + for namespace, by_kind in six.iteritems(self.index_stats): + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + for prop_name, fields in six.iteritems(by_name): + apply_fields((self.INDEX_SECTION, namespace, kind, prop_type, + prop_name), fields) + + +class ProjectStats(object): + def __init__(self): + self.composite_stats = CompositeStats() + self.entity_stats = EntityStats() + self.property_stats = SinglePropStats() + + @property + def empty(self): + return all((self.composite_stats.empty, self.entity_stats.empty, + self.property_stats.empty)) + + def update(self, old_entry, new_entry, index_stats): + self.composite_stats.update(old_entry.namespace, old_entry.kind, + index_stats) + self.entity_stats.update(old_entry, new_entry, index_stats) + self.property_stats.update(old_entry, new_entry, index_stats) + + def update_from_kv(self, section, path, encoded_value): + if section == CompositeStats.SECTION_ID: + self.composite_stats.update_from_kv(path, encoded_value) + elif section == EntityStats.BUILTINS_SECTION: + self.entity_stats.update_builtins_from_kv(path, encoded_value) + elif section == EntityStats.ENTITY_SECTION: + self.entity_stats.update_entities_from_kv(path, encoded_value) + elif section == SinglePropStats.ENTITY_SECTION: + self.property_stats.update_entity_props_from_kv(path, encoded_value) + elif section == SinglePropStats.INDEX_SECTION: + 
self.property_stats.update_index_props_from_kv(path, encoded_value) + + def apply(self, tr, stats_dir): + self.composite_stats.apply(tr, stats_dir) + self.entity_stats.apply(tr, stats_dir) + self.property_stats.apply(tr, stats_dir) diff --git a/AppDB/appscale/datastore/fdb/stats/entities.py b/AppDB/appscale/datastore/fdb/stats/entities.py new file mode 100644 index 0000000000..77a726bfc2 --- /dev/null +++ b/AppDB/appscale/datastore/fdb/stats/entities.py @@ -0,0 +1,477 @@ +""" +Each stat kind is populated from one or more stat sections (which are described +in the containers module). +Ns_Kind_CompositeIndex -> composite-indexes +Kind_CompositeIndex -> composite-indexes +Ns_Kind_IsRootEntity -> entities + builtin-indexes +Ns_Kind_NotRootEntity -> entities + builtin-indexes +Kind_IsRootEntity -> entities + builtin-indexes +Kind_NotRootEntity -> entities + builtin-indexes +Ns_PropertyType_PropertyName_Kind -> entity-properties + index-properties +Ns_PropertyName_Kind -> entity-properties + index-properties +Ns_PropertyType_Kind -> entity-properties + index-properties +PropertyType_PropertyName_Kind -> entity-properties + index-properties +Ns_PropertyType -> entity-properties + index-properties +PropertyName_Kind -> entity-properties + index-properties +PropertyType_Kind -> entity-properties + index-properties +PropertyType -> entity-properties + index-properties +Ns_Kind -> entities + builtin-indexes + composite-indexes +Kind -> entities + builtin-indexes + composite-indexes +Namespace -> entities + builtin-indexes + composite-indexes +Ns_Total -> entities + builtin-indexes + composite-indexes +Total -> entities + builtin-indexes + composite-indexes +""" +import datetime +import logging +import sys +import time +from collections import defaultdict + +import six + +from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.datastore.fdb.stats.containers import CountBytes, StatsPropTypes + +sys.path.append(APPSCALE_PYTHON_APPSERVER) +from google.appengine.datastore import entity_pb + +# The value the datastore uses to populate the meaning field for timestamps. 
+GD_WHEN = 7 + +logger = logging.getLogger(__name__) + + +def fill_entity(project_id, kind, properties, name=None, id_=None, + namespace=''): + entity = entity_pb.EntityProto() + key = entity.mutable_key() + key.set_app(project_id) + if namespace: + key.set_name_space(namespace) + + path = key.mutable_path() + element = path.add_element() + element.set_type(kind) + if name is not None: + element.set_name(name) + else: + element.set_id(id_) + + group = entity.mutable_entity_group() + group.add_element().CopyFrom(element) + for prop_name, value in six.iteritems(properties): + prop = entity.add_property() + prop.set_name(prop_name) + prop.set_multiple(False) + value_pb = prop.mutable_value() + if isinstance(value, datetime.datetime): + value_pb.set_int64value( + int(time.mktime(value.timetuple()) * 1000000 + value.microsecond)) + prop.set_meaning(GD_WHEN) + elif isinstance(value, int): + value_pb.set_int64value(value) + else: + value_pb.set_stringvalue(value.encode('utf-8')) + + return entity + + +def fill_entities(project_id, project_stats, timestamp): + entities = [] + + composite_stats = project_stats.composite_stats.stats + stats_kind = u'__Stat_Ns_Kind_CompositeIndex__' + for namespace, by_index in six.iteritems(composite_stats): + for (index_id, kind), fields in six.iteritems(by_index): + name = u'_'.join([kind, six.text_type(index_id)]) + props = {'index_id': index_id, 'kind_name': kind, 'timestamp': timestamp, + 'count': fields.count, 'bytes': fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name, + namespace=namespace)) + + stats_kind = u'__Stat_Kind_CompositeIndex__' + composite_stats_by_kind = defaultdict(CountBytes) + for namespace, by_index in six.iteritems(composite_stats): + for key, fields in six.iteritems(by_index): + composite_stats_by_kind[key] += fields + + for (index_id, kind), fields in six.iteritems(composite_stats_by_kind): + name = u'_'.join([kind, six.text_type(index_id)]) + props = {'index_id': index_id, 'kind_name': kind, 'timestamp': timestamp, + 'count': fields.count, 'bytes': fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name)) + + entity_stats = project_stats.entity_stats + stats_kind = u'__Stat_Ns_Kind_IsRootEntity__' + for namespace, by_kind in six.iteritems(entity_stats.entities_root): + for kind, entity_fields in six.iteritems(by_kind): + builtin_fields = entity_stats.builtin_indexes_root[namespace][kind] + props = {'kind_name': kind, 'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, kind, + namespace=namespace)) + + stats_kind = u'__Stat_Ns_Kind_NotRootEntity__' + for namespace, by_kind in six.iteritems(entity_stats.entities_notroot): + for kind, entity_fields in six.iteritems(by_kind): + builtin_fields = entity_stats.builtin_indexes_notroot[namespace][kind] + props = {'kind_name': kind, 'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, kind, + namespace=namespace)) + + stats_kind = u'__Stat_Ns_Kind__' + entity_stats_by_ns_kind = entity_stats.entities_root.copy() + for namespace, by_kind in 
six.iteritems(entity_stats.entities_notroot): + for kind, fields in six.iteritems(by_kind): + entity_stats_by_ns_kind[namespace][kind] += fields + + builtin_stats_by_ns_kind = entity_stats.builtin_indexes_root.copy() + for namespace, by_kind in six.iteritems(entity_stats.builtin_indexes_notroot): + for kind, fields in six.iteritems(by_kind): + builtin_stats_by_ns_kind[namespace][kind] += fields + + for namespace, by_kind in six.iteritems(entity_stats_by_ns_kind): + for kind, entity_fields in six.iteritems(by_kind): + builtin_fields = builtin_stats_by_ns_kind[namespace][kind] + composite_fields = composite_stats[namespace][kind] + props = {'kind_name': kind, 'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'composite_index_count': composite_fields.count, + 'composite_index_bytes': composite_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes + + composite_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, kind, + namespace=namespace)) + + stats_kind = u'__Stat_Kind_IsRootEntity__' + root_entity_stats_by_kind = defaultdict(CountBytes) + for namespace, by_kind in six.iteritems(entity_stats.entities_root): + for kind, fields in six.iteritems(by_kind): + root_entity_stats_by_kind[kind] += fields + + root_builtin_stats_by_kind = defaultdict(CountBytes) + for namespace, by_kind in six.iteritems(entity_stats.builtin_indexes_root): + for kind, fields in six.iteritems(by_kind): + root_builtin_stats_by_kind[kind] += fields + + for kind, entity_fields in six.iteritems(root_entity_stats_by_kind): + builtin_fields = root_builtin_stats_by_kind[kind] + props = {'kind_name': kind, 'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, kind)) + + stats_kind = u'__Stat_Kind_NotRootEntity__' + notroot_entity_stats_by_kind = defaultdict(CountBytes) + for namespace, by_kind in six.iteritems(entity_stats.entities_notroot): + for kind, fields in six.iteritems(by_kind): + notroot_entity_stats_by_kind[kind] += fields + + notroot_builtin_stats_by_kind = defaultdict(CountBytes) + for namespace, by_kind in six.iteritems(entity_stats.builtin_indexes_notroot): + for kind, fields in six.iteritems(by_kind): + notroot_builtin_stats_by_kind[kind] += fields + + for kind, entity_fields in six.iteritems(notroot_entity_stats_by_kind): + builtin_fields = notroot_builtin_stats_by_kind[kind] + props = {'kind_name': kind, 'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, kind)) + + stats_kind = u'__Stat_Kind__' + entity_stats_by_kind = defaultdict(CountBytes) + for kind, fields in six.iteritems(root_entity_stats_by_kind): + entity_stats_by_kind[kind] += fields + + for kind, fields in six.iteritems(notroot_entity_stats_by_kind): + entity_stats_by_kind[kind] += fields + + builtin_stats_by_kind = defaultdict(CountBytes) + for kind, fields in six.iteritems(root_builtin_stats_by_kind): + builtin_stats_by_kind[kind] += fields + + for kind, fields in 
six.iteritems(notroot_builtin_stats_by_kind): + builtin_stats_by_kind[kind] += fields + + for kind, entity_fields in six.iteritems(entity_stats_by_kind): + builtin_fields = builtin_stats_by_kind[kind] + composite_fields = composite_stats_by_kind[kind] + props = {'kind_name': kind, 'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, 'entity_bytes': entity_fields.bytes, + 'composite_index_count': composite_fields.count, + 'composite_index_bytes': composite_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes + + composite_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, kind)) + + stats_kind = u'__Stat_Namespace__' + composite_stats_by_ns = { + namespace: sum(six.itervalues(by_kind), CountBytes()) + for namespace, by_kind in six.iteritems(composite_stats)} + + entity_stats_by_ns = defaultdict(CountBytes) + for namespace, by_kind in six.iteritems(entity_stats.entities_root): + entity_stats_by_ns[namespace] += sum(six.itervalues(by_kind), CountBytes()) + + for namespace, by_kind in six.iteritems(entity_stats.entities_notroot): + entity_stats_by_ns[namespace] += sum(six.itervalues(by_kind), CountBytes()) + + builtin_stats_by_ns = defaultdict(CountBytes) + for namespace, by_kind in six.iteritems(entity_stats.builtin_indexes_root): + builtin_stats_by_ns[namespace] += sum(six.itervalues(by_kind), CountBytes()) + + for namespace, by_kind in six.iteritems(entity_stats.builtin_indexes_notroot): + builtin_stats_by_ns[namespace] += sum(six.itervalues(by_kind), CountBytes()) + + for namespace, entity_fields in six.iteritems(entity_stats_by_ns): + builtin_fields = builtin_stats_by_ns[namespace] + composite_fields = composite_stats_by_ns[namespace] + props = {'subject_namespace': namespace, 'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, 'entity_bytes': entity_fields.bytes, + 'composite_index_count': composite_fields.count, + 'composite_index_bytes': composite_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes + + composite_fields.bytes} + if namespace: + entities.append(fill_entity(project_id, stats_kind, props, namespace)) + else: + entities.append(fill_entity(project_id, stats_kind, props, id_=1)) + + stats_kind = u'__Stat_Ns_Total__' + name = u'total_entity_usage' + for namespace, entity_fields in six.iteritems(entity_stats_by_ns): + builtin_fields = builtin_stats_by_ns[namespace] + composite_fields = composite_stats_by_ns[namespace] + props = {'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, 'entity_bytes': entity_fields.bytes, + 'composite_index_count': composite_fields.count, + 'composite_index_bytes': composite_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes + + composite_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name, + namespace=namespace)) + + stats_kind = u'__Stat_Total__' + name = u'total_entity_usage' + entity_fields = sum(six.itervalues(entity_stats_by_ns), CountBytes()) + builtin_fields = sum(six.itervalues(builtin_stats_by_ns), CountBytes()) + composite_fields = sum(six.itervalues(composite_stats_by_ns), CountBytes()) + props = {'timestamp': timestamp, + 'builtin_index_count': builtin_fields.count, + 'builtin_index_bytes': builtin_fields.bytes, + 'count': entity_fields.count, 
'entity_bytes': entity_fields.bytes, + 'composite_index_count': composite_fields.count, + 'composite_index_bytes': composite_fields.bytes, + 'bytes': entity_fields.bytes + builtin_fields.bytes + + composite_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name)) + + prop_stats = project_stats.property_stats + stats_kind = u'__Stat_Ns_PropertyType_PropertyName_Kind__' + for namespace, by_kind in six.iteritems(prop_stats.entity_stats): + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + type_name = StatsPropTypes.NAMES[prop_type] + for prop_name, entity_fields in six.iteritems(by_name): + name = u'_'.join([type_name, prop_name, kind]) + index_fields = prop_stats.index_stats[namespace][kind][prop_type]\ + [prop_name] + props = {'kind_name': kind, 'timestamp': timestamp, + 'property_type': type_name, 'property_name': prop_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name, + namespace=namespace)) + + stats_kind = u'__Stat_Ns_PropertyType_Kind__' + for namespace, by_kind in six.iteritems(prop_stats.entity_stats): + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + type_name = StatsPropTypes.NAMES[prop_type] + name = u'_'.join([type_name, kind]) + entity_fields = sum(six.itervalues(by_name), CountBytes()) + index_fields = sum( + six.itervalues(prop_stats.index_stats[namespace][kind][prop_type]), + CountBytes()) + props = {'kind_name': kind, 'timestamp': timestamp, + 'property_type': type_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name, + namespace=namespace)) + + stats_kind = u'__Stat_Ns_PropertyName_Kind__' + for namespace, by_kind in six.iteritems(prop_stats.entity_stats): + for kind, by_type in six.iteritems(by_kind): + combined_entities = defaultdict(CountBytes) + combined_indexes = defaultdict(CountBytes) + for prop_type, by_name in six.iteritems(by_type): + for prop_name, fields in six.iteritems(by_name): + combined_entities[prop_name] += fields + combined_indexes[prop_name] += prop_stats.index_stats[namespace]\ + [kind][prop_type][prop_name] + + for prop_name, entity_fields in six.iteritems(combined_entities): + name = u'_'.join([prop_name, kind]) + index_fields = combined_indexes[prop_name] + props = {'kind_name': kind, 'timestamp': timestamp, + 'property_name': prop_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name, + namespace=namespace)) + + stats_kind = u'__Stat_Ns_PropertyType__' + for namespace, by_kind in six.iteritems(prop_stats.entity_stats): + combined_entities = defaultdict(CountBytes) + combined_indexes = defaultdict(CountBytes) + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + combined_entities[prop_type] += sum( + six.itervalues(by_name), CountBytes()) + combined_indexes[prop_type] += sum( + 
six.itervalues(prop_stats.index_stats[namespace][kind][prop_type]), + CountBytes()) + + for prop_type, entity_fields in six.iteritems(combined_entities): + type_name = StatsPropTypes.NAMES[prop_type] + index_fields = combined_indexes[prop_type] + props = {'timestamp': timestamp, 'property_type': type_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, type_name, + namespace=namespace)) + + stats_kind = u'__Stat_PropertyName_Kind__' + combined_entities = defaultdict(lambda: defaultdict(CountBytes)) + combined_indexes = defaultdict(lambda: defaultdict(CountBytes)) + for namespace, by_kind in six.iteritems(prop_stats.entity_stats): + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + for prop_name, fields in six.iteritems(by_name): + combined_entities[prop_name][kind] += fields + combined_indexes[prop_name][kind] += prop_stats.index_stats\ + [namespace][kind][prop_type][prop_name] + + for prop_name, by_kind in six.iteritems(combined_entities): + for kind, entity_fields in six.iteritems(by_kind): + index_fields = combined_indexes[prop_name][kind] + name = u'_'.join([prop_name, kind]) + props = {'timestamp': timestamp, 'kind_name': kind, + 'property_name': prop_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name)) + + stats_kind = u'__Stat_PropertyType_Kind__' + combined_entities = defaultdict(lambda: defaultdict(CountBytes)) + combined_indexes = defaultdict(lambda: defaultdict(CountBytes)) + for namespace, by_kind in six.iteritems(prop_stats.entity_stats): + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + combined_entities[prop_type][kind] += sum(six.itervalues(by_name), + CountBytes()) + combined_indexes[prop_type][kind] += sum( + six.itervalues(prop_stats.index_stats[namespace][kind][prop_type]), + CountBytes()) + + for prop_type, by_kind in six.iteritems(combined_entities): + type_name = StatsPropTypes.NAMES[prop_type] + for kind, entity_fields in six.iteritems(by_kind): + index_fields = combined_indexes[prop_type][kind] + name = u'_'.join([type_name, kind]) + props = {'timestamp': timestamp, 'kind_name': kind, + 'property_type': type_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name)) + + stats_kind = u'__Stat_PropertyType_PropertyName_Kind__' + entity_props_by_type_name_kind = defaultdict( + lambda: defaultdict(lambda: defaultdict(CountBytes))) + index_props_by_type_name_kind = defaultdict( + lambda: defaultdict(lambda: defaultdict(CountBytes))) + for namespace, by_kind in six.iteritems(prop_stats.entity_stats): + for kind, by_type in six.iteritems(by_kind): + for prop_type, by_name in six.iteritems(by_type): + for prop_name, entity_fields in six.iteritems(by_name): + entity_props_by_type_name_kind[prop_type][prop_name][kind] += \ + entity_fields + index_props_by_type_name_kind[prop_type][prop_name][kind] 
+= \ + prop_stats.index_stats[namespace][kind][prop_type][prop_name] + + for prop_type, by_name in six.iteritems(entity_props_by_type_name_kind): + type_name = StatsPropTypes.NAMES[prop_type] + for prop_name, by_kind in six.iteritems(by_name): + for kind, entity_fields in six.iteritems(by_kind): + index_fields = index_props_by_type_name_kind[prop_type][prop_name][kind] + name = u'_'.join([type_name, prop_name, kind]) + props = {'timestamp': timestamp, 'kind_name': kind, + 'property_type': type_name, 'property_name': prop_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, name)) + + stats_kind = u'__Stat_PropertyType__' + for prop_type, by_name in six.iteritems(entity_props_by_type_name_kind): + type_name = StatsPropTypes.NAMES[prop_type] + entity_fields = sum( + (sum(six.itervalues(by_kind), CountBytes()) + for by_kind in six.itervalues(by_name)), CountBytes()) + index_fields = sum( + (sum(six.itervalues(by_kind), CountBytes()) + for by_kind in six.itervalues(index_props_by_type_name_kind[prop_type])), + CountBytes()) + props = {'timestamp': timestamp, 'property_type': type_name, + 'builtin_index_count': index_fields.count, + 'builtin_index_bytes': index_fields.bytes, + 'count': entity_fields.count, + 'entity_bytes': entity_fields.bytes, + 'bytes': entity_fields.bytes + index_fields.bytes} + entities.append(fill_entity(project_id, stats_kind, props, type_name)) + + return entities diff --git a/AppDB/appscale/datastore/fdb/utils.py b/AppDB/appscale/datastore/fdb/utils.py index 9a2c0b47fe..147fc04e78 100644 --- a/AppDB/appscale/datastore/fdb/utils.py +++ b/AppDB/appscale/datastore/fdb/utils.py @@ -5,6 +5,7 @@ import json import logging import random +import struct import time import fdb @@ -298,3 +299,12 @@ def format_prop_val(prop_value): return Path.flatten(prop_value.referencevalue()) else: return None + + +def encode_delta(value): + """ Encodes a value suitable for use with tr.add. 
""" + return struct.pack(' Date: Tue, 10 Sep 2019 18:42:09 -0700 Subject: [PATCH 106/221] Install erlang, hold the X --- debian/control.bionic | 2 +- debian/control.stretch | 2 +- debian/control.xenial | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/debian/control.bionic b/debian/control.bionic index 6ea6466857..7f3af74885 100644 --- a/debian/control.bionic +++ b/debian/control.bionic @@ -9,7 +9,7 @@ Build-Depends: ant, debhelper (>= 7), dh-make, dpkg-dev, - erlang, + erlang-nox, fakeroot, flex, libbz2-dev, diff --git a/debian/control.stretch b/debian/control.stretch index 642c02f698..72a0a68886 100644 --- a/debian/control.stretch +++ b/debian/control.stretch @@ -9,7 +9,7 @@ Build-Depends: ant, debhelper (>= 7), dh-make, dpkg-dev, - erlang, + erlang-nox, fakeroot, flex, libbz2-dev, diff --git a/debian/control.xenial b/debian/control.xenial index 99d1f187c0..d8b9475e07 100644 --- a/debian/control.xenial +++ b/debian/control.xenial @@ -9,7 +9,7 @@ Build-Depends: ant, debhelper (>= 7), dh-make, dpkg-dev, - erlang, + erlang-nox, fakeroot, flex, libbz2-dev, From 5a998023f55c21cc5f74441d72e0aa6da3f10c6f Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Thu, 12 Sep 2019 14:55:25 -0700 Subject: [PATCH 107/221] Fixed variable out of scope --- AppController/djinn.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 78d34fb1ff..5cb7c592d4 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -2532,10 +2532,9 @@ def get_all_compute_nodes # can be easily terminated. def can_we_scale_down?(min_machines) @state_change_lock.synchronize { - nodes_to_check = @nodes.drop(min_machines) - } - nodes_to_check.each { |node| - return false if node['jobs'] != ['compute'] + @nodes.drop(min_machines).each { |node| + return false if node['jobs'] != ['compute'] + } } return true end From 1fedcbd459d27f17c3f992731b6c6161dcad16d1 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 12 Sep 2019 16:15:02 -0700 Subject: [PATCH 108/221] Updates for systemd, hermes stats fix typo --- Hermes/appscale/hermes/producers/process_stats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Hermes/appscale/hermes/producers/process_stats.py b/Hermes/appscale/hermes/producers/process_stats.py index 47e54647e9..f0e7b93039 100644 --- a/Hermes/appscale/hermes/producers/process_stats.py +++ b/Hermes/appscale/hermes/producers/process_stats.py @@ -67,7 +67,7 @@ class ProcessStats(object): Every Hermes node collects its processes statistics, but Master node also requests this statistics of all nodes in cluster. - AppSsale services started by systemd should be profiled. + AppScale services started by systemd should be profiled. """ pid = attr.ib() monit_name = attr.ib() # Monit / external name From da5f5ea5e4ca911ed664c2bdfca6be490f233449 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Thu, 12 Sep 2019 22:02:19 -0700 Subject: [PATCH 109/221] Look through all nodes when downscaling When looking to downscale instances, make sure you look though all nodes for autoscaled node. This is needed since min_machines could have been changed few times. 
--- AppController/djinn.rb | 63 +++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 5cb7c592d4..3801589853 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -892,7 +892,8 @@ def enforce_options if Integer(@options['min_machines']) > @nodes.length msg = 'min_machines is bigger than the number of nodes!' Djinn.log_warn(msg) - raise AppScaleException.new(msg) + # No exception raised here since we may be lowering the number + # of min_machines, just a warning in the logs will suffice. end if Integer(@options['max_machines']) < Integer(@options['min_machines']) msg = 'min_machines is bigger than max_machines!' @@ -2533,7 +2534,7 @@ def get_all_compute_nodes def can_we_scale_down?(min_machines) @state_change_lock.synchronize { @nodes.drop(min_machines).each { |node| - return false if node['jobs'] != ['compute'] + return false if node.roles != ['compute'] } } return true @@ -5057,7 +5058,6 @@ def scale_up_instances(needed_nodes) # any AppServers and the minimum number of user specified machines are still # running in the deployment. def scale_down_instances - num_scaled_down = 0 # If we are already at the minimum number of machines that the user specified, # then we do not have the capacity to scale down. max_scale_down_capacity = @nodes.length - Integer(@options['min_machines']) @@ -5082,38 +5082,39 @@ def scale_down_instances Thread.new { SCALE_LOCK.synchronize { - # Look through an array of autoscaled nodes and check if any of the - # machines are not running any AppServers and need to be downscaled. - get_autoscaled_nodes.reverse_each { |node| - break if num_scaled_down == max_scale_down_capacity - - hosted_apps = [] - @versions_loaded.each { |version_key| - @app_info_map[version_key]['appservers'].each { |location| - host, port = location.split(":") - if host == node.private_ip - hosted_apps << "#{version_key}:#{port}" - end + # Look through the nodes and check if any of the machines was + # autoscaled (compute role only)a and are not running any + # AppServers and need to be downscaled. + nodes_to_remove = [] + @state_change_lock.synchronize { + @nodes.reverse_each { |node| + break if nodes_to_remove.length == max_scale_down_capacity + + hosted_apps = [] + @versions_loaded.each { |version_key| + @app_info_map[version_key]['appservers'].each { |location| + host, port = location.split(":") + hosted_apps << "#{version_key}:#{port}" if host == node.private_ip + } } - } - unless hosted_apps.empty? - Djinn.log_debug("The node #{node.private_ip} has these AppServers " \ - "running: #{hosted_apps}") - next - end + unless hosted_apps.empty? + Djinn.log_debug("The node #{node.private_ip} has these AppServers " \ + "running: #{hosted_apps}") + next + end - # Right now, only the autoscaled machines are started with just the - # compute role, so we check specifically for that during downscaling - # to make sure we only downscale the new machines added. - node_to_remove = nil - if node.roles == ['compute'] - Djinn.log_info("Removing node #{node}") - node_to_remove = node - end + # Right now, only the autoscaled machines are started with just the + # compute role, so we check specifically for that during downscaling + # to make sure we only downscale the new machines added. 
+ nodes_to_remove << node if node.roles == ['compute'] + } + } - num_terminated = terminate_node_from_deployment(node_to_remove) - num_scaled_down += num_terminated + # Now we remove the nodes marked for deletion. + nodes_to_remove.each { |node| + Djinn.log_info("Removing node #{node}.") + APPS_LOCK.synchronize { terminate_node_from_deployment(node) } } } From dcb38ea2818f13b30efd974a61f927258d483830 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 13 Sep 2019 16:34:16 +0300 Subject: [PATCH 110/221] Addressing PR comments - Using releases/latest API method. - Adding agents and thirdparties parameters (undo removal). - Using `... | python -m json.tool` to prevent inconsistent reading of github response. --- bootstrap.sh | 12 +++++++++--- upgrade.sh | 8 ++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index ddb49f1aea..89994df311 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -23,7 +23,9 @@ TAG_PARAM_SPECIFIED="N" usage() { echo "Usage: ${0} [--repo ] [--branch ]" echo " [--tools-repo ] [--tools-branch ]" - echo " [--tag ] [-t]" + echo " [--agents-repo ] [--agents-branch ]" + echo " [--thirdparties-repo ] [--thirdparties-branch ]" + echo " [--tag ]" echo echo "Be aware that tag parameter has priority over repo and branch parameters." echo "So if no tag, repos and branches are specified, tag 'last' will be used." @@ -34,6 +36,10 @@ usage() { echo " --branch Specify appscale branch (default $APPSCALE_BRANCH)" echo " --tools-repo Specify appscale-tools repo (default $APPSCALE_TOOLS_REPO" echo " --tools-branch Specify appscale-tools branch (default $APPSCALE_TOOLS_BRANCH)" + echo " --agents-repo Specify appscale-agents repo (default $AGENTS_REPO" + echo " --agents-branch Specify appscale-agents branch (default $AGENTS_BRANCH)" + echo " --thirdparties-repo Specify appscale-thirdparties repo (default $THIRDPARTIES_REPO" + echo " --thirdparties-branch Specify appscale-thirdparties branch (default $THIRDPARTIES_BRANCH)" echo " --tag Use git tag (ie 3.7.2) or 'last' to use the latest release" echo " or 'dev' for HEAD (default ${GIT_TAG})" echo " -t Run unit tests" @@ -131,8 +137,8 @@ else RELY_ON_TAG="Y" if [ "${GIT_TAG}" = "last" ]; then echo "Determining the latest tag in AppScale/appscale repo" - GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/tags \ - | grep '"name"' | head -1 \ + GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/releases/latest \ + | python -m json.tool | grep '"tag_name"' \ | awk -F ':' '{ print $2 }' | tr --delete ' ,"') fi VERSION="${GIT_TAG}" diff --git a/upgrade.sh b/upgrade.sh index f0f9c95738..136d3fd73a 100755 --- a/upgrade.sh +++ b/upgrade.sh @@ -62,12 +62,12 @@ done # Determine the latest git tag on the AppScale/appscale repo if [ "$GIT_TAG" = "last" ]; then echo "Determining the latest tag in AppScale/appscale repo" - GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/tags \ - | grep '"name"' | head -1 \ + GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/releases/latest \ + | python -m json.tool | grep '"tag_name"' \ | awk -F ':' '{ print $2 }' | tr --delete ' ,"') elif ! 
curl --fail https://api.github.com/repos/appscale/appscale/tags \ - | grep '"name"' | awk -F ':' '{ print $2 }' | tr --delete ' ,"' \ - | grep "^${GIT_TAG}$"; then + | python -m json.tool | grep '"name"' | awk -F ':' '{ print $2 }' \ + | tr --delete ' ,"' | grep "^${GIT_TAG}$"; then echo "Tag '${GIT_TAG}' not recognized" echo "Use --tag to specify existing appscale repo tag to upgrade to." exit 1 From 8f4c61f5bf33a63d8ace058164e027fd6f8f8425 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 13 Sep 2019 17:06:47 +0300 Subject: [PATCH 111/221] Use 127.0.0.1 as a public address --- scripts/fast-start.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/fast-start.sh b/scripts/fast-start.sh index 70b57603d0..ebd78e9cdf 100755 --- a/scripts/fast-start.sh +++ b/scripts/fast-start.sh @@ -207,6 +207,7 @@ esac echo "Configuring local foundationdb" /root/appscale-thirdparties/foundationdb/configure-and-start-fdb.sh \ + --public-address 127.0.0.1 \ --data-dir /opt/appscale/fdb-data/ \ --fdbcli-command 'configure new single ssd' FDB_CLUSTERFILE_CONTENT=$(cat /etc/foundationdb/fdb.cluster) From 84de04ab263f7bb1a0e134cfc2f333f5a94678f9 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 12 Sep 2019 16:37:17 -0700 Subject: [PATCH 112/221] Updates for systemd, common helper explicit module calls --- common/appscale/common/service_helper.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/common/appscale/common/service_helper.py b/common/appscale/common/service_helper.py index 455ad49510..6dcb7cf1cc 100644 --- a/common/appscale/common/service_helper.py +++ b/common/appscale/common/service_helper.py @@ -1,3 +1,4 @@ +import importlib import logging import subprocess @@ -230,6 +231,11 @@ def __name_match(name): class ServiceOperator(object): """ Handles Service operations. """ + def __init__(self): + """ Creates a new ServiceOperator. + """ + self.helper = importlib.import_module(self.__module__) + @gen.coroutine def list_async(self): """ Retrieves the status for each service. @@ -245,7 +251,7 @@ def list(self): Returns: A dictionary mapping services to their state. """ - return list() + return self.helper.list() @gen.coroutine def start_async(self, name, enable=None, wants=None, properties=None): @@ -273,7 +279,8 @@ def start(self, name, enable=None, wants=None, properties=None): Returns: True if the service was started, else False. """ - return start(name, enable=enable, wants=wants, properties=properties) + return self.helper.start(name, enable=enable, wants=wants, + properties=properties) @gen.coroutine def stop_async(self, name): @@ -294,7 +301,7 @@ def stop(self, name): Returns: True if the named services were stopped. """ - return stop(name) + return self.helper.stop(name) @gen.coroutine def restart_async(self, name): @@ -315,4 +322,4 @@ def restart(self, name): Returns: True if services were restarted. 
""" - return restart(name) \ No newline at end of file + return self.helper.restart(name) \ No newline at end of file From 87363efe4eb708688b4b73fe20ed966cb51f8db5 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 13 Sep 2019 08:26:54 -0700 Subject: [PATCH 113/221] Updates for systemd, controller unit needs kill capability --- system/units/appscale-controller.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system/units/appscale-controller.service b/system/units/appscale-controller.service index d12d06508b..d09b068786 100644 --- a/system/units/appscale-controller.service +++ b/system/units/appscale-controller.service @@ -11,7 +11,7 @@ Environment=HOME=/root ExecStart=/usr/bin/ruby -w /root/appscale/AppController/djinnServer.rb SyslogIdentifier=%p # Security -CapabilityBoundingSet=CAP_DAC_OVERRIDE CAP_SETGID CAP_SETUID CAP_CHOWN CAP_SYS_MODULE CAP_AUDIT_WRITE CAP_NET_ADMIN CAP_NET_RAW +CapabilityBoundingSet=CAP_DAC_OVERRIDE CAP_SETGID CAP_SETUID CAP_CHOWN CAP_SYS_MODULE CAP_AUDIT_WRITE CAP_NET_ADMIN CAP_NET_RAW CAP_KILL [Install] WantedBy=appscale-control.target multi-user.target \ No newline at end of file From 55315a86a32f95d01250316ae8789947a2a7f9f4 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 13 Sep 2019 08:43:37 -0700 Subject: [PATCH 114/221] Updates for systemd, common helper clean up exceptions and returns --- common/appscale/common/service_helper.py | 94 ++++++++---------------- 1 file changed, 30 insertions(+), 64 deletions(-) diff --git a/common/appscale/common/service_helper.py b/common/appscale/common/service_helper.py index 6dcb7cf1cc..0d5eb3cf27 100644 --- a/common/appscale/common/service_helper.py +++ b/common/appscale/common/service_helper.py @@ -23,26 +23,19 @@ logger = logging.getLogger(__name__) -class NonZeroReturnStatus(Exception): - """ Indicates that command returned non-zero return status """ - pass - -def systemctl_run(args): +def __systemctl_run(args): """ Runs the given systemctl command. Args: args: A list of strs, where each str is an argument for systemctl. Raises: - NonZeroReturnStatus if command returned status different from 0. + subprocess.CalledProcessError if command returned status different from 0. """ - return_status = subprocess.call([SYSTEMCTL] + args) - if return_status != 0: - raise NonZeroReturnStatus('Command {0} return non-zero status: {1}' - .format(' '.join(args), return_status)) + subprocess.check_call([SYSTEMCTL] + args) -def systemctl_out(args): +def __systemctl_out(args): """ Runs the given systemctl command, returns output. Args: @@ -50,28 +43,21 @@ def systemctl_out(args): Returns: The output from the systemctl command Raises: - NonZeroReturnStatus if command returned status different from 0. + subprocess.CalledProcessError if command returned status different from 0. """ - try: - return subprocess.check_output([SYSTEMCTL] + args) - except subprocess.CalledProcessError as err: - raise NonZeroReturnStatus('Command {0} return non-zero status: {1}' - .format(' '.join(args), err.returncode)) + return subprocess.check_output([SYSTEMCTL] + args) + -def safe_systemctl_run(args): +def __safe_systemctl_run(args): """ Runs the given systemctl command, logging any error. Args: args: A list of strs, where each str is an argument for systemctl. - Returns: - True if command succeeded, False otherwise. 
""" try: - systemctl_run(args) - return True - except NonZeroReturnStatus as err: + __systemctl_run(args) + except subprocess.CalledProcessError as err: logger.error(err) - return False def start(name, background=False, enable=None, wants=None, properties=None): @@ -83,8 +69,6 @@ def start(name, background=False, enable=None, wants=None, properties=None): enable: True to enable, False to start only, None for default. wants: services required by this service properties: properties to set for the service - Returns: - True if the service was started, else False. """ logger.info('Starting service {0}'.format(name)) expanded_name = __expand_name(name) @@ -93,19 +77,19 @@ def start(name, background=False, enable=None, wants=None, properties=None): logger.info('Service {0} wants {1}'.format(name, ' '.join(wants))) wants_args = ['--runtime', 'add-wants', expanded_name] wants_args.extend([__expand_name(want) for want in wants]) - safe_systemctl_run(wants_args) + __safe_systemctl_run(wants_args) if properties: logger.info('Service {0} properties {1}'.format( name, ' '.join('='.join(item) for item in properties.items()))) properties_args = ['--runtime', 'set-property', expanded_name] properties_args.extend(['='.join(item) for item in properties.items()]) - safe_systemctl_run(properties_args) + __safe_systemctl_run(properties_args) - return safe_systemctl_run(__build_command('start', - expanded_name, - background=background, - enable=enable)) + __safe_systemctl_run(__build_command('start', + expanded_name, + background=background, + enable=enable)) def stop(name, background=False): @@ -114,13 +98,11 @@ def stop(name, background=False): Args: name: A str representing the name of the service(s) to stop. background: True to start without blocking - Returns: - True if the named services were stopped. """ logger.info('Stopping service(s) {0}'.format(name)) - return safe_systemctl_run(__build_command('stop', - __name_match(name), - background=background)) + __safe_systemctl_run(__build_command('stop', + __name_match(name), + background=background)) def restart(name, background=False, start=True): @@ -130,16 +112,14 @@ def restart(name, background=False, start=True): name: A str representing the name of the service(s) to restart. background: True to start without blocking start: True to start services if not already running (use False with name pattern) - Returns: - True if services were restarted. """ logger.info('Restarting service(s) {0}'.format(name)) command = 'try-restart' if start: command = 'restart' - return safe_systemctl_run(__build_command(command, - __name_match(name), - background=background)) + __safe_systemctl_run(__build_command(command, + __name_match(name), + background=background)) def list(running=False): @@ -157,7 +137,7 @@ def list(running=False): try: services = {} - output = systemctl_out(args) + output = __systemctl_out(args) for output_line in output.split('\n'): if not output_line: continue @@ -166,7 +146,7 @@ def list(running=False): continue services[service[:-8]] = STATUS_MAP.get(active, 'stopped') return services - except NonZeroReturnStatus: + except subprocess.CalledProcessError: return {} @@ -262,11 +242,8 @@ def start_async(self, name, enable=None, wants=None, properties=None): enable: True to enable, False to start only, None for default. wants: services required by this service properties: properties to set for the service - Returns: - True if the service was started, else False. 
""" - raise gen.Return(self.start(name, enable=enable, wants=wants, - properties=properties)) + self.start(name, enable=enable, wants=wants, properties=properties) def start(self, name, enable=None, wants=None, properties=None): """ Start the given service. @@ -276,11 +253,8 @@ def start(self, name, enable=None, wants=None, properties=None): enable: True to enable, False to start only, None for default. wants: services required by this service properties: properties to set for the service - Returns: - True if the service was started, else False. """ - return self.helper.start(name, enable=enable, wants=wants, - properties=properties) + self.helper.start(name, enable=enable, wants=wants, properties=properties) @gen.coroutine def stop_async(self, name): @@ -288,20 +262,16 @@ def stop_async(self, name): Args: name: A str representing the name of the service(s) to stop. - Returns: - True if the named services were stopped. """ - raise gen.Return(self.stop(name)) + self.stop(name) def stop(self, name): """ Stop the given service(s). Args: name: A str representing the name of the service(s) to stop. - Returns: - True if the named services were stopped. """ - return self.helper.stop(name) + self.helper.stop(name) @gen.coroutine def restart_async(self, name): @@ -309,17 +279,13 @@ def restart_async(self, name): Args: name: A str representing the name of the service(s) to restart. - Returns: - True if services were restarted. """ - raise gen.Return(self.restart(name)) + self.restart(name) def restart(self, name): """ Restart the given service(s). Args: name: A str representing the name of the service(s) to restart. - Returns: - True if services were restarted. """ - return self.helper.restart(name) \ No newline at end of file + self.helper.restart(name) \ No newline at end of file From f7f320cdbf5a767d942fa36f04065a4c1f630fb7 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 13 Sep 2019 08:45:31 -0700 Subject: [PATCH 115/221] Updates for systemd, common operator more async stop services less async --- AdminServer/appscale/admin/__init__.py | 2 +- .../appscale/admin/instance_manager/server.py | 2 +- AdminServer/appscale/admin/stop_services.py | 28 ++++++------------- common/appscale/common/service_helper.py | 16 +++++++---- 4 files changed, 22 insertions(+), 26 deletions(-) diff --git a/AdminServer/appscale/admin/__init__.py b/AdminServer/appscale/admin/__init__.py index 725bcf4a2d..002de967d9 100644 --- a/AdminServer/appscale/admin/__init__.py +++ b/AdminServer/appscale/admin/__init__.py @@ -1394,7 +1394,7 @@ def main(): zk_client.start() version_update_lock = zk_client.Lock(constants.VERSION_UPDATE_LOCK_NODE) thread_pool = ThreadPoolExecutor(4) - service_operator = ServiceOperator() + service_operator = ServiceOperator(thread_pool) all_resources = { 'acc': acc, 'ua_client': ua_client, diff --git a/AdminServer/appscale/admin/instance_manager/server.py b/AdminServer/appscale/admin/instance_manager/server.py index ab5d39acbb..6c2f3b212e 100644 --- a/AdminServer/appscale/admin/instance_manager/server.py +++ b/AdminServer/appscale/admin/instance_manager/server.py @@ -32,7 +32,7 @@ def main(): thread_pool = ThreadPoolExecutor(MAX_BACKGROUND_WORKERS) source_manager = SourceManager(zk_client, thread_pool) source_manager.configure_automatic_fetch(projects_manager) - service_operator = ServiceOperator() + service_operator = ServiceOperator(thread_pool) options.define('private_ip', appscale_info.get_private_ip()) options.define('syslog_server', appscale_info.get_headnode_ip()) diff --git 
a/AdminServer/appscale/admin/stop_services.py b/AdminServer/appscale/admin/stop_services.py index 187bacbfb5..baabebe680 100644 --- a/AdminServer/appscale/admin/stop_services.py +++ b/AdminServer/appscale/admin/stop_services.py @@ -1,14 +1,11 @@ """ Tries to stop all services until they are stopped. """ import argparse import logging - -from tornado import gen, ioloop +import time from appscale.common import service_helper -from appscale.common.async_retrying import retry_coroutine from appscale.common.constants import LOG_FORMAT from appscale.common.retrying import retry -from appscale.common.service_helper import ServiceOperator logger = logging.getLogger(__name__) @@ -31,21 +28,19 @@ def stop_service(): service_helper.stop(args.service) -@gen.coroutine -def main_async(): +def stop_services(): """ Tries to stop all appscale services until they are stopped. """ - @retry_coroutine(max_retries=3) + @retry(max_retries=3) def stop_with_retries(): - logger.debug('Stopping AppScale services') - yield service_operator.start_async('appscale-down.target', enable=False) + logger.debug('Stopping AppScale services') + service_helper.start('appscale-down.target', enable=False) logger.info('Waiting for services to stop') - service_operator = ServiceOperator() stop_requested = False original_services_count = None stopped_count = 0 while True: - services = yield service_operator.list_async() + services = service_helper.list() if original_services_count is None: original_services_count = len(services) @@ -63,10 +58,10 @@ def stop_with_retries(): 'Stopped {}/{} services'.format(stopped_count, original_services_count)) if not stop_requested: - yield stop_with_retries() + stop_with_retries() stop_requested = True - yield gen.sleep(min(0.3 * len(running), 5)) + time.sleep(min(0.3 * len(running), 5)) def main(): @@ -81,9 +76,4 @@ def main(): if args.verbose: logging.getLogger('appscale').setLevel(logging.DEBUG) - # Like synchronous HTTPClient, create separate IOLoop for sync code - io_loop = ioloop.IOLoop(make_current=False) - try: - return io_loop.run_sync(lambda: main_async()) - finally: - io_loop.close() + stop_services() diff --git a/common/appscale/common/service_helper.py b/common/appscale/common/service_helper.py index 0d5eb3cf27..4b0d101ae7 100644 --- a/common/appscale/common/service_helper.py +++ b/common/appscale/common/service_helper.py @@ -211,9 +211,13 @@ def __name_match(name): class ServiceOperator(object): """ Handles Service operations. """ - def __init__(self): + def __init__(self, thread_pool): """ Creates a new ServiceOperator. + + Args: + thread_pool: A ThreadPoolExecutor. """ + self.thread_pool = thread_pool self.helper = importlib.import_module(self.__module__) @gen.coroutine @@ -223,7 +227,8 @@ def list_async(self): Returns: A dictionary mapping services to their state. """ - raise gen.Return(self.list()) + listing = yield self.thread_pool.submit(self.list) + raise gen.Return(listing) def list(self): """ Retrieves the status for each service. @@ -243,7 +248,8 @@ def start_async(self, name, enable=None, wants=None, properties=None): wants: services required by this service properties: properties to set for the service """ - self.start(name, enable=enable, wants=wants, properties=properties) + yield self.thread_pool.submit(self.start, name, enable=enable, + wants=wants, properties=properties) def start(self, name, enable=None, wants=None, properties=None): """ Start the given service. 
@@ -263,7 +269,7 @@ def stop_async(self, name): Args: name: A str representing the name of the service(s) to stop. """ - self.stop(name) + yield self.thread_pool.submit(self.stop, name) def stop(self, name): """ Stop the given service(s). @@ -280,7 +286,7 @@ def restart_async(self, name): Args: name: A str representing the name of the service(s) to restart. """ - self.restart(name) + yield self.thread_pool.submit(self.restart, name) def restart(self, name): """ Restart the given service(s). From 8d766b3735189d42ebf77ca11751fb5963ee2c41 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 13 Sep 2019 18:00:54 -0700 Subject: [PATCH 116/221] Collapse mutations to the same key This only keeps the final mutation for a given key. It fixes a bug where orphaned index entries could be written. --- AppDB/appscale/datastore/fdb/fdb_datastore.py | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index e3157e5fa6..cbef9502e5 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -83,17 +83,18 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): if put_request.has_transaction(): yield self._tx_manager.log_puts(tr, project_id, put_request) - writes = [(VersionEntry.from_key(entity.key()), - VersionEntry.from_key(entity.key())) - for entity in put_request.entity_list()] + writes = {entity.key().Encode(): (VersionEntry.from_key(entity.key()), + VersionEntry.from_key(entity.key())) + for entity in put_request.entity_list()} else: - futures = [] - for entity in put_request.entity_list(): - futures.append(self._upsert(tr, entity)) - - writes = yield futures - - old_entries = [old_entry for old_entry, _ in writes if old_entry.present] + # Eliminate multiple puts to the same key. + puts_by_key = {entity.key().Encode(): entity + for entity in put_request.entity_list()} + writes = yield {key: self._upsert(tr, entity) + for key, entity in six.iteritems(puts_by_key)} + + old_entries = [old_entry for old_entry, _ in six.itervalues(writes) + if old_entry.present] versionstamp_future = None if old_entries: versionstamp_future = tr.get_versionstamp() @@ -114,10 +115,11 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) - for _, new_entry in writes: - put_response.add_key().CopyFrom(new_entry.key) - if new_entry.version != ABSENT_VERSION: - put_response.add_version(new_entry.version) + for entity in put_request.entity_list(): + write_entry = writes[entity.key().Encode()][1] + put_response.add_key().CopyFrom(entity.key()) + if write_entry.version != ABSENT_VERSION: + put_response.add_version(write_entry.version) #logger.debug('put_response:\n{}'.format(put_response)) @@ -173,11 +175,10 @@ def dynamic_delete(self, project_id, delete_request, retries=5): deletes = [(VersionEntry.from_key(key), None) for key in delete_request.key_list()] else: - futures = [] - for key in delete_request.key_list(): - futures.append(self._delete(tr, key)) - - deletes = yield futures + # Eliminate multiple deletes to the same key. 
+ deletes_by_key = {key.Encode(): key for key in delete_request.key_list()} + deletes = yield [self._delete(tr, key) + for key in six.itervalues(deletes_by_key)] old_entries = [old_entry for old_entry, _ in deletes if old_entry.present] versionstamp_future = None From 7912a068fa144185446474caae4ca593cf59dbcf Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Mon, 16 Sep 2019 17:16:10 +0300 Subject: [PATCH 117/221] Update version verification - verify version only if tag is specified. - use `git checkout "tags/${GIT_TAG}"`. --- bootstrap.sh | 20 ++++++++++---------- upgrade.sh | 24 +++++++++--------------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index 89994df311..0f10eb36c5 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -152,8 +152,8 @@ if [ "${RELY_ON_TAG}" = "Y" ]; then echo "Will be using the following github repos:" echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" - if version_ge ${VERSION} 3.7.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi - if version_ge ${VERSION} 3.8.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi + if version_ge ${VERSION} 3.8.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi + if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi echo "Exit now (ctrl-c) if this is incorrect" else echo "Will be using the following github repos:" @@ -213,10 +213,10 @@ done if [ "${RELY_ON_TAG}" = "Y" ]; then - APPSCALE_TARGET="${GIT_TAG}" - TOOLS_TARGET="${GIT_TAG}" - AGENTS_TARGET="${GIT_TAG}" - THIRDPARTIES_TARGET="${GIT_TAG}" + APPSCALE_TARGET="tags/${GIT_TAG}" + TOOLS_TARGET="tags/${GIT_TAG}" + AGENTS_TARGET="tags/${GIT_TAG}" + THIRDPARTIES_TARGET="tags/${GIT_TAG}" else APPSCALE_TARGET="${APPSCALE_BRANCH}" TOOLS_TARGET="${APPSCALE_TOOLS_BRANCH}" @@ -235,11 +235,11 @@ VERSION=$(cat /root/appscale/VERSION | grep -oE "[0-9]+\.[0-9]+\.[0-9]+") git clone ${APPSCALE_TOOLS_REPO} appscale-tools (cd appscale-tools; git checkout "${TOOLS_TARGET}") -if version_ge "${VERSION}" 3.7.0; then +if [ "${RELY_ON_TAG}" = "N" ] || version_ge "${VERSION}" 3.8.0; then git clone ${AGENTS_REPO} appscale-agents (cd appscale-agents; git checkout "${AGENTS_TARGET}") fi -if version_ge "${VERSION}" 3.8.0; then +if [ "${RELY_ON_TAG}" = "N" ] || version_ge "${VERSION}" 4.0.0; then git clone ${THIRDPARTIES_REPO} appscale-thirdparties (cd appscale-thirdparties; git checkout "${THIRDPARTIES_TARGET}") fi @@ -251,7 +251,7 @@ if ! (cd appscale/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge "${VERSION}" 3.7.0; then +if [ "${RELY_ON_TAG}" = "N" ] || version_ge "${VERSION}" 3.8.0; then echo -n "Installing AppScale Agents..." if ! (cd appscale-agents/; make install-no-venv) ; then echo "Failed to install AppScale Agents" @@ -265,7 +265,7 @@ if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge "${VERSION}" 3.8.0; then +if [ "${RELY_ON_TAG}" = "N" ] || version_ge "${VERSION}" 4.0.0; then echo -n "Installing Thirdparty software..." if ! 
(cd appscale-thirdparties/; bash install_all.sh) ; then echo "Failed to install Thirdparties software" diff --git a/upgrade.sh b/upgrade.sh index 136d3fd73a..0d620ae66a 100755 --- a/upgrade.sh +++ b/upgrade.sh @@ -65,12 +65,6 @@ if [ "$GIT_TAG" = "last" ]; then GIT_TAG=$(curl --fail https://api.github.com/repos/appscale/appscale/releases/latest \ | python -m json.tool | grep '"tag_name"' \ | awk -F ':' '{ print $2 }' | tr --delete ' ,"') -elif ! curl --fail https://api.github.com/repos/appscale/appscale/tags \ - | python -m json.tool | grep '"name"' | awk -F ':' '{ print $2 }' \ - | tr --delete ' ,"' | grep "^${GIT_TAG}$"; then - echo "Tag '${GIT_TAG}' not recognized" - echo "Use --tag to specify existing appscale repo tag to upgrade to." - exit 1 fi VERSION="${GIT_TAG}" @@ -78,8 +72,8 @@ echo echo "Will be using the following github repos:" echo "AppScale: ${APPSCALE_REPO} - Tag ${GIT_TAG}" echo "AppScale-Tools: ${APPSCALE_TOOLS_REPO} - Tag ${GIT_TAG}" -if version_ge ${VERSION} 3.7.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi -if version_ge ${VERSION} 3.8.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi +if version_ge ${VERSION} 3.8.0; then echo "Cloud-Agents: ${AGENTS_REPO} - Tag ${GIT_TAG}"; fi +if version_ge ${VERSION} 4.0.0; then echo "Thirdparties: ${THIRDPARTIES_REPO} - Tag ${GIT_TAG}"; fi echo "Exit now (ctrl-c) if this is incorrect" echo sleep 5 @@ -171,8 +165,8 @@ declare -A REPOS=( ["appscale"]="${APPSCALE_REPO}" ["appscale-tools"]="${APPSCALE_TOOLS_REPO}" ) -if version_ge "${VERSION}" 3.7.0; then REPOS+=(["appscale-agents"]="${AGENTS_REPO}"); fi -if version_ge "${VERSION}" 3.8.0; then REPOS+=(["appscale-thirdparties"]="${THIRDPARTIES_REPO}"); fi +if version_ge "${VERSION}" 3.8.0; then REPOS+=(["appscale-agents"]="${AGENTS_REPO}"); fi +if version_ge "${VERSION}" 4.0.0; then REPOS+=(["appscale-thirdparties"]="${THIRDPARTIES_REPO}"); fi # At this time we expect to be installed in $HOME. cd $HOME @@ -192,18 +186,18 @@ for repo_name in "${!REPOS[@]}"; do git fetch ${remote} -t current_branch="$(git branch --no-color | grep '^*' | cut -f 2 -d ' ')" echo "Checking out /root/${repo_name} from '${current_branch}' to '${GIT_TAG}'" - if ! git checkout "${GIT_TAG}"; then + if ! git checkout "tags/${GIT_TAG}"; then echo "Please stash your local unsaved changes at "\ "/root/${repo_name} and checkout the version of AppScale "\ "you are currently using to fix this error." - echo "e.g.: git stash; git checkout ${GIT_TAG}" + echo "e.g.: git stash; git checkout tags/${GIT_TAG}" exit 1 fi # ... cd $HOME else git clone "${repo}" ${repo_name} - (cd ${repo_name}; git checkout "${GIT_TAG}") + (cd ${repo_name}; git checkout "tags/${GIT_TAG}") fi done @@ -214,7 +208,7 @@ if ! (cd appscale/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge ${VERSION} 3.7.0; then +if version_ge ${VERSION} 3.8.0; then echo -n "Installing AppScale Agents..." if ! (cd appscale-agents/; make install-no-venv) ; then echo "Failed to upgrade AppScale Agents" @@ -228,7 +222,7 @@ if ! (cd appscale-tools/debian; bash appscale_build.sh) ; then exit 1 fi -if version_ge ${VERSION} 3.8.0; then +if version_ge ${VERSION} 4.0.0; then echo -n "Downloading Thirdparty artifacts..." if ! 
(cd appscale-thirdparties/; bash install_all.sh) ; then echo "Failed to upgrade Thirdparties software" From 976731f936a6f7aeb26d268ff972ebae815be16a Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 16 Sep 2019 13:14:43 -0700 Subject: [PATCH 118/221] Deployment open port check should not use login ip (admin_server) --- AdminServer/appscale/admin/admin_server.py | 38 ++++++++++++---------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/AdminServer/appscale/admin/admin_server.py b/AdminServer/appscale/admin/admin_server.py index a4b1649717..33ffd1434e 100644 --- a/AdminServer/appscale/admin/admin_server.py +++ b/AdminServer/appscale/admin/admin_server.py @@ -101,30 +101,32 @@ def wait_for_port_to_open(http_port, operation_id, timeout): raise OperationTimeout('Operation no longer in cache') deadline = monotonic.monotonic() + timeout + all_lbs = set(appscale_info.get_load_balancer_ips()) + passed_lbs = set() while True: - if monotonic.monotonic() > deadline: - message = 'Deploy operation took too long.' - operation.set_error(message) - raise OperationTimeout(message) - - if utils.port_is_open(options.login_ip, http_port): - break + for load_balancer in all_lbs: + if load_balancer in passed_lbs or monotonic.monotonic() > deadline: + continue - yield gen.sleep(1) + if utils.port_is_open(load_balancer, http_port): + passed_lbs.add(load_balancer) - for load_balancer in appscale_info.get_load_balancer_ips(): - while True: - if monotonic.monotonic() > deadline: - # The version is reachable from the login IP, but it's not reachable - # from every registered load balancer. It makes more sense to mark the - # operation as a success than a failure because the lagging load - # balancers should eventually reflect the registered instances. - break + if len(passed_lbs) == len(all_lbs): + break - if utils.port_is_open(load_balancer, http_port): + if monotonic.monotonic() > deadline: + # If the version is reachable, but it's not reachable from every + # registered load balancer. It makes more sense to mark the + # operation as a success than a failure because the lagging load + # balancers should eventually reflect the registered instances. + if not passed_lbs: + message = 'Deploy operation took too long.' + operation.set_error(message) + raise OperationTimeout(message) + else: break - yield gen.sleep(1) + yield gen.sleep(1) @gen.coroutine From 02927f3fce1b96fae6a959835040d489c7d2d7c7 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 16 Sep 2019 17:59:30 -0700 Subject: [PATCH 119/221] Replace init file This was mistakenly removed during a merge commit. 
--- AdminServer/appscale/admin/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 AdminServer/appscale/admin/__init__.py diff --git a/AdminServer/appscale/admin/__init__.py b/AdminServer/appscale/admin/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From 8cad5d3ea67c00f23b54fc4f2ffd13964a298ad8 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 17 Sep 2019 12:28:07 +0300 Subject: [PATCH 120/221] Add missing ')' --- bootstrap.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index 0f10eb36c5..f32ed0fead 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -34,11 +34,11 @@ usage() { echo "Options:" echo " --repo Specify appscale repo (default $APPSCALE_REPO)" echo " --branch Specify appscale branch (default $APPSCALE_BRANCH)" - echo " --tools-repo Specify appscale-tools repo (default $APPSCALE_TOOLS_REPO" + echo " --tools-repo Specify appscale-tools repo (default $APPSCALE_TOOLS_REPO)" echo " --tools-branch Specify appscale-tools branch (default $APPSCALE_TOOLS_BRANCH)" - echo " --agents-repo Specify appscale-agents repo (default $AGENTS_REPO" + echo " --agents-repo Specify appscale-agents repo (default $AGENTS_REPO)" echo " --agents-branch Specify appscale-agents branch (default $AGENTS_BRANCH)" - echo " --thirdparties-repo Specify appscale-thirdparties repo (default $THIRDPARTIES_REPO" + echo " --thirdparties-repo Specify appscale-thirdparties repo (default $THIRDPARTIES_REPO)" echo " --thirdparties-branch Specify appscale-thirdparties branch (default $THIRDPARTIES_BRANCH)" echo " --tag Use git tag (ie 3.7.2) or 'last' to use the latest release" echo " or 'dev' for HEAD (default ${GIT_TAG})" From c765de6be1be78f3a18a09ae23f3c7d5dc92293d Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 17 Sep 2019 13:01:34 +0300 Subject: [PATCH 121/221] Use ... --- upgrade.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/upgrade.sh b/upgrade.sh index 0d620ae66a..79f7810380 100755 --- a/upgrade.sh +++ b/upgrade.sh @@ -52,9 +52,7 @@ while [ $# -gt 0 ]; do UNIT_TEST="Y" shift; continue fi - echo - echo "Parameter '$1' is not recognized" - echo + echo -e "\nParameter '$1' is not recognized\n" usage done From d90c6b95fbaa8231e7bf3cd0e7ff96635b617ae1 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 17 Sep 2019 14:45:28 -0700 Subject: [PATCH 122/221] Add test for batch put side effects This makes sure that only the final mutation's index entries are written during a batch put. --- AppDB/test/e2e/test_queries.py | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/AppDB/test/e2e/test_queries.py b/AppDB/test/e2e/test_queries.py index 474c1923d1..473f9a9f44 100644 --- a/AppDB/test/e2e/test_queries.py +++ b/AppDB/test/e2e/test_queries.py @@ -84,6 +84,46 @@ def test_separator_in_kind(self): raise Exception('Expected BadRequest. 
No error was thrown.') +class TestBatchPutEffects(AsyncTestCase): + def setUp(self): + super(TestBatchPutEffects, self).setUp() + locations = os.environ['DATASTORE_LOCATIONS'].split() + self.datastore = Datastore(locations, PROJECT_ID) + + def tearDown(self): + self.tear_down_helper() + super(TestBatchPutEffects, self).tearDown() + + @gen_test + def tear_down_helper(self): + query = Query('Greeting', _app=PROJECT_ID) + results = yield self.datastore.run_query(query) + yield self.datastore.delete([entity.key() for entity in results]) + + @gen_test + def test_batch_put_index_entries(self): + entities = [] + + entity = Entity('Greeting', name='duplicate', _app=PROJECT_ID) + entity['content'] = 'first entry' + entities.append(entity) + + entity = Entity('Greeting', name='duplicate', _app=PROJECT_ID) + entity['content'] = 'second entry' + entities.append(entity) + + yield self.datastore.put_multi(entities) + + # Ensure the last specified mutation is the one that matters. + query = Query('Greeting', projection=['content'], _app=PROJECT_ID) + response = yield self.datastore.run_query(query) + print('response: {}'.format(response)) + self.assertEqual(len(response), 1) + + entity = response[0] + self.assertEqual(entity['content'], 'second entry') + + class TestQueryLimit(AsyncTestCase): CASSANDRA_PAGE_SIZE = 5000 BATCH_SIZE = 20 From 6143bf58345db69f1fcd30a36dd1e111cc620128 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 18 Sep 2019 16:52:33 +0300 Subject: [PATCH 123/221] Allow up to 100 signs in pull queue name --- AppTaskQueue/appscale/taskqueue/queue.py | 110 ++++++++++++++++------- 1 file changed, 77 insertions(+), 33 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 5b11683fdf..4d62d04757 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -6,6 +6,8 @@ import time import uuid +import psycopg2 + from appscale.common import appscale_info from appscale.common import retrying from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER @@ -242,6 +244,7 @@ def __ne__(self, other): """ return not self.__eq__(other) + class PushQueue(Queue): # The default rate for push queues. DEFAULT_RATE = '5/s' @@ -307,15 +310,13 @@ def __repr__(self): def is_connection_error(err): """ This function is used as retry criteria. - It also makes possible lazy load of psycopg2 package. Args: err: an instance of Exception. Returns: True if error is related to connection, False otherwise. """ - from psycopg2 import InterfaceError - return isinstance(err, InterfaceError) + return isinstance(err, psycopg2.InterfaceError) class PostgresPullQueue(Queue): @@ -339,43 +340,87 @@ def __init__(self, queue_info, app, pg_connection_wrapper): app: A string containing the application ID. pg_connection_wrapper: A psycopg2 connection wrapper. 
""" - from psycopg2 import IntegrityError # Import psycopg2 lazily super(PostgresPullQueue, self).__init__(queue_info, app) self.connection_key = self.app self.pg_connection_wrapper = pg_connection_wrapper + self.queue_id = self.ensure_queue_registered() + self.ensure_tasks_table_created() - # When multiple TQ servers are notified by ZK about new queue - # they sometimes get IntegrityError despite 'IF NOT EXISTS' - @retrying.retry(max_retries=5, retry_on_exception=IntegrityError) - def ensure_tables_created(): - pg_connection = self.pg_connection_wrapper.get_connection() - with pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'CREATE TABLE IF NOT EXISTS "{table_name}" (' - ' task_name varchar(500) NOT NULL,' - ' time_deleted timestamp DEFAULT NULL,' - ' time_enqueued timestamp NOT NULL,' - ' lease_count integer NOT NULL,' - ' lease_expires timestamp NOT NULL,' - ' payload bytea,' - ' tag varchar(500),' - ' PRIMARY KEY (task_name)' - ');' - 'CREATE INDEX IF NOT EXISTS "{table_name}-eta-retry-tag-index" ' - ' ON "{table_name}" USING BTREE (lease_expires, lease_count, tag) ' - ' WHERE time_deleted IS NULL;' - 'CREATE INDEX IF NOT EXISTS "{table_name}-retry-eta-tag-index" ' - ' ON "{table_name}" (lease_count, lease_expires, tag) ' - ' WHERE time_deleted IS NULL;' - .format(table_name=self.tasks_table_name) - ) + # When multiple TQ servers are notified by ZK about new queue + # they sometimes get IntegrityError despite 'IF NOT EXISTS' + @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) + def ensure_queue_registered(self): + pg_connection = self.pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + logger.info('Ensuring "{}" table is created' + .format(self.queues_table_name)) + pg_cursor.execute( + 'CREATE TABLE IF NOT EXISTS "{queues_table}" (' + ' id SERIAL,' + ' queue_name varchar(100) NOT NULL UNIQUE' + ');' + .format(queues_table=self.queues_table_name) + ) + pg_cursor.execute( + 'SELECT id FROM "{queues_table}" WHERE queue_name = %(queue_name)s;' + .format(queues_table=self.queues_table_name), + vars={'queue_name': self.name} + ) + row = pg_cursor.fetchone() + if row: + return row[0] + + logger.info('Registering queue "{}" in "{}" table' + .format(self.name, self.queues_table_name)) + pg_cursor.execute( + 'INSERT INTO "{queues_table}" (queue_name) ' + 'VALUES (%(queue_name)s) ON CONFLICT DO NOTHING;' + 'SELECT id FROM "{queues_table}" WHERE queue_name = %(queue_name)s;' + .format(queues_table=self.queues_table_name), + vars={'queue_name': self.name} + ) + row = pg_cursor.fetchone() + logger.info('Queue "{}" was registered with ID "{}"' + .format(self.name, row[0])) + return row[0] + + # When multiple TQ servers are notified by ZK about new queue + # they sometimes get IntegrityError despite 'IF NOT EXISTS' + @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) + def ensure_tasks_table_created(self): + pg_connection = self.pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + logger.info('Ensuring "{}" table is created' + .format(self.tasks_table_name)) + pg_cursor.execute( + 'CREATE TABLE IF NOT EXISTS "{table_name}" (' + ' task_name varchar(500) NOT NULL,' + ' time_deleted timestamp DEFAULT NULL,' + ' time_enqueued timestamp NOT NULL,' + ' lease_count integer NOT NULL,' + ' lease_expires timestamp NOT NULL,' + ' payload bytea,' + ' tag varchar(500),' + ' PRIMARY KEY (task_name)' + ');' + 'CREATE INDEX IF 
NOT EXISTS "{table_name}_eta_retry_tag_index" ' + ' ON "{table_name}" USING BTREE (lease_expires, lease_count, tag) ' + ' WHERE time_deleted IS NULL;' + 'CREATE INDEX IF NOT EXISTS "{table_name}_retry_eta_tag_index" ' + ' ON "{table_name}" (lease_count, lease_expires, tag) ' + ' WHERE time_deleted IS NULL;' + .format(table_name=self.tasks_table_name) + ) - ensure_tables_created() + @property + def queues_table_name(self): + return 'appscale_queues_{}'.format(self.app) @property def tasks_table_name(self): - return 'pullqueue-{}'.format(self.name) + return 'appscale_tasks_{}_{}'.format(self.app, self.queue_id) @retry_pg_connection def add_task(self, task): @@ -387,7 +432,6 @@ def add_task(self, task): InvalidTaskInfo if the task ID already exists in the queue or it doesn't have payloadBase64 attribute. """ - import psycopg2 # Import psycopg2 lazily if not hasattr(task, 'payloadBase64'): raise InvalidTaskInfo('{} is missing a payload.'.format(task)) From 683bf2bf17c771736decac6db2912bc39a11b04f Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 18 Sep 2019 09:44:04 -0700 Subject: [PATCH 124/221] Infrastructure manager agents cleanup --- InfrastructureManager/agents/__init__.py | 2 - InfrastructureManager/agents/factory.py | 53 ------ .../agents/openstack_agent.py | 151 ------------------ .../tests/test_agent_factory.py | 25 --- 4 files changed, 231 deletions(-) delete mode 100644 InfrastructureManager/agents/__init__.py delete mode 100644 InfrastructureManager/agents/factory.py delete mode 100644 InfrastructureManager/agents/openstack_agent.py delete mode 100644 InfrastructureManager/tests/test_agent_factory.py diff --git a/InfrastructureManager/agents/__init__.py b/InfrastructureManager/agents/__init__.py deleted file mode 100644 index 1cb2840642..0000000000 --- a/InfrastructureManager/agents/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -__author__ = 'hiranya' -__email__ = 'hiranya@appscale.com' \ No newline at end of file diff --git a/InfrastructureManager/agents/factory.py b/InfrastructureManager/agents/factory.py deleted file mode 100644 index ebfbc57cf8..0000000000 --- a/InfrastructureManager/agents/factory.py +++ /dev/null @@ -1,53 +0,0 @@ -import logging -import struct - -from appscale.agents.ec2_agent import EC2Agent -from appscale.agents.euca_agent import EucalyptusAgent -from appscale.agents.gce_agent import GCEAgent -from appscale.agents.openstack_agent import OpenStackAgent - -logger = logging.getLogger(__name__) - -try: - from appscale.agents.azure_agent import AzureAgent -except (ImportError, struct.error): - logger.exception('AzureAgent disabled') - AzureAgent = None - -__author__ = 'hiranya' -__email__ = 'hiranya@appscale.com' - -class InfrastructureAgentFactory: - """ - Factory implementation which can be used to instantiate concrete infrastructure - agents. - """ - - agents = { - 'ec2': EC2Agent, - 'euca': EucalyptusAgent, - 'gce': GCEAgent, - 'openstack': OpenStackAgent - } - if AzureAgent is not None: - agents['azure'] = AzureAgent - - def create_agent(self, infrastructure): - """ - Instantiate a new infrastructure agent. - - Args: - infrastructure A string indicating the type of infrastructure - agent to be initialized. - - Returns: - An infrastructure agent instance that implements the BaseAgent API - - Raises: - NameError If the given input string does not map to any known - agent type. 
- """ - if self.agents.has_key(infrastructure): - return self.agents[infrastructure]() - else: - raise NameError('Unrecognized infrastructure: ' + infrastructure) diff --git a/InfrastructureManager/agents/openstack_agent.py b/InfrastructureManager/agents/openstack_agent.py deleted file mode 100644 index 4ac9dad660..0000000000 --- a/InfrastructureManager/agents/openstack_agent.py +++ /dev/null @@ -1,151 +0,0 @@ -""" Agent class for OpenStack. """ -from agents.ec2_agent import EC2Agent - -from boto.exception import EC2ResponseError -import boto -import os -from urlparse import urlparse -from utils import utils - -from appscale.common.constants import KEY_DIRECTORY - -__author__ = 'dario nascimento' -__email__ = 'dario.nascimento@tecnico.ulisboa.pt' - -class OpenStackAgent(EC2Agent): - """ - OpenStack infrastructure agent which can be used to spawn and terminate - VMs in an OpenStack based environment. - """ - - # The version of OpenStack API used to interact with Boto - # OpenStack_API_VERSION = 'ICE-HOUSE-2014.1' - - # The default region. - DEFAULT_REGION = "nova" - - def configure_instance_security(self, parameters): - """ - Setup OpenStack security keys and groups. Required input values are - read from the parameters dictionary. More specifically, this method - expects tofind a 'keyname' parameter and a 'group' parameter in the - parameters dictionary. Using these provided values, this method will - create a new OpenStack key-pair and a security group. Security group - will be granted permissions to access any port on the instantiated - VMs. (Also see documentation for the BaseAgent class). - - This method differs from its OpenStack counterpart because in OpenStack - the security group definition for icmp must include the port range. - - Args: - parameters: A dictionary of parameters. - Returns: - False if the SSH keys already exist, True if successful. - """ - keyname = parameters[self.PARAM_KEYNAME] - group = parameters[self.PARAM_GROUP] - - key_path = '{}/{}.key'.format(KEY_DIRECTORY, keyname) - ssh_key = os.path.abspath(key_path) - utils.log('About to spawn OpenStack instances - ' \ - 'Expecting to find a key at {0}'.format(ssh_key)) - if os.path.exists(ssh_key): - utils.log('SSH keys found in the local system - ' - 'Not initializing OpenStack security') - return False - - try: - conn = self.open_connection(parameters) - key_pair = conn.get_key_pair(keyname) - if key_pair is None: - utils.log('Creating key pair: {0}'.format(keyname)) - key_pair = conn.create_key_pair(keyname) - utils.write_key_file(ssh_key, key_pair.material) - - security_groups = conn.get_all_security_groups() - group_exists = False - for security_group in security_groups: - if security_group.name == group: - group_exists = True - break - - if not group_exists: - utils.log('Creating security group: {0}'.format(group)) - conn.create_security_group(group, 'AppScale security group') - conn.authorize_security_group(group, from_port=1,\ - to_port=65535, ip_protocol='udp') - conn.authorize_security_group(group, from_port=1,\ - to_port=65535, ip_protocol='tcp') - #TODO: Check if ec2_agent can be change to include the from_port - # and the to_port. If yes, remove this method. 
- conn.authorize_security_group(group, from_port=-1, to_port=-1, \ - ip_protocol='icmp', cidr_ip='0.0.0.0/0') - return True - - except EC2ResponseError as exception: - self.handle_failure('OpenStack response error while initializing ' - 'security: {0}'.format(exception.error_message)) - except Exception as exception: - self.handle_failure('Error while initializing OpenStack ' - 'security: {0}'.format(exception.message)) - - def run_instances(self, count, parameters, security_configured): - """ - Spawns the specified number of OpenStack instances using the parameters - provided. This method is blocking in that it waits until the - requested VMs are properly booted up. However if the requested - VMs cannot be procured within 1800 seconds, this method will treat - it as an error and return. (Also see documentation for the BaseAgent - class). - - This method differs from its OpenStack counterpart because OpenStack - does not support spot instances. - - Args: - count: Number of VMs to spawn. - parameters: A dictionary of parameters. This must contain 'keyname', - 'group', 'image_id' and 'instance_type' parameters. - security_configured: Uses this boolean value as an heuristic to - detect brand new AppScale deployments. - - Returns: - A tuple of the form (instances, public_ips, private_ips). - """ - if parameters[self.PARAM_SPOT] == "True": - parameters[self.PARAM_SPOT] = 'False' - utils.log("OpenStack does not support spot instances") - - super.run_instances(self, count, parameters, security_configured) - - def open_connection(self, parameters): - """ - Initialize a connection to the back-end OpenStack APIs. - The expected url is http://192.168.2.12:8773/services/Cloud - - Args: - parameters: A dictionary containing the 'credentials' parameter. - - Returns: - An instance of Boto EC2Connection. 
- """ - credentials = parameters[self.PARAM_CREDENTIALS] - region_str = self.DEFAULT_REGION - access_key = str(credentials['EC2_ACCESS_KEY']) - secret_key = str(credentials['EC2_SECRET_KEY']) - ec2_url = str(credentials['EC2_URL']) - result = urlparse(ec2_url) - - if result.port is None or result.hostname is None or result.path is None: - self.handle_failure('Unknown scheme in Openstack_URL: {0}' - ' : expected like http://:8773/services/Cloud'.\ - format(result.scheme)) - return None - - region = boto.ec2.regioninfo.RegionInfo(name=region_str,\ - endpoint=result.hostname) - return boto.connect_ec2(aws_access_key_id=access_key, - aws_secret_access_key=secret_key, - is_secure=(result.scheme == 'https'), - region=region, - port=result.port, - path=result.path, debug=2) diff --git a/InfrastructureManager/tests/test_agent_factory.py b/InfrastructureManager/tests/test_agent_factory.py deleted file mode 100644 index dc688741ab..0000000000 --- a/InfrastructureManager/tests/test_agent_factory.py +++ /dev/null @@ -1,25 +0,0 @@ -from appscale.agents.ec2_agent import EC2Agent -from appscale.agents.euca_agent import EucalyptusAgent -from agents.factory import InfrastructureAgentFactory -try: - from unittest import TestCase -except ImportError: - from unittest.case import TestCase - -class TestAgentFactory(TestCase): - - def test_create_agent(self): - factory = InfrastructureAgentFactory() - agent = factory.create_agent('ec2') - self.assertEquals(type(agent), type(EC2Agent())) - - agent = factory.create_agent('euca') - self.assertEquals(type(agent), type(EucalyptusAgent())) - - try: - factory.create_agent('bogus') - self.fail('No exception thrown for invalid infrastructure') - except NameError: - pass - except Exception: - self.fail('Unexpected exception thrown for invalid infrastructure') From 1996096897db018a7edee6b6be1e3524b47a2f64 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 18 Sep 2019 18:29:45 -0700 Subject: [PATCH 125/221] Account for versionstamp index size for key length --- AppDB/appscale/datastore/fdb/indexes.py | 9 ++--- .../datastore/fdb/stats/containers.py | 34 +++++++++++++------ AppDB/appscale/datastore/fdb/utils.py | 3 ++ 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 5e60294a6b..830430cc45 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -1193,6 +1193,7 @@ def get_iterator(self, tr, query, read_versionstamp=None): @gen.coroutine def _get_index_keys(self, tr, entity, commit_versionstamp=None): + has_index = commit_versionstamp is None project_id = decode_str(entity.key().app()) namespace = decode_str(entity.key().name_space()) path = Path.flatten(entity.key().path()) @@ -1206,8 +1207,8 @@ def _get_index_keys(self, tr, entity, commit_versionstamp=None): kindless_key = kindless_index.encode_key(path, commit_versionstamp) kind_key = kind_index.encode_key(path, commit_versionstamp) - stats.add_kindless_key(kindless_key) - stats.add_kind_key(kind_key) + stats.add_kindless_key(kindless_key, has_index) + stats.add_kind_key(kind_key, has_index) all_keys = [kindless_key, kind_key] entity_prop_names = [] for prop in entity.property_list(): @@ -1216,7 +1217,7 @@ def _get_index_keys(self, tr, entity, commit_versionstamp=None): index = yield self._single_prop_index( tr, project_id, namespace, kind, prop_name) prop_key = index.encode_key(prop.value(), path, commit_versionstamp) - stats.add_prop_key(prop, prop_key) + 
stats.add_prop_key(prop, prop_key, has_index) all_keys.append(prop_key) scatter_val = get_scatter_val(path) @@ -1233,7 +1234,7 @@ def _get_index_keys(self, tr, entity, commit_versionstamp=None): composite_keys = index.encode_keys(entity.property_list(), path, commit_versionstamp) - stats.add_composite_keys(index.id, composite_keys) + stats.add_composite_keys(index.id, composite_keys, has_index) all_keys.extend(composite_keys) raise gen.Return((all_keys, stats)) diff --git a/AppDB/appscale/datastore/fdb/stats/containers.py b/AppDB/appscale/datastore/fdb/stats/containers.py index fe492fffb1..cb2a96d19c 100644 --- a/AppDB/appscale/datastore/fdb/stats/containers.py +++ b/AppDB/appscale/datastore/fdb/stats/containers.py @@ -14,7 +14,8 @@ from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER from appscale.datastore.fdb.codecs import decode_str -from appscale.datastore.fdb.utils import decode_delta, encode_delta +from appscale.datastore.fdb.utils import ( + decode_delta, encode_delta, VERSIONSTAMP_INDEX_SIZE) sys.path.append(APPSCALE_PYTHON_APPSERVER) from google.appengine.datastore.entity_pb import Property as Meaning @@ -190,20 +191,31 @@ def __repr__(self): for prop_name, prop_types in six.iteritems(self.single_prop)}, dict(self.composite)) - def add_kindless_key(self, key): - self.kindless += CountBytes(1, len(key)) + def add_kindless_key(self, key, has_index): + self.kindless += CountBytes(1, self._stored_length(key, has_index)) - def add_kind_key(self, key): - self.kind += CountBytes(1, len(key)) + def add_kind_key(self, key, has_index): + self.kind += CountBytes(1, self._stored_length(key, has_index)) - def add_prop_key(self, prop_pb, key): + def add_prop_key(self, prop_pb, key, has_index): prop_type = stats_prop_type(prop_pb) prop_name = decode_str(prop_pb.name()) - self.single_prop[prop_type][prop_name] += CountBytes(1, len(key)) - - def add_composite_keys(self, index_id, keys): - self.composite[index_id] += CountBytes(len(keys), - sum(len(key) for key in keys)) + self.single_prop[prop_type][prop_name] += CountBytes( + 1, self._stored_length(key, has_index)) + + def add_composite_keys(self, index_id, keys, has_index): + self.composite[index_id] += CountBytes( + len(keys), + sum(self._stored_length(key, has_index) for key in keys)) + + def _stored_length(self, key, has_index): + """ + Removes the versionstamp index suffix from the key length calculation. + """ + if has_index: + return len(key) - VERSIONSTAMP_INDEX_SIZE + else: + return len(key) def __sub__(self, other): self.kindless -= other.kindless diff --git a/AppDB/appscale/datastore/fdb/utils.py b/AppDB/appscale/datastore/fdb/utils.py index 847a408551..64d59ddb61 100644 --- a/AppDB/appscale/datastore/fdb/utils.py +++ b/AppDB/appscale/datastore/fdb/utils.py @@ -44,6 +44,9 @@ # The number of bytes used to store a commit versionstamp. VERSIONSTAMP_SIZE = 10 +# The number of bytes used to indicate the position of a commit versionstamp. +VERSIONSTAMP_INDEX_SIZE = 4 + MAX_ENTITY_SIZE = 1048572 # The FDB directory used for the datastore. 
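The stats change in this patch comes down to simple arithmetic: before the commit versionstamp is known, an encoded index key carries an extra 4-byte suffix that only marks where the versionstamp will be spliced in, so those bytes are not part of the stored key and should not be counted. A sketch of the accounting (the 62-byte key below is a made-up figure):

    VERSIONSTAMP_INDEX_SIZE = 4   # position suffix, as defined in fdb/utils.py

    def stored_length(key, has_index):
        # Mirrors ProjectStats._stored_length: drop the position suffix
        # when the key still carries one.
        if has_index:
            return len(key) - VERSIONSTAMP_INDEX_SIZE
        return len(key)

    encoded_key = b'x' * 62
    assert stored_length(encoded_key, has_index=True) == 58
    assert stored_length(encoded_key, has_index=False) == 62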
From 562e636c3ef36fe0efa7db01fc5c258440296335 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 18 Sep 2019 18:30:48 -0700 Subject: [PATCH 126/221] Respect transaction time limit for stat entities --- AppDB/appscale/datastore/fdb/stats/buffer.py | 28 ++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/stats/buffer.py b/AppDB/appscale/datastore/fdb/stats/buffer.py index b5bb684277..547537388d 100644 --- a/AppDB/appscale/datastore/fdb/stats/buffer.py +++ b/AppDB/appscale/datastore/fdb/stats/buffer.py @@ -1,5 +1,6 @@ import datetime import logging +import monotonic import random import time from collections import defaultdict @@ -12,7 +13,8 @@ from appscale.datastore.fdb.polling_lock import PollingLock from appscale.datastore.fdb.stats.containers import ProjectStats from appscale.datastore.fdb.stats.entities import fill_entities -from appscale.datastore.fdb.utils import fdb, ResultIterator +from appscale.datastore.fdb.utils import ( + fdb, MAX_FDB_TX_DURATION, ResultIterator) logger = logging.getLogger(__name__) @@ -63,6 +65,8 @@ def directory_path(cls, project_id): class StatsBuffer(object): AVG_FLUSH_INTERVAL = 30 + BATCH_SIZE = 20 + SUMMARY_INTERVAL = 120 _LOCK_KEY = u'stats-lock' @@ -133,15 +137,18 @@ def _periodic_summary(self): try: yield self._summary_lock.acquire() tr = self._db.create_transaction() + deadline = monotonic.monotonic() + MAX_FDB_TX_DURATION - 1 last_summarized = {} # TODO: This can be made async. project_ids = self._directory_cache.root_dir.list(tr) + summarized_projects = [] for project_id in project_ids: stats_dir = yield self._project_stats_dir(tr, project_id) last_vs_key = stats_dir.encode_last_versionstamp()[0] - last_versionstamp = yield self._tornado_fdb.get(tr, last_vs_key) + last_versionstamp = yield self._tornado_fdb.get( + tr, last_vs_key, snapshot=True) if (not last_versionstamp.present() or last_versionstamp.value == self._last_summarized.get(project_id)): continue @@ -152,11 +159,22 @@ def _periodic_summary(self): snapshot=True).list() project_stats, last_timestamp = stats_dir.decode(results) entities = fill_entities(project_id, project_stats, last_timestamp) - yield [self._ds_access._upsert(tr, entity) for entity in entities] + for pos in range(0, len(entities), self.BATCH_SIZE): + yield [self._ds_access._upsert(tr, entity) + for entity in entities[pos:pos + self.BATCH_SIZE]] + if monotonic.monotonic() > deadline: + yield self._tornado_fdb.commit(tr) + tr = self._db.create_transaction() + deadline = monotonic.monotonic() + MAX_FDB_TX_DURATION - 1 + + summarized_projects.append(project_id) yield self._tornado_fdb.commit(tr) - self._last_summarized = last_summarized - logger.debug(u'Finished summarizing stats') + self._last_summarized.update(last_summarized) + if summarized_projects: + logger.debug(u'Finished summarizing stats for ' + u'{}'.format(summarized_projects)) + yield gen.sleep(self.SUMMARY_INTERVAL) except Exception: logger.exception(u'Unexpected error while summarizing stats') From ae9ac153bcc1e9c496b1cb14aabd6260b4ccdbd6 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 18 Sep 2019 18:32:25 -0700 Subject: [PATCH 127/221] Fix stat entity field values --- .../appscale/datastore/fdb/stats/entities.py | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/stats/entities.py b/AppDB/appscale/datastore/fdb/stats/entities.py index 77a726bfc2..e4250108c8 100644 --- a/AppDB/appscale/datastore/fdb/stats/entities.py +++ 
b/AppDB/appscale/datastore/fdb/stats/entities.py @@ -90,12 +90,12 @@ def fill_entities(project_id, project_stats, timestamp): namespace=namespace)) stats_kind = u'__Stat_Kind_CompositeIndex__' - composite_stats_by_kind = defaultdict(CountBytes) + composite_stats_by_index = defaultdict(CountBytes) for namespace, by_index in six.iteritems(composite_stats): for key, fields in six.iteritems(by_index): - composite_stats_by_kind[key] += fields + composite_stats_by_index[key] += fields - for (index_id, kind), fields in six.iteritems(composite_stats_by_kind): + for (index_id, kind), fields in six.iteritems(composite_stats_by_index): name = u'_'.join([kind, six.text_type(index_id)]) props = {'index_id': index_id, 'kind_name': kind, 'timestamp': timestamp, 'count': fields.count, 'bytes': fields.bytes} @@ -129,20 +129,33 @@ def fill_entities(project_id, project_stats, timestamp): namespace=namespace)) stats_kind = u'__Stat_Ns_Kind__' - entity_stats_by_ns_kind = entity_stats.entities_root.copy() + entity_stats_by_ns_kind = defaultdict(lambda: defaultdict(CountBytes)) + for namespace, by_kind in six.iteritems(entity_stats.entities_root): + for kind, fields in six.iteritems(by_kind): + entity_stats_by_ns_kind[namespace][kind] += fields + for namespace, by_kind in six.iteritems(entity_stats.entities_notroot): for kind, fields in six.iteritems(by_kind): entity_stats_by_ns_kind[namespace][kind] += fields - builtin_stats_by_ns_kind = entity_stats.builtin_indexes_root.copy() + builtin_stats_by_ns_kind = defaultdict(lambda: defaultdict(CountBytes)) + for namespace, by_kind in six.iteritems(entity_stats.builtin_indexes_root): + for kind, fields in six.iteritems(by_kind): + builtin_stats_by_ns_kind[namespace][kind] += fields + for namespace, by_kind in six.iteritems(entity_stats.builtin_indexes_notroot): for kind, fields in six.iteritems(by_kind): builtin_stats_by_ns_kind[namespace][kind] += fields + composite_stats_by_ns_kind = defaultdict(lambda: defaultdict(CountBytes)) + for namespace, by_index in six.iteritems(composite_stats): + for (index_id, kind), fields in six.iteritems(by_index): + composite_stats_by_ns_kind[namespace][kind] += fields + for namespace, by_kind in six.iteritems(entity_stats_by_ns_kind): for kind, entity_fields in six.iteritems(by_kind): builtin_fields = builtin_stats_by_ns_kind[namespace][kind] - composite_fields = composite_stats[namespace][kind] + composite_fields = composite_stats_by_ns_kind[namespace][kind] props = {'kind_name': kind, 'timestamp': timestamp, 'builtin_index_count': builtin_fields.count, 'builtin_index_bytes': builtin_fields.bytes, @@ -210,6 +223,10 @@ def fill_entities(project_id, project_stats, timestamp): for kind, fields in six.iteritems(notroot_builtin_stats_by_kind): builtin_stats_by_kind[kind] += fields + composite_stats_by_kind = defaultdict(CountBytes) + for (index_id, kind), fields in six.iteritems(composite_stats_by_index): + composite_stats_by_kind[kind] += fields + for kind, entity_fields in six.iteritems(entity_stats_by_kind): builtin_fields = builtin_stats_by_kind[kind] composite_fields = composite_stats_by_kind[kind] @@ -224,9 +241,10 @@ def fill_entities(project_id, project_stats, timestamp): entities.append(fill_entity(project_id, stats_kind, props, kind)) stats_kind = u'__Stat_Namespace__' - composite_stats_by_ns = { - namespace: sum(six.itervalues(by_kind), CountBytes()) - for namespace, by_kind in six.iteritems(composite_stats)} + composite_stats_by_ns = defaultdict(CountBytes) + for namespace, by_kind in six.iteritems(composite_stats): + 
composite_stats_by_ns[namespace] += sum(six.itervalues(by_kind), + CountBytes()) entity_stats_by_ns = defaultdict(CountBytes) for namespace, by_kind in six.iteritems(entity_stats.entities_root): From 770b791700593e855d7fd3b9f3f5f1a68ddc558f Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 18 Sep 2019 18:44:43 -0700 Subject: [PATCH 128/221] Add support for namespace metadata queries --- AppDB/appscale/datastore/fdb/data.py | 2 +- AppDB/appscale/datastore/fdb/indexes.py | 29 ++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/data.py b/AppDB/appscale/datastore/fdb/data.py index aa0837b977..73f62fb24a 100644 --- a/AppDB/appscale/datastore/fdb/data.py +++ b/AppDB/appscale/datastore/fdb/data.py @@ -430,7 +430,7 @@ def get_entry(self, tr, index_entry, snapshot=False): Returns: A VersionEntry or None. """ - if index_entry.kind == u'__kind__': + if index_entry.kind in (u'__namespace__', u'__kind__'): entity = entity_pb.EntityProto() entity.mutable_key().MergeFrom(index_entry.key) entity.mutable_entity_group().MergeFrom(index_entry.group) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 8c5b585e85..43d2f93eb4 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -335,6 +335,30 @@ def _usable(self, entry): return entry.deleted_versionstamp is None +class NamespaceIterator(object): + def __init__(self, tr, project_dir): + self._tr = tr + self._project_dir = project_dir + self._done = False + + @gen.coroutine + def next_page(self): + if self._done: + raise gen.Return(([], False)) + + # TODO: This can be made async. + ns_dir = self._project_dir.open(self._tr, (KindIndex.DIR_NAME,)) + namespaces = ns_dir.list(self._tr) + + # The API uses an ID of 1 to label the default namespace. 
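That ID convention is the only subtle part of the namespace iterator: the metadata API represents the default (empty) namespace with the numeric key ID 1, while named namespaces keep their name, which is what the `namespace or 1` expression on the next line encodes. A tiny stand-alone sketch of the convention:

    def namespace_key_path(namespace):
        # Default namespace ('') becomes ID 1; named namespaces keep their name.
        return (u'__namespace__', namespace or 1)

    assert namespace_key_path(u'') == (u'__namespace__', 1)
    assert namespace_key_path(u'accounting') == (u'__namespace__', u'accounting')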
+ results = [IndexEntry(self._project_dir.get_path()[-1], u'', + (u'__namespace__', namespace or 1), None, None) + for namespace in namespaces] + + self._done = True + raise gen.Return((results, False)) + + class KindIterator(object): def __init__(self, tr, project_dir, namespace): self._tr = tr @@ -1113,7 +1137,10 @@ def get_iterator(self, tr, query, read_versionstamp=None): if check_more_results: fetch_limit += 1 - if query.has_kind() and query.kind() == u'__kind__': + if query.has_kind() and query.kind() == u'__namespace__': + project_dir = yield self._directory_cache.get(tr, (project_id,)) + raise gen.Return(NamespaceIterator(tr, project_dir)) + elif query.has_kind() and query.kind() == u'__kind__': project_dir = yield self._directory_cache.get(tr, (project_id,)) raise gen.Return(KindIterator(tr, project_dir, namespace)) From 224e7f0a7187ad8199800c426146d8189ba2b303 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 18 Sep 2019 18:45:30 -0700 Subject: [PATCH 129/221] Omit entity groups from metadata query response --- AppDB/appscale/datastore/fdb/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppDB/appscale/datastore/fdb/data.py b/AppDB/appscale/datastore/fdb/data.py index 73f62fb24a..976e898707 100644 --- a/AppDB/appscale/datastore/fdb/data.py +++ b/AppDB/appscale/datastore/fdb/data.py @@ -433,7 +433,7 @@ def get_entry(self, tr, index_entry, snapshot=False): if index_entry.kind in (u'__namespace__', u'__kind__'): entity = entity_pb.EntityProto() entity.mutable_key().MergeFrom(index_entry.key) - entity.mutable_entity_group().MergeFrom(index_entry.group) + entity.mutable_entity_group() version_entry = VersionEntry( index_entry.project_id, index_entry.namespace, index_entry.path, encoded_entity=entity.Encode()) From 0d2fb1dd3664f49e88164d96dd7e24fc68346e88 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 18 Sep 2019 20:00:38 +0300 Subject: [PATCH 130/221] Allow global postgres_dsn --- .../appscale/taskqueue/queue_manager.py | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/queue_manager.py b/AppTaskQueue/appscale/taskqueue/queue_manager.py index 2b425d775a..e578eaeb04 100644 --- a/AppTaskQueue/appscale/taskqueue/queue_manager.py +++ b/AppTaskQueue/appscale/taskqueue/queue_manager.py @@ -2,9 +2,10 @@ import json -from kazoo.exceptions import ZookeeperError, NoNodeError +from kazoo.exceptions import ZookeeperError from tornado.ioloop import IOLoop, PeriodicCallback +from appscale.taskqueue.pg_connection_wrapper import PostgresConnectionWrapper from appscale.taskqueue.queue import PostgresPullQueue from appscale.taskqueue.utils import create_celery_for_app from .queue import PullQueue @@ -28,23 +29,32 @@ def __init__(self, zk_client, project_id): super(ProjectQueueManager, self).__init__() self.zk_client = zk_client self.project_id = project_id - pg_dns_node = '/appscale/projects/{}/postgres_dsn'.format(project_id) - try: - pg_dsn = self.zk_client.get(pg_dns_node) - logger.info('Using PostgreSQL as a backend for Pull Queues of "{}"' + + project_dsn_node = '/appscale/projects/{}/postgres_dsn'.format(project_id) + global_dsn_node = '/appscale/tasks/postgres_dsn' + if self.zk_client.exists(project_dsn_node): + pg_dsn = self.zk_client.get(project_dsn_node) + logger.info('Using project-specific PostgreSQL as a backend for ' + 'Pull Queues of project "{}" '.format(project_id)) + elif self.zk_client.exists(global_dsn_node): + pg_dsn = self.zk_client.get(global_dsn_node) + logger.info('Using 
deployment-wide PostgreSQL as a backend for ' + 'Pull Queues"'.format(project_id)) + else: + pg_dsn = None + logger.info('Using Cassandra as a backend for Pull Queues of "{}"' .format(project_id)) - # Import pg_connection_wrapper (and psycopg2) lazily - from appscale.taskqueue import pg_connection_wrapper + + if pg_dsn: # TODO: PostgresConnectionWrapper may need an update when # TaskQueue becomes concurrent - self.pg_connection_wrapper = ( - pg_connection_wrapper.PostgresConnectionWrapper(dsn=pg_dsn[0].decode('utf-8')) + self.pg_connection_wrapper = PostgresConnectionWrapper( + dsn=pg_dsn[0].decode('utf-8') ) self._configure_periodical_flush() - except NoNodeError: - logger.info('Using Cassandra as a backend for Pull Queues of "{}"' - .format(project_id)) + else: self.pg_connection_wrapper = None + self.queues_node = '/appscale/projects/{}/queues'.format(project_id) self.watch = zk_client.DataWatch(self.queues_node, self._update_queues_watch) From 24f51ca5dafe4f6b52c62fbc84793bcfffc83687 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 19 Sep 2019 16:00:06 +0300 Subject: [PATCH 131/221] Add postgres_dsn option to djinn --- AppController/djinn.rb | 11 +++++++---- AppController/lib/zkinterface.rb | 7 +++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index a36e8fd716..90448ab662 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -483,6 +483,7 @@ class Djinn 'verbose' => [TrueClass, 'False', true], 'zone' => [String, nil, true], 'fdb_clusterfile_content' => [String, nil, true], + 'postgres_dsn' => [String, nil, true], 'update' => [Array, [], false] }.freeze @@ -840,7 +841,9 @@ def check_options(options) # Strings may need to be sanitized. if PARAMETERS_AND_CLASS[key][PARAMETER_CLASS] == String # Some options shouldn't be sanitize. - if ['user_commands', 'azure_app_secret_key', 'fdb_clusterfile_content'].include? key + raw_options = ['user_commands', 'azure_app_secret_key', + 'fdb_clusterfile_content', 'postgres_dsn'] + if raw_options.include? key newval = val # Keys have a relaxed sanitization process. elsif key.include? "_key" or key.include? 
"EC2_SECRET_KEY" @@ -1350,10 +1353,10 @@ def set_property(property_name, property_value, secret) project_id = version_key.split(VERSION_PATH_SEPARATOR).first update_cron(project_id, @@secret) } - end - - if key == 'fdb_clusterfile_content' + elsif key == 'fdb_clusterfile_content' ZKInterface.set_fdb_clusterfile_content(val) + elsif key == 'postgres_dsn' + ZKInterface.set_postgres_dsn(val) end Djinn.log_info("Successfully set #{key} to #{val}.") diff --git a/AppController/lib/zkinterface.rb b/AppController/lib/zkinterface.rb index 34b3824a4c..43be393344 100644 --- a/AppController/lib/zkinterface.rb +++ b/AppController/lib/zkinterface.rb @@ -342,6 +342,13 @@ def self.set_fdb_clusterfile_content(content) set(clusterfile_node, content, NOT_EPHEMERAL) end + # Writes Postgres DSN string to zookeeper + def self.set_postgres_dsn(postgres_dsn) + dsn_node = '/appscale/tasks/postgres_dsn' + ensure_path(dsn_node) + set(dsn_node, postgres_dsn, NOT_EPHEMERAL) + end + def self.run_zookeeper_operation(&block) begin yield From fbfaea06624b51b2b9f38dfcee5db3ccb58af727 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 19 Sep 2019 16:00:06 +0300 Subject: [PATCH 132/221] Add postgres_dsn option to djinn --- AppController/djinn.rb | 11 +++++++---- AppController/lib/zkinterface.rb | 7 +++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index a36e8fd716..90448ab662 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -483,6 +483,7 @@ class Djinn 'verbose' => [TrueClass, 'False', true], 'zone' => [String, nil, true], 'fdb_clusterfile_content' => [String, nil, true], + 'postgres_dsn' => [String, nil, true], 'update' => [Array, [], false] }.freeze @@ -840,7 +841,9 @@ def check_options(options) # Strings may need to be sanitized. if PARAMETERS_AND_CLASS[key][PARAMETER_CLASS] == String # Some options shouldn't be sanitize. - if ['user_commands', 'azure_app_secret_key', 'fdb_clusterfile_content'].include? key + raw_options = ['user_commands', 'azure_app_secret_key', + 'fdb_clusterfile_content', 'postgres_dsn'] + if raw_options.include? key newval = val # Keys have a relaxed sanitization process. elsif key.include? "_key" or key.include? 
"EC2_SECRET_KEY" @@ -1350,10 +1353,10 @@ def set_property(property_name, property_value, secret) project_id = version_key.split(VERSION_PATH_SEPARATOR).first update_cron(project_id, @@secret) } - end - - if key == 'fdb_clusterfile_content' + elsif key == 'fdb_clusterfile_content' ZKInterface.set_fdb_clusterfile_content(val) + elsif key == 'postgres_dsn' + ZKInterface.set_postgres_dsn(val) end Djinn.log_info("Successfully set #{key} to #{val}.") diff --git a/AppController/lib/zkinterface.rb b/AppController/lib/zkinterface.rb index 34b3824a4c..43be393344 100644 --- a/AppController/lib/zkinterface.rb +++ b/AppController/lib/zkinterface.rb @@ -342,6 +342,13 @@ def self.set_fdb_clusterfile_content(content) set(clusterfile_node, content, NOT_EPHEMERAL) end + # Writes Postgres DSN string to zookeeper + def self.set_postgres_dsn(postgres_dsn) + dsn_node = '/appscale/tasks/postgres_dsn' + ensure_path(dsn_node) + set(dsn_node, postgres_dsn, NOT_EPHEMERAL) + end + def self.run_zookeeper_operation(&block) begin yield From 4c6e09a6728abc1655c3ecbe5b991178fe34bca1 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Fri, 20 Sep 2019 13:01:54 +0300 Subject: [PATCH 133/221] Fix modules issue --- AppTaskQueue/appscale/taskqueue/distributed_tq.py | 7 +++---- AppTaskQueue/appscale/taskqueue/push_worker.py | 2 +- AppTaskQueue/appscale/taskqueue/service_manager.py | 1 + 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/distributed_tq.py b/AppTaskQueue/appscale/taskqueue/distributed_tq.py index b39b85b88c..50069a99f7 100644 --- a/AppTaskQueue/appscale/taskqueue/distributed_tq.py +++ b/AppTaskQueue/appscale/taskqueue/distributed_tq.py @@ -408,7 +408,7 @@ def __bulk_add(self, source_info, request, response): except ApplicationError as error: task_result.result = error.application_error except InvalidTarget as e: - logger.error(e.message) + logger.error(e.args[0]) task_result.result = TaskQueueServiceError.INVALID_REQUEST else: task_result.result = TaskQueueServiceError.OK @@ -612,11 +612,11 @@ def get_task_args(self, source_info, headers, request): port=self.get_module_port(app_id, source_info, target_info=[])) try: - host = headers['Host'] + host = headers[b'Host'].decode('utf-8') except KeyError: host = None else: - host = host if TARGET_REGEX.match(host) else None + host = host if TARGET_REGEX.match(host) else None # Try to set target based on queue config. 
if queue.target: @@ -627,7 +627,6 @@ def get_task_args(self, source_info, headers, request): elif host: target_url = self.get_target_url(app_id, source_info, host) - args['url'] = "{target}{url}".format(target=target_url, url=request.url.decode('utf-8')) return args diff --git a/AppTaskQueue/appscale/taskqueue/push_worker.py b/AppTaskQueue/appscale/taskqueue/push_worker.py index b6ff31d879..3ffef2d159 100644 --- a/AppTaskQueue/appscale/taskqueue/push_worker.py +++ b/AppTaskQueue/appscale/taskqueue/push_worker.py @@ -166,7 +166,7 @@ def execute_task(task, headers, args): args['task_name'], url.scheme)) skip_host = False - if 'host' in headers or 'Host' in headers: + if b'host' in headers or b'Host' in headers: skip_host = True skip_accept_encoding = False diff --git a/AppTaskQueue/appscale/taskqueue/service_manager.py b/AppTaskQueue/appscale/taskqueue/service_manager.py index adbddbc0f0..75c9578049 100644 --- a/AppTaskQueue/appscale/taskqueue/service_manager.py +++ b/AppTaskQueue/appscale/taskqueue/service_manager.py @@ -4,6 +4,7 @@ from tornado.ioloop import IOLoop + class VersionPortManager(dict): """ Keeps track of version port details for a single service. """ def __init__(self, zk_client, project_id, service_id): From 4dff2df3637a8a91a3ddce82401a26bfd16eff05 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 20 Sep 2019 17:02:12 -0700 Subject: [PATCH 134/221] Move service routing to RoutingManager This takes the responsibility of routing services away from the AppController. The result is a decrease in latency between when a service's location list gets updated and when the load balancers update their HAProxy process. --- AdminServer/appscale/admin/routing/haproxy.py | 166 ++++++---- .../appscale/admin/routing/routing_manager.py | 70 ++++- .../appscale/admin/routing/templates/base.cfg | 2 +- .../admin/routing/templates/listen_block.cfg | 3 + .../admin/routing/templates/version.cfg | 4 - AppController/djinn.rb | 160 +--------- AppController/lib/app_dashboard.rb | 3 - AppController/lib/datastore_server.rb | 4 - AppController/lib/haproxy.rb | 294 ------------------ AppController/lib/search.rb | 4 - AppDB/appscale/datastore/scripts/blobstore.py | 5 +- AppDB/appscale/datastore/scripts/datastore.py | 6 +- AppDB/appscale/datastore/scripts/ua_server.py | 6 +- .../appscale/taskqueue/appscale_taskqueue.py | 4 +- SearchService2/appscale/search/constants.py | 5 +- .../appscale/search/search_server.py | 5 +- common/appscale/common/constants.py | 21 ++ 17 files changed, 205 insertions(+), 557 deletions(-) create mode 100644 AdminServer/appscale/admin/routing/templates/listen_block.cfg delete mode 100644 AdminServer/appscale/admin/routing/templates/version.cfg diff --git a/AdminServer/appscale/admin/routing/haproxy.py b/AdminServer/appscale/admin/routing/haproxy.py index aac2dff79f..fd07ceafcf 100644 --- a/AdminServer/appscale/admin/routing/haproxy.py +++ b/AdminServer/appscale/admin/routing/haproxy.py @@ -4,55 +4,77 @@ import monotonic import os import pkgutil +import signal import subprocess from tornado import gen from appscale.common.appscale_info import get_private_ip -logger = logging.getLogger('appscale-admin') +logger = logging.getLogger(__name__) # The directory that contains HAProxy config files. CONFIG_DIR = os.path.join('/', 'etc', 'haproxy') +# The location of the combined HAProxy config file for AppServer instances. +APP_CONFIG = os.path.join(CONFIG_DIR, 'app-haproxy.cfg') + +# The location of the combined HAProxy config file for AppScale services. 
+SERVICE_CONFIG = os.path.join(CONFIG_DIR, 'service-haproxy.cfg') + +# The location of the pidfile for instance-related HAProxy processes. +APP_PID = os.path.join('/', 'var', 'run', 'appscale', 'app-haproxy.pid') + +# The location of the pidfile for service-related HAProxy processes. +SERVICE_PID = os.path.join('/', 'var', 'run', 'appscale', 'service-haproxy.pid') + +# The location of the unix socket used for reporting application stats. +APP_STATS_SOCKET = os.path.join(CONFIG_DIR, 'stats') + +# The location of the unix socket used for reporting service stats. +SERVICE_STATS_SOCKET = os.path.join(CONFIG_DIR, 'service-stats') + class InvalidConfig(Exception): """ Indicates that a given HAProxy configuration cannot be enforced. """ pass -class HAProxyAppVersion(object): - """ Represents a version's HAProxy configuration. """ +class HAProxyListenBlock(object): + """ Represents an HAProxy configuration block. """ # The template for a server config line. - SERVER_TEMPLATE = ('server gae_{version}-{server} {server} ' + SERVER_TEMPLATE = ('server {block_id}-{location} {location} ' 'maxconn {max_connections} check') - # The template for a version block. - VERSION_TEMPLATE = pkgutil.get_data('appscale.admin.routing', - 'templates/version.cfg') + # The template for a listen block. + BLOCK_TEMPLATE = pkgutil.get_data('appscale.admin.routing', + 'templates/listen_block.cfg') - def __init__(self, version_key, port, max_connections): - """ Creates a new HAProxyAppVersion instance. + def __init__(self, block_id, port, max_connections, servers=()): + """ Creates a new HAProxyListenBlock instance. Args: - version_key: A string specifying a version + block_id: A string specifying the name of the listen block. + port: An integer specifying the listen port. + max_connections: An integer specifying the max number of connections. + servers: An iterable specifying server locations. """ - self.version_key = version_key + self.block_id = block_id self.port = port self.max_connections = max_connections - self.servers = [] + self.servers = servers self._private_ip = get_private_ip() def __repr__(self): """ Returns a print-friendly representation of the version config. """ - return 'HAProxyAppVersion<{}:{}, maxconn:{}, servers:{}>'.format( - self.version_key, self.port, self.max_connections, self.servers) + return 'HAProxyListenBlock({!r}, {!r}, {!r}, {!r})'.format( + self.block_id, self.port, self.max_connections, self.servers) @property def block(self): - """ Represents the version as a configuration block. + """ Generates the configuration block. Returns: A string containing the configuration block or None. @@ -61,27 +83,18 @@ def block(self): return None server_lines = [ - self.SERVER_TEMPLATE.format(version=self.version_key, server=server, + self.SERVER_TEMPLATE.format(block_id=self.block_id, location=server, max_connections=self.max_connections) for server in self.servers] server_lines.sort() bind_location = ':'.join([self._private_ip, str(self.port)]) - return self.VERSION_TEMPLATE.format( - version=self.version_key, bind_location=bind_location, + return self.BLOCK_TEMPLATE.format( + block_id=self.block_id, bind_location=bind_location, servers='\n '.join(server_lines)) class HAProxy(object): - """ Manages HAProxy operations. """ - - # The location of the combined HAProxy config file for AppServer instances. - APP_CONFIG = os.path.join(CONFIG_DIR, 'app-haproxy.cfg') - - # The location of the pidfile for instance-related HAProxy processes. 
- APP_PID = os.path.join('/', 'var', 'run', 'appscale', 'app-haproxy.pid') - - # The location of the unix socket used for reporting stats. - APP_STATS_SOCKET = os.path.join(CONFIG_DIR, 'stats') + """ Manages an HAProxy process. """ # The template for the configuration file. BASE_TEMPLATE = pkgutil.get_data('appscale.admin.routing', @@ -99,14 +112,18 @@ class HAProxy(object): # The minimum number of seconds to wait between each reload operation. RELOAD_COOLDOWN = .1 - def __init__(self): + def __init__(self, config_location, pid_location, stats_socket): """ Creates a new HAProxy operator. """ self.connect_timeout_ms = self.DEFAULT_CONNECT_TIMEOUT * 1000 self.client_timeout_ms = self.DEFAULT_CLIENT_TIMEOUT * 1000 self.server_timeout_ms = self.DEFAULT_SERVER_TIMEOUT * 1000 - self.versions = {} + self.blocks = {} self.reload_future = None + self._config_location = config_location + self._pid_location = pid_location + self._stats_socket = stats_socket + # Given the arbitrary base of the monotonic clock, it doesn't make sense # for outside functions to access this attribute. self._last_reload = monotonic.monotonic() @@ -119,22 +136,25 @@ def config(self): A string containing a complete HAProxy configuration. """ unique_ports = set() - for version in self.versions.values(): - if version.port in unique_ports: + for block in self.blocks.values(): + if block.port in unique_ports: raise InvalidConfig('Port {} is used by more than one ' - 'version'.format(version.port)) + 'block'.format(block.port)) - unique_ports.add(version.port) + unique_ports.add(block.port) + + listen_blocks = [self.blocks[key].block + for key in sorted(self.blocks.keys()) + if self.blocks[key].block] + if not listen_blocks: + return None - version_blocks = [self.versions[key].block - for key in sorted(self.versions.keys()) - if self.versions[key].block] return self.BASE_TEMPLATE.format( - stats_socket=self.APP_STATS_SOCKET, + stats_socket=self._stats_socket, connect_timeout=self.connect_timeout_ms, client_timeout=self.client_timeout_ms, server_timeout=self.server_timeout_ms, - versions='\n'.join(version_blocks)) + listen_blocks='\n'.join(listen_blocks)) @gen.coroutine def reload(self): @@ -144,6 +164,36 @@ def reload(self): yield self.reload_future + def _get_pid(self): + try: + with open(self._pid_location) as pid_file: + pid = int(pid_file.read()) + except IOError as error: + if error.errno != errno.ENOENT: + raise + + pid = None + + # Check if the process is running. + if pid is not None: + try: + os.kill(pid, 0) + except OSError: + pid = None + + return pid + + def _stop(self): + pid = self._get_pid() + if pid is not None: + os.kill(pid, signal.SIGUSR1) + + try: + os.remove(self._config_location) + except OSError as error: + if error.errno != errno.ENOENT: + raise + @gen.coroutine def _reload(self): """ Updates the routing entries if they've changed. """ @@ -158,9 +208,13 @@ def _reload(self): logger.error(str(error)) return + # Ensure process is not running if there is nothing to route. 
+ if new_content is None: + self._stop() + try: - with open(self.APP_CONFIG, 'r') as app_config_file: - existing_content = app_config_file.read() + with open(self._config_location, 'r') as config_file: + existing_content = config_file.read() except IOError as error: if error.errno != errno.ENOENT: raise @@ -170,31 +224,15 @@ def _reload(self): if new_content == existing_content: return - with open(self.APP_CONFIG, 'w') as app_config_file: - app_config_file.write(new_content) - - try: - with open(self.APP_PID) as pid_file: - pid = int(pid_file.read()) - - except IOError as error: - if error.errno != errno.ENOENT: - raise - - pid = None - - # Check if the process is running. - if pid is not None: - try: - os.kill(pid, 0) - except OSError: - pid = None + with open(self._config_location, 'w') as config_file: + config_file.write(new_content) + pid = self._get_pid() if pid is None: - subprocess.check_call(['haproxy', '-f', self.APP_CONFIG, '-D', - '-p', self.APP_PID]) + subprocess.check_call(['haproxy', '-f', self._config_location, '-D', + '-p', self._pid_location]) else: - subprocess.check_call(['haproxy', '-f', self.APP_CONFIG, '-D', - '-p', self.APP_PID, '-sf', str(pid)]) + subprocess.check_call(['haproxy', '-f', self._config_location, '-D', + '-p', self._pid_location, '-sf', str(pid)]) logger.info('Updated HAProxy config') diff --git a/AdminServer/appscale/admin/routing/routing_manager.py b/AdminServer/appscale/admin/routing/routing_manager.py index adc866c3d9..c87421f362 100644 --- a/AdminServer/appscale/admin/routing/routing_manager.py +++ b/AdminServer/appscale/admin/routing/routing_manager.py @@ -1,19 +1,36 @@ """ Configures routing for AppServer instances. """ import json import logging +from collections import namedtuple +import six from tornado import gen from tornado.ioloop import IOLoop from appscale.admin.constants import CONTROLLER_STATE_NODE -from appscale.admin.routing.haproxy import HAProxy, HAProxyAppVersion +from appscale.admin.routing.haproxy import ( + APP_CONFIG, APP_PID, APP_STATS_SOCKET, HAProxy, HAProxyListenBlock, + SERVICE_CONFIG, SERVICE_PID, SERVICE_STATS_SOCKET) from appscale.common.async_retrying import ( retry_children_watch_coroutine, retry_data_watch_coroutine) -from appscale.common.constants import (VERSION_PATH_SEPARATOR, - VERSION_REGISTRATION_NODE) +from appscale.common.constants import ( + BLOBSTORE_SERVERS_NODE, BLOBSTORE_PORT, DATASTORE_SERVERS_NODE, + DB_SERVER_PORT, SEARCH_SERVERS_NODE, SEARCH_SERVICE_PORT, + TASKQUEUE_SERVICE_PORT, TQ_SERVERS_NODE, UA_SERVERS_NODE, UA_SERVER_PORT, + VERSION_PATH_SEPARATOR, VERSION_REGISTRATION_NODE) logger = logging.getLogger('appscale-admin') +ServiceInfo = namedtuple('ServiceInfo', ['registration_node', 'port', 'max_connections']) + +SERVICE_DETAILS = { + 'as_blob_server': ServiceInfo(BLOBSTORE_SERVERS_NODE, BLOBSTORE_PORT, 1), + 'appscale-datastore_server': ServiceInfo(DATASTORE_SERVERS_NODE, DB_SERVER_PORT, 2), + 'appscale-search_server': ServiceInfo(SEARCH_SERVERS_NODE, SEARCH_SERVICE_PORT, 2), + 'TaskQueue': ServiceInfo(TQ_SERVERS_NODE, TASKQUEUE_SERVICE_PORT, 1), + 'UserAppServer': ServiceInfo(UA_SERVERS_NODE, UA_SERVER_PORT, 1) +} + class VersionRoutingManager(object): """ Configures routing for an AppServer instance. """ @@ -115,15 +132,15 @@ def _update_version_block(self): # the version. 
if (self._port is None or self._max_connections is None or not self._instances): - self._haproxy.versions.pop(self._version_key, None) + self._haproxy.blocks.pop(self._version_key, None) yield self._haproxy.reload() return - if self._version_key not in self._haproxy.versions: - self._haproxy.versions[self._version_key] = HAProxyAppVersion( - self._version_key, self._port, self._max_connections) + if self._version_key not in self._haproxy.blocks: + self._haproxy.blocks[self._version_key] = HAProxyListenBlock( + 'gae_' + self._version_key, self._port, self._max_connections) - haproxy_app_version = self._haproxy.versions[self._version_key] + haproxy_app_version = self._haproxy.blocks[self._version_key] haproxy_app_version.port = self._port haproxy_app_version.max_connections = self._max_connections haproxy_app_version.servers = self._instances @@ -149,7 +166,9 @@ def __init__(self, zk_client): Args: zk_client: A KazooClient. """ - self._haproxy = HAProxy() + self._app_haproxy = HAProxy(APP_CONFIG, APP_PID, APP_STATS_SOCKET) + self._service_haproxy = HAProxy(SERVICE_CONFIG, SERVICE_PID, + SERVICE_STATS_SOCKET) self._versions = {} self._zk_client = zk_client @@ -164,6 +183,11 @@ def start(self): self._zk_client.ChildrenWatch(VERSION_REGISTRATION_NODE, self._update_versions_watch) + for service_id, service_info in six.iteritems(SERVICE_DETAILS): + self._zk_client.ensure_path(service_info.registration_node) + self._zk_client.ChildrenWatch(service_info.registration_node, + self._create_service_watch(service_id)) + @gen.coroutine def _update_versions(self, new_version_list): """ Handles changes to list of registered versions. @@ -182,7 +206,7 @@ def _update_versions(self, new_version_list): for version_key in new_version_list: if version_key not in self._versions: self._versions[version_key] = VersionRoutingManager( - version_key, self._zk_client, self._haproxy) + version_key, self._zk_client, self._app_haproxy) def _update_versions_watch(self, versions): """ Handles changes to list of registered versions. @@ -194,6 +218,26 @@ def _update_versions_watch(self, versions): VERSION_REGISTRATION_NODE, self._update_versions) IOLoop.instance().add_callback(persistent_update_versions, versions) + @gen.coroutine + def _update_service(self, service_id, new_locations): + if service_id not in self._service_haproxy.blocks: + service = SERVICE_DETAILS[service_id] + self._service_haproxy.blocks[service_id] = HAProxyListenBlock( + service_id, service.port, service.max_connections) + + block = self._service_haproxy.blocks[service_id] + block.servers = new_locations + yield self._service_haproxy.reload() + + def _create_service_watch(self, service_id): + def update_service_watch(locations): + persistent_update_service = retry_children_watch_coroutine( + SERVICE_DETAILS[service_id][0], self._update_service) + IOLoop.instance().add_callback(persistent_update_service, service_id, + locations) + + return update_service_watch + @gen.coroutine def _update_controller_state(self, encoded_controller_state): """ Handles updates to controller state. 
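As a rough illustration of the refactor above: the routing manager now registers an `HAProxyListenBlock` per version (named with a `gae_` prefix) and, via `SERVICE_DETAILS`, per internal service. The stand-alone sketch below mirrors the `SERVER_TEMPLATE` and the new `templates/listen_block.cfg` from this patch to show what one registered block renders to; it is a simplified stand-in, not the actual `appscale.admin.routing.haproxy` module, and the addresses and ports in the example are invented.

```python
# Simplified stand-in for HAProxyListenBlock.block, based on the templates in
# this patch. Illustration only, not the real module.
SERVER_TEMPLATE = ('server {block_id}-{location} {location} '
                   'maxconn {max_connections} check')

# Mirrors AdminServer/appscale/admin/routing/templates/listen_block.cfg.
BLOCK_TEMPLATE = ('listen {block_id}\n'
                  '  bind {bind_location}\n'
                  '  {servers}')


def render_listen_block(block_id, private_ip, port, max_connections, servers):
  """ Renders one HAProxy listen block for the given server locations. """
  if not servers:
    # The real property also returns None when there is nothing to route.
    return None

  server_lines = sorted(
    SERVER_TEMPLATE.format(block_id=block_id, location=location,
                           max_connections=max_connections)
    for location in servers)
  bind_location = ':'.join([private_ip, str(port)])
  return BLOCK_TEMPLATE.format(block_id=block_id,
                               bind_location=bind_location,
                               servers='\n  '.join(server_lines))


if __name__ == '__main__':
  # Example: a version block and a service block, roughly as the routing
  # manager would register them (all IPs and ports here are made up).
  print(render_listen_block('gae_guestbook_default_v1', '10.10.1.10', 10000,
                            7, ['10.10.1.20:20000', '10.10.1.21:20000']))
  print(render_listen_block('UserAppServer', '10.10.1.10', 4342, 1,
                            ['10.10.1.20:4343']))
```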
@@ -210,9 +254,9 @@ def _update_controller_state(self, encoded_controller_state): connect_timeout_ms = int(controller_state.get('@options', {}).\ get('lb_connect_timeout', HAProxy.DEFAULT_CONNECT_TIMEOUT * 1000)) - if connect_timeout_ms != self._haproxy.connect_timeout_ms: - self._haproxy.connect_timeout_ms = connect_timeout_ms - yield self._haproxy.reload() + if connect_timeout_ms != self._app_haproxy.connect_timeout_ms: + self._app_haproxy.connect_timeout_ms = connect_timeout_ms + yield self._app_haproxy.reload() def _controller_state_watch(self, encoded_controller_state, _): """ Handles updates to controller state. diff --git a/AdminServer/appscale/admin/routing/templates/base.cfg b/AdminServer/appscale/admin/routing/templates/base.cfg index fce1a7b4a9..705359e2f5 100644 --- a/AdminServer/appscale/admin/routing/templates/base.cfg +++ b/AdminServer/appscale/admin/routing/templates/base.cfg @@ -49,4 +49,4 @@ defaults # Amount of time after which a health check is considered to have timed out timeout check 5000 -{versions} +{listen_blocks} diff --git a/AdminServer/appscale/admin/routing/templates/listen_block.cfg b/AdminServer/appscale/admin/routing/templates/listen_block.cfg new file mode 100644 index 0000000000..295c84e6ba --- /dev/null +++ b/AdminServer/appscale/admin/routing/templates/listen_block.cfg @@ -0,0 +1,3 @@ +listen {block_id} + bind {bind_location} + {servers} diff --git a/AdminServer/appscale/admin/routing/templates/version.cfg b/AdminServer/appscale/admin/routing/templates/version.cfg deleted file mode 100644 index c1dbfb17a6..0000000000 --- a/AdminServer/appscale/admin/routing/templates/version.cfg +++ /dev/null @@ -1,4 +0,0 @@ -# Create a load balancer for {version}. -listen gae_{version} - bind {bind_location} - {servers} diff --git a/AppController/djinn.rb b/AppController/djinn.rb index a36e8fd716..986c7aa36b 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -74,10 +74,6 @@ # exposed SOAP method but provide an incorrect secret. BAD_SECRET_MSG = 'false: bad secret'.freeze -# The String that should be returned to callers if they attempt to add or remove -# AppServers from an HAProxy config file at a node where HAProxy is not running. -NO_HAPROXY_PRESENT = 'false: haproxy not running'.freeze - # The String that should be returned to callers if they attempt to add # AppServers for an app that does not yet have nginx and haproxy set up. NOT_READY = 'false: not ready yet'.freeze @@ -1619,31 +1615,6 @@ def get_all_private_ips(secret) def check_api_services has_search2 = !get_search2.empty? - # Wait for required services to be registered. - if my_node.is_load_balancer? - until update_db_haproxy - Djinn.log_info('Waiting for Datastore servers') - sleep(SMALL_WAIT) - end - - until update_tq_haproxy - Djinn.log_info('Waiting for TaskQueue servers') - sleep(SMALL_WAIT) - end - - until update_blob_servers - Djinn.log_info('Waiting for blobstore servers') - sleep(SMALL_WAIT) - end - - if has_search2 - until update_search2_haproxy - Djinn.log_info('Waiting for Search2 servers') - sleep(SMALL_WAIT) - end - end - end - # Wait till the Datastore is functional. loop do break if HelperFunctions.is_port_open?(get_load_balancer.private_ip, @@ -1914,12 +1885,7 @@ def job_start(secret) APPS_LOCK.synchronize { check_stopped_apps } end if my_node.is_load_balancer? - # Load balancers need to regenerate nginx/haproxy configuration if needed. - update_ua_haproxy - update_db_haproxy - update_tq_haproxy - update_blob_servers - update_search2_haproxy unless get_search2.empty? 
+ # Load balancers need to regenerate nginx configuration if needed. APPS_LOCK.synchronize { regenerate_routing_config } end @state = "Done starting up AppScale, now in heartbeat mode" @@ -2605,22 +2571,6 @@ def gather_logs(secret) return uuid end - # Updates the list of blob_server in haproxy. - def update_blob_servers - begin - servers = ZKInterface.get_blob_servers.map { |machine_ip, port| - {'ip' => machine_ip, 'port' => port} - } - rescue FailedZooKeeperOperationException - Djinn.log_warn('Unable to fetch list of datastore servers') - return false - end - - HAProxy.create_app_config(servers, my_node.private_ip, - BlobServer::HAPROXY_PORT, BlobServer::NAME) - return true - end - # Creates an Nginx configuration file for the Users/Apps soap server. def configure_uaserver Nginx.add_service_location( @@ -2628,78 +2578,6 @@ def configure_uaserver UserAppClient::HAPROXY_SERVER_PORT, UserAppClient::SSL_SERVER_PORT) end - def update_db_haproxy - begin - servers = ZKInterface.get_datastore_servers.map { |machine_ip, port| - {'ip' => machine_ip, 'port' => port} - } - rescue FailedZooKeeperOperationException - Djinn.log_warn('Unable to fetch list of datastore servers') - return false - end - - HAProxy.create_app_config(servers, '*', DatastoreServer::PROXY_PORT, - DatastoreServer::NAME) - return true - end - - def update_search2_haproxy - begin - servers = ZKInterface.get_search2_servers.map { |machine_ip, port| - {'ip' => machine_ip, 'port' => port} - } - rescue FailedZooKeeperOperationException - Djinn.log_warn('Unable to fetch list of search2 servers') - return false - end - - HAProxy.create_app_config(servers, '*', Search2::PROXY_PORT, Search2::NAME) - return true - end - - def update_tq_haproxy - begin - servers = ZKInterface.get_taskqueue_servers.map { |machine_ip, port| - {'ip' => machine_ip, 'port' => port} - } - rescue FailedZooKeeperOperationException - Djinn.log_warn('Unable to fetch list of taskqueue servers') - return false - end - - HAProxy.create_app_config(servers, my_node.private_ip, - TaskQueue::HAPROXY_PORT, TaskQueue::NAME) - return true - end - - def update_ua_haproxy - if ZKInterface.is_connected? - begin - servers = ZKInterface.get_ua_servers.map { |machine_ip, port| - {'ip' => machine_ip, 'port' => port} - } - rescue FailedZooKeeperOperationException - Djinn.log_warn('Unable to fetch list of UA servers') - return false - end - else - # If there is no ZK connection, guess the locations for now. - servers = [] - @state_change_lock.synchronize { - servers = @nodes.map { |node| - if node.is_db_master? or node.is_db_slave? - {'ip' => node.private_ip, 'port' => UserAppClient::SERVER_PORT} - end - }.compact - } - end - - HAProxy.create_app_config( - servers, my_node.private_ip, UserAppClient::HAPROXY_SERVER_PORT, - UserAppClient::NAME) - return true - end - # Creates nginx configuration for TaskQueue. def configure_tq_routing # TaskQueue REST API routing. @@ -3358,13 +3236,13 @@ def start_stop_api_services end if my_node.is_load_balancer? - until update_ua_haproxy - Djinn.log_info('Waiting for UA servers') - sleep(SMALL_WAIT) - end configure_uaserver end + # The AdminServer is started before checking the availability of the + # UAServer because it is responsible for routing it. + start_admin_server + # All nodes wait for the UserAppServer now. The call here is just to # ensure the UserAppServer is talking to the persistent state. 
HelperFunctions.sleep_until_port_is_open( @@ -3397,8 +3275,6 @@ def start_stop_api_services } end - threads << Thread.new { start_admin_server } - if my_node.is_memcache? threads << Thread.new { start_memcache } else @@ -4221,10 +4097,9 @@ def write_locations end end - # Writes new nginx and haproxy configuration files for the App Engine - # applications hosted in this deployment. Callers should invoke this - # method whenever there is a change in the number of machines hosting - # App Engine apps. + # Writes new nginx configuration files for the App Engine applications hosted + # in this deployment. Callers should invoke this method whenever there is a + # change in the number of machines hosting App Engine apps. def regenerate_routing_config Djinn.log_debug("Regenerating nginx config files for apps.") my_private = my_node.private_ip @@ -4382,9 +4257,6 @@ def mount_persistent_storage # This function performs basic setup ahead of starting the API services. def initialize_server - HAProxy.initialize_config(@options['lb_connect_timeout']) - Djinn.log_info("HAProxy configured.") - if not Nginx.is_running? Nginx.initialize_config Nginx.start @@ -4393,13 +4265,6 @@ def initialize_server Djinn.log_info("Nginx already configured and running.") end - # The HAProxy process needs at least one configured service to start. The - # UAServer is configured first to satisfy this condition. - update_ua_haproxy - - # This ensures HAProxy gets started after a machine reboot. - HAProxy.services_start - # Volume is mounted, let's finish the configuration of static files. if my_node.is_shadow? and not my_node.is_compute? write_app_logrotate @@ -4676,7 +4541,6 @@ def prep_app_dashboard @state = "Preparing AppDashboard" Djinn.log_info("Preparing AppDashboard") - my_public = my_node.public_ip my_private = my_node.private_ip datastore_location = [get_load_balancer.private_ip, @@ -4793,8 +4657,7 @@ def check_stopped_apps removed_versions << version_key } - # Load balancers have to adjust nginx and haproxy to remove the - # application routings. + # Load balancers have to adjust nginx to remove the application routings. if my_node.is_load_balancer? MonitInterface.running_xmpp.each { |xmpp_app| match = xmpp_app.match(/xmpp-(.*)/) @@ -6045,7 +5908,6 @@ def get_node_stats_json(secret) # def get_application_load_stats(version_key) total_requests, requests_in_queue, sessions = 0, 0, 0 - pxname = "#{HelperFunctions::GAE_PREFIX}#{version_key}" time = :no_stats lb_nodes = [] @state_change_lock.synchronize { @@ -6054,7 +5916,7 @@ def get_application_load_stats(version_key) lb_nodes.each { |node| begin ip = node.private_ip - load_stats = HermesClient.get_proxy_load_stats(ip, @@secret, pxname) + load_stats = HermesClient.get_proxy_load_stats(ip, @@secret, version_key) total_requests += load_stats[0] requests_in_queue += load_stats[1] sessions += load_stats[2] @@ -6065,7 +5927,7 @@ def get_application_load_stats(version_key) } if lb_nodes.length > 1 # Report total HAProxy stats if there are multiple LB nodes. 
- Djinn.log_debug("Summarized HAProxy load stats for #{pxname}: " \ + Djinn.log_debug("Summarized HAProxy load stats for #{version_key}: " \ "req_tot=#{total_requests}, qcur=#{requests_in_queue}, scur=#{sessions}") end return total_requests, requests_in_queue, sessions, time diff --git a/AppController/lib/app_dashboard.rb b/AppController/lib/app_dashboard.rb index 8c87e26b7e..b18096bdb8 100644 --- a/AppController/lib/app_dashboard.rb +++ b/AppController/lib/app_dashboard.rb @@ -6,9 +6,6 @@ # implementation of the Google App Engine Users API. This module provides # methods that abstract away its configuration and deployment. module AppDashboard - # The port which nginx will use to send requests to haproxy - PROXY_PORT = 8060 - # The port which requests to this app will be served from LISTEN_PORT = 1080 diff --git a/AppController/lib/datastore_server.rb b/AppController/lib/datastore_server.rb index a94b393879..9c57bf1b7c 100644 --- a/AppController/lib/datastore_server.rb +++ b/AppController/lib/datastore_server.rb @@ -12,8 +12,4 @@ module DatastoreServer # The name that nginx should use as the identifier for the DatastoreServer when it # we write its configuration files. NAME = 'appscale-datastore_server'.freeze - - # Maximum number of concurrent requests that can be served - # by instance of datastore - MAXCONN = 2 end diff --git a/AppController/lib/haproxy.rb b/AppController/lib/haproxy.rb index c8d879626a..98b1b27968 100644 --- a/AppController/lib/haproxy.rb +++ b/AppController/lib/haproxy.rb @@ -1,300 +1,6 @@ #!/usr/bin/ruby -w -require 'fileutils' -require 'posixpsutil' - -$:.unshift File.join(File.dirname(__FILE__)) -require 'helperfunctions' -require 'app_dashboard' -require 'monit_interface' -require 'user_app_client' -require 'datastore_server' -require 'taskqueue' -require 'blobstore' -require 'search' - -# As AppServers within AppScale are usually single-threaded, we run multiple -# copies of them and load balance traffic to them. Since nginx (our first -# load balancer) doesn't do health checks on the AppServer before it dispatches -# traffic to it, we employ haproxy, an open source load balancer that does -# provide this capability. This module abstracts away configuration and -# deployment for haproxy. module HAProxy - # We do have 2 haproxy, one that is used for AppServers, and the other - # for internal AppScale services (Datastore, TaskQueue etc...). We keep - # them separate to be able to control when reload is necessary. - HAPROXY_PATH = File.join('/', 'etc', 'haproxy') - CONFIG_EXTENSION = 'cfg'.freeze - HAPROXY_BIN = `which haproxy`.chomp - BASH_BIN = `which bash`.chomp - - # These are for the AppScale internal services haproxy. - SERVICE_SITES_PATH = File.join(HAPROXY_PATH, 'service-sites-enabled') - SERVICE_MAIN_FILE = File.join(HAPROXY_PATH, "service-haproxy.#{CONFIG_EXTENSION}") - SERVICE_BASE_FILE = File.join(HAPROXY_PATH, "service-base.#{CONFIG_EXTENSION}") - SERVICE_PIDFILE = '/var/run/appscale/service-haproxy.pid'.freeze - # Maximum AppServer threaded connections MAX_APPSERVER_CONN = 7 - - # The first port that haproxy will bind to for App Engine apps. - START_PORT = 10000 - - # The default server timeout for the dashboard (apploadbalancer) - ALB_SERVER_TIMEOUT = 300000 - - # The position in the haproxy profiling information where the name of - # of the application is (ie the GAE app, or datastore etc..). - APP_NAME_INDEX = 0 - - # The position in the haproxy profiling information where the name of - # the service (e.g., the frontend or backend) is specified. 
- SERVICE_NAME_INDEX = 1 - - # The position in the haproxy profiling information where the number of - # enqueued requests is specified. - REQ_IN_QUEUE_INDEX = 2 - - # The position in the haproxy profiling information where the number of - # current sessions is specified. - CURRENT_SESSIONS_INDEX = 4 - - # The position in the haproxy profiling information where the status of - # the specific server is specified. - SERVER_STATUS_INDEX = 17 - - # The position in the haproxy profiling information where the total - # number of requests seen for a given app is specified. - TOTAL_REQUEST_RATE_INDEX = 48 - - # The String haproxy returns when we try to set a parameter on a - # non defined server or backend. - HAPROXY_ERROR_PREFIX = 'No such'.freeze - - # The number of seconds HAProxy should wait for a server response. - HAPROXY_SERVER_TIMEOUT = 600 - - # The version key regex. - VERSION_KEY_REGEX = /#{HelperFunctions::GAE_PREFIX}(.*_.*_.*).#{CONFIG_EXTENSION}/ - - # Start HAProxy for API services. - def self.services_start - if !valid_config?(SERVICE_MAIN_FILE) - Djinn.log_warn('Invalid configuration for HAProxy services.') - return - end - return if MonitInterface.is_running?(:service_haproxy) - - start_cmd = "#{HAPROXY_BIN} -f #{SERVICE_MAIN_FILE} -D " \ - "-p #{SERVICE_PIDFILE}" - stop_cmd = "#{BASH_BIN} -c 'kill $(cat #{SERVICE_PIDFILE})'" - restart_cmd = "#{BASH_BIN} -c '#{start_cmd} -sf $(cat #{SERVICE_PIDFILE})'" - MonitInterface.start_daemon( - :service_haproxy, start_cmd, stop_cmd, SERVICE_PIDFILE, nil, restart_cmd) - end - - # A generic function for creating HAProxy config files used by AppScale services. - # - # Arguments: - # servers : list of hashes containing server IPs and respective ports - # listen_ip : the IP HAProxy should listen for - # listen_port : the port to listen to - # name : the name of the server - # Returns: - # Boolean : true if config was good, false if parameters were - # incorrect - def self.create_app_config(servers, my_private_ip, listen_port, name) - if servers.empty? - Djinn.log_warn('create_app_config called with no running servers.') - return false - end - - # We only serve internal services here. - unless [TaskQueue::NAME, DatastoreServer::NAME, Search2::NAME, - UserAppClient::NAME, BlobServer::NAME].include?(name) - Djinn.log_warn("create_app_config called for unknown service: #{name}.") - return false - end - - full_version_name = "#{name}" - config_path = File.join(SERVICE_SITES_PATH, - "#{full_version_name}.#{CONFIG_EXTENSION}") - config = "# Create a load balancer for #{name}.\n" - config << "listen #{full_version_name}\n" - config << " bind #{my_private_ip}:#{listen_port}\n" - servers.each do |server| - config << HAProxy.server_config(full_version_name, - "#{server['ip']}:#{server['port']}") + - "\n" - end - - # If it is the dashboard app, increase the server timeout because - # uploading apps can take some time. - if name.split(Djinn::VERSION_PATH_SEPARATOR)[0] == AppDashboard::APP_NAME - config << "\n timeout server #{ALB_SERVER_TIMEOUT}\n" - end - - # Let's overwrite configuration for 'name' only if anything changed. - current = '' - current = File.read(config_path) if File.exists?(config_path) - File.open(config_path, 'w+') { |f| f.write(config) } if current != config - - # This will reload haproxy if anything changed. - HAProxy.regenerate_config - true - end - - # Generates a load balancer configuration file. Since HAProxy doesn't provide - # a `file include` option we emulate that functionality here. 
- def self.regenerate_config_file(config_dir, base_config_file, config_file) - # Remove any files that are not configs - sites = Dir.entries(config_dir) - sites.delete_if { |site| !site.end_with?(CONFIG_EXTENSION) } - sites.sort! - - # Build the configuration in memory first. - config = File.read(base_config_file) - sites.each do |site| - config << File.read(File.join(config_dir, site)) - config << "\n" - end - - # We overwrite only if something changed. - current = '' - current = File.read(config_file) if File.exists?(config_file) - if current == config - Djinn.log_debug("No need to restart haproxy for #{config_file}:" \ - " configuration didn't change.") - return false - end - - # Update config file. - File.open(config_file, 'w+') { |dest_file| dest_file.write(config) } - unless valid_config?(config_file) - Djinn.log_warn("Invalid haproxy configuration at #{config_file}.") - return false - end - - Djinn.log_info("Updated haproxy configuration at #{config_file}.") - true - end - - # Checks if a given HAProxy config file is valid. - def self.valid_config?(config_file) - return false unless File.file?(config_file) - system("#{HAPROXY_BIN} -c -f #{config_file}") - end - - # Regenerate the configuration file for HAProxy (if anything changed) - # then starts or reload haproxy as needed. - def self.regenerate_config - # Regenerate configuration for the AppScale serices haproxy. - if regenerate_config_file(SERVICE_SITES_PATH, - SERVICE_BASE_FILE, - SERVICE_MAIN_FILE) - # Ensure the service is monitored and running. - services_start - Djinn::RETRIES.downto(0) { - break if MonitInterface.is_running?(:service_haproxy) - sleep(Djinn::SMALL_WAIT) - } - - # Reload with the new configuration file. - MonitInterface.restart(:service_haproxy) - end - end - - # Generate the server configuration line for the provided inputs. GAE - # applications that are thread safe will have a higher connection limit. - def self.server_config(server_name, location) - if server_name.start_with?(HelperFunctions::GAE_PREFIX) - version_key = server_name[HelperFunctions::GAE_PREFIX.length..-1] - threadsafe = HelperFunctions.get_version_thread_safe(version_key) - maxconn = threadsafe ? MAX_APPSERVER_CONN : 1 - elsif server_name == DatastoreServer::NAME - # Allow custom number of connections at a time for datastore. - maxconn = DatastoreServer::MAXCONN - elsif server_name == Search2::NAME - # Allow custom number of connections at a time for search2. - maxconn = Search2::MAXCONN - else - # Allow only one connection at a time for other services. - maxconn = 1 - end - - " server #{server_name}-#{location} #{location} maxconn #{maxconn} check" - end - - # Set up the folder structure and creates the configuration files necessary for haproxy - # - # Args: - # connect_timeout: Number of milliseconds for a request to wait before - # a backend server will accept connection. 
- def self.initialize_config(connect_timeout) - base_config = < Date: Sat, 21 Sep 2019 13:55:17 -0700 Subject: [PATCH 135/221] Re-add prefix when requesting version stats --- AppController/djinn.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 986c7aa36b..ec0e0539dc 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -5908,6 +5908,7 @@ def get_node_stats_json(secret) # def get_application_load_stats(version_key) total_requests, requests_in_queue, sessions = 0, 0, 0 + pxname = "#{HelperFunctions::GAE_PREFIX}#{version_key}" time = :no_stats lb_nodes = [] @state_change_lock.synchronize { @@ -5916,7 +5917,7 @@ def get_application_load_stats(version_key) lb_nodes.each { |node| begin ip = node.private_ip - load_stats = HermesClient.get_proxy_load_stats(ip, @@secret, version_key) + load_stats = HermesClient.get_proxy_load_stats(ip, @@secret, pxname) total_requests += load_stats[0] requests_in_queue += load_stats[1] sessions += load_stats[2] @@ -5927,7 +5928,7 @@ def get_application_load_stats(version_key) } if lb_nodes.length > 1 # Report total HAProxy stats if there are multiple LB nodes. - Djinn.log_debug("Summarized HAProxy load stats for #{version_key}: " \ + Djinn.log_debug("Summarized HAProxy load stats for #{pxname}: " \ "req_tot=#{total_requests}, qcur=#{requests_in_queue}, scur=#{sessions}") end return total_requests, requests_in_queue, sessions, time From 0a19b4d9e6201ce4757042b99ac3049b8fa2a072 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 23 Sep 2019 16:16:19 -0700 Subject: [PATCH 136/221] Remove unused functions and constants --- AppController/lib/zkinterface.rb | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/AppController/lib/zkinterface.rb b/AppController/lib/zkinterface.rb index 34b3824a4c..fd8be3d042 100644 --- a/AppController/lib/zkinterface.rb +++ b/AppController/lib/zkinterface.rb @@ -45,21 +45,9 @@ class ZKInterface # and where other nodes will recover that state from. APPCONTROLLER_STATE_PATH = "#{APPCONTROLLER_PATH}/state".freeze - # The ZooKeeper node where blobstore servers register themselves. - BLOBSTORE_REGISTRY_PATH = '/appscale/blobstore/servers' - # The ZooKeeper node where datastore servers register themselves. DATASTORE_REGISTRY_PATH = '/appscale/datastore/servers' - # The ZooKeeper node where search servers register themselves. - SEARCH2_REGISTRY_PATH = '/appscale/search/live_nodes' - - # The ZooKeeper node where taskqueue servers register themselves. - TASKQUEUE_REGISTRY_PATH = '/appscale/tasks/servers' - - # The ZooKeeper node where UA servers register themselves. - UA_REGISTRY_PATH = '/appscale/iam/servers' - # The location in ZooKeeper that AppControllers write information about their # node to, so that others can poll to see if they are alive and what roles # they've taken on. 
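Patch 135 above restores the GAE prefix when asking Hermes for per-version proxy stats: version listen blocks are registered as `'gae_'` plus the version key, so the proxy name reported by HAProxy carries that prefix. A minimal sketch of the aggregation performed by `get_application_load_stats`, assuming `GAE_PREFIX` is `'gae_'` and using a hypothetical stand-in for `HermesClient.get_proxy_load_stats`:

```python
# Sketch only: aggregates (req_tot, qcur, scur) for one version across all
# load balancer nodes, mirroring Djinn#get_application_load_stats.
GAE_PREFIX = 'gae_'  # Assumed value of HelperFunctions::GAE_PREFIX.


def fetch_proxy_load_stats(lb_ip, proxy_name):
  """ Hypothetical stand-in for HermesClient.get_proxy_load_stats. """
  return 0, 0, 0


def get_application_load_stats(version_key, lb_ips):
  # Listen blocks for versions are named 'gae_' + version_key, so the stats
  # lookup must use the same prefixed proxy name.
  proxy_name = GAE_PREFIX + version_key
  total_requests = requests_in_queue = sessions = 0
  for lb_ip in lb_ips:
    req_tot, qcur, scur = fetch_proxy_load_stats(lb_ip, proxy_name)
    total_requests += req_tot
    requests_in_queue += qcur
    sessions += scur
  return total_requests, requests_in_queue, sessions
```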
@@ -263,22 +251,6 @@ def self.get_datastore_servers list_registered(DATASTORE_REGISTRY_PATH) end - def self.get_search2_servers - list_registered(SEARCH2_REGISTRY_PATH) - end - - def self.get_taskqueue_servers - list_registered(TASKQUEUE_REGISTRY_PATH) - end - - def self.get_ua_servers - list_registered(UA_REGISTRY_PATH) - end - - def self.get_blob_servers - list_registered(BLOBSTORE_REGISTRY_PATH) - end - def self.set_machine_assignments(machine_ip, assignments) assignments_node = '/appscale/assignments' ensure_path(assignments_node) From be619543209b0d2578e1c454378955d786a9422d Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 23 Sep 2019 16:16:56 -0700 Subject: [PATCH 137/221] Rename ZK node for consistency with other services --- common/appscale/common/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/appscale/common/constants.py b/common/appscale/common/constants.py index 8f07188e07..ab41f7fdf4 100644 --- a/common/appscale/common/constants.py +++ b/common/appscale/common/constants.py @@ -48,7 +48,7 @@ def non_negative_int(value): DATASTORE_SERVERS_NODE = '/appscale/datastore/servers' # The ZooKeeper path where a list of active search servers is stored. -SEARCH_SERVERS_NODE = '/appscale/search/live_nodes' +SEARCH_SERVERS_NODE = '/appscale/search/servers' # The ZooKeeper path where a list of active taskqueue servers is stored. TQ_SERVERS_NODE = '/appscale/tasks/servers' From f076069e82f7a66152038489e95903147e1a2fe9 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 23 Sep 2019 17:09:36 -0700 Subject: [PATCH 138/221] Use /run directly instead of /var/run symlink --- AdminServer/appscale/admin/routing/haproxy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AdminServer/appscale/admin/routing/haproxy.py b/AdminServer/appscale/admin/routing/haproxy.py index fd07ceafcf..9b83bf0b87 100644 --- a/AdminServer/appscale/admin/routing/haproxy.py +++ b/AdminServer/appscale/admin/routing/haproxy.py @@ -23,10 +23,10 @@ SERVICE_CONFIG = os.path.join(CONFIG_DIR, 'service-haproxy.cfg') # The location of the pidfile for instance-related HAProxy processes. -APP_PID = os.path.join('/', 'var', 'run', 'appscale', 'app-haproxy.pid') +APP_PID = os.path.join('/', 'run', 'appscale', 'app-haproxy.pid') # The location of the pidfile for service-related HAProxy processes. -SERVICE_PID = os.path.join('/', 'var', 'run', 'appscale', 'service-haproxy.pid') +SERVICE_PID = os.path.join('/', 'run', 'appscale', 'service-haproxy.pid') # The location of the unix socket used for reporting application stats. 
APP_STATS_SOCKET = os.path.join(CONFIG_DIR, 'stats') From 24125246231190fb5dfa6c92c83ce6c1f8c39726 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 23 Sep 2019 17:25:05 -0700 Subject: [PATCH 139/221] Consider error code when checking PID --- AdminServer/appscale/admin/routing/haproxy.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/AdminServer/appscale/admin/routing/haproxy.py b/AdminServer/appscale/admin/routing/haproxy.py index 9b83bf0b87..89c84ff817 100644 --- a/AdminServer/appscale/admin/routing/haproxy.py +++ b/AdminServer/appscale/admin/routing/haproxy.py @@ -178,15 +178,23 @@ def _get_pid(self): if pid is not None: try: os.kill(pid, 0) - except OSError: - pid = None + except OSError as error: + if error.errno == errno.ESRCH: + pid = None + else: + logger.warning('Encountered unexpected error when checking haproxy ' + 'process: {}'.format(str(error))) return pid def _stop(self): pid = self._get_pid() if pid is not None: - os.kill(pid, signal.SIGUSR1) + try: + os.kill(pid, signal.SIGUSR1) + except OSError as error: + if error.errno != errno.ESRCH: + logger.error('Unable to stop haproxy process') try: os.remove(self._config_location) From 1dbe694ae175399bcd3090751151bba59dd790e0 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 24 Sep 2019 17:01:27 -0700 Subject: [PATCH 140/221] Pass update_indexes request to datastore service This allows the datastore service to handle the implementation details for how index metadata is stored. This does not change the implementation yet, but future work in this area includes storing index definitions in FDB for that backend. --- AdminServer/appscale/admin/appengine_api.py | 33 +++++++++++++++++-- .../datastore/datastore_distributed.py | 10 ++++++ AppDB/appscale/datastore/fdb/fdb_datastore.py | 14 ++++++++ AppDB/appscale/datastore/scripts/datastore.py | 19 +++++++++++ 4 files changed, 73 insertions(+), 3 deletions(-) diff --git a/AdminServer/appscale/admin/appengine_api.py b/AdminServer/appscale/admin/appengine_api.py index 8d60e3d7ca..0a0cf584c4 100644 --- a/AdminServer/appscale/admin/appengine_api.py +++ b/AdminServer/appscale/admin/appengine_api.py @@ -5,14 +5,27 @@ import json import logging +import random + +try: + import urllib.request + urlopen = urllib.request.urlopen + from urllib.error import URLError +except ImportError: + import urllib2 + urlopen = urllib2.urlopen + from urllib2 import URLError + import six import yaml from kazoo.exceptions import NoNodeError +from tornado.options import options from yaml.parser import ParserError from appscale.appcontroller_client import AppControllerException -from appscale.common.constants import HTTPCodes, InvalidIndexConfiguration -from appscale.common.datastore_index import DatastoreIndex, merge_indexes +from appscale.common.constants import ( + DB_SERVER_PORT, HTTPCodes, InvalidIndexConfiguration) +from appscale.common.datastore_index import DatastoreIndex from .base_handler import BaseHandler from .constants import ( CustomHTTPError, @@ -68,7 +81,21 @@ def post(self): raise CustomHTTPError(HTTPCodes.BAD_REQUEST, message=six.text_type(error)) - merge_indexes(self.zk_client, project_id, given_indexes) + datastore_location = ':'.join( + [random.choice(options.load_balancers), str(DB_SERVER_PORT)]) + url = 'http://{}/index/add?project={}'.format( + datastore_location, project_id) + payload = json.dumps([index.to_dict() for index in given_indexes]) + try: + response = urlopen(url, payload) + except URLError: + raise 
CustomHTTPError(HTTPCodes.INTERNAL_ERROR, + message='Unable to forward request to datastore') + + if response.code != 200: + message = 'Unable to add indexes: {}'.format(response.read()) + raise CustomHTTPError(HTTPCodes.INTERNAL_ERROR, message=message) + logger.info('Updated indexes for {}'.format(project_id)) diff --git a/AppDB/appscale/datastore/datastore_distributed.py b/AppDB/appscale/datastore/datastore_distributed.py index 8534695a1c..6cdccd876e 100644 --- a/AppDB/appscale/datastore/datastore_distributed.py +++ b/AppDB/appscale/datastore/datastore_distributed.py @@ -3272,6 +3272,16 @@ def get_indexes(self, project_id): return indexes + def add_indexes(self, project_id, indexes): + """ Adds composite index definitions to a project. + + Only indexes that do not already exist will be created. + Args: + project_id: A string specifying a project ID. + indexes: An iterable containing index definitions. + """ + merge_indexes(self.zookeeper.handle, project_id, indexes) + def _zk_state_listener(self, state): """ Handles changes to the ZooKeeper connection state. diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index ce85be6de3..79a653147d 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -19,6 +19,7 @@ from tornado.ioloop import IOLoop from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.common.datastore_index import merge_indexes from appscale.datastore.dbconstants import ( BadRequest, ConcurrentModificationException, InternalError) from appscale.datastore.fdb.cache import DirectoryCache @@ -367,6 +368,19 @@ def update_composite_index(self, project_id, index): project_id = decode_str(project_id) yield self.index_manager.update_composite_index(project_id, index) + def add_indexes(self, project_id, indexes): + """ Adds composite index definitions to a project. + + Only indexes that do not already exist will be created. + Args: + project_id: A string specifying a project ID. + indexes: An iterable containing index definitions. + """ + # This is a temporary workaround to get a ZooKeeper client. This method + # will not use ZooKeeper in the future. + zk_client = self.index_manager.composite_index_manager._zk_client + merge_indexes(zk_client, project_id, indexes) + @gen.coroutine def _upsert(self, tr, entity, old_entry_future=None): last_element = entity.key().path().element(-1) diff --git a/AppDB/appscale/datastore/scripts/datastore.py b/AppDB/appscale/datastore/scripts/datastore.py index 76f611144b..5f262a02bd 100644 --- a/AppDB/appscale/datastore/scripts/datastore.py +++ b/AppDB/appscale/datastore/scripts/datastore.py @@ -21,6 +21,7 @@ from appscale.common.appscale_info import get_load_balancer_ips from appscale.common.async_retrying import retry_data_watch_coroutine from appscale.common.constants import ZK_PERSISTENT_RECONNECTS +from appscale.common.datastore_index import DatastoreIndex from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER from kazoo.client import KazooState from kazoo.exceptions import NodeExistsError, NoNodeError @@ -127,6 +128,23 @@ def post(self): yield datastore_access.reserve_ids(project_id, ids) +class AddIndexesHandler(tornado.web.RequestHandler): + def post(self): + """ + At this time, there does not seem to be a public API method for creating + datastore indexes. This is a custom handler to facilitate requests to + /api/datastore/index/add. 
+ + Requests to this handler must define 'project' as a URL parameter. The body + must be a JSON-encoded list of objects containing the details for each + index definition. + """ + project_id = self.get_argument('project') + indexes = [DatastoreIndex.from_dict(project_id, index) + for index in json.loads(self.request.body)] + datastore_access.add_indexes(project_id, indexes) + + class MainHandler(tornado.web.RequestHandler): """ Defines what to do when the webserver receives different types of @@ -863,6 +881,7 @@ def update_servers_watch(new_servers): ('/clear', ClearHandler), ('/read-only', ReadOnlyHandler), ('/reserve-keys', ReserveKeysHandler), + ('/index/add', AddIndexesHandler), (r'/*', MainHandler), ]) From f391409bf6ace7e0674ebf88f9184388bfdbfa62 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 25 Sep 2019 13:29:42 +0300 Subject: [PATCH 141/221] Use SQL shcema per project --- AppTaskQueue/appscale/taskqueue/queue.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 4d62d04757..3a0d5bf962 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -343,9 +343,24 @@ def __init__(self, queue_info, app, pg_connection_wrapper): super(PostgresPullQueue, self).__init__(queue_info, app) self.connection_key = self.app self.pg_connection_wrapper = pg_connection_wrapper + self.ensure_project_schema_created() self.queue_id = self.ensure_queue_registered() self.ensure_tasks_table_created() + # When multiple TQ servers are notified by ZK about new queue + # they sometimes get IntegrityError despite 'IF NOT EXISTS' + @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) + def ensure_project_schema_created(self): + pg_connection = self.pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + logger.info('Ensuring "{schema_name}" schema is created' + .format(schema_name=self.schema_name)) + pg_cursor.execute( + 'CREATE SCHEMA IF NOT EXISTS "{schema_name};' + .format(schema_name=self.schema_name) + ) + # When multiple TQ servers are notified by ZK about new queue # they sometimes get IntegrityError despite 'IF NOT EXISTS' @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) @@ -414,13 +429,17 @@ def ensure_tasks_table_created(self): .format(table_name=self.tasks_table_name) ) + @property + def schema_name(self): + return 'appscale_{}'.format(self.app) + @property def queues_table_name(self): - return 'appscale_queues_{}'.format(self.app) + return '{}.queues'.format(self.schema_name) @property def tasks_table_name(self): - return 'appscale_tasks_{}_{}'.format(self.app, self.queue_id) + return '{}.tasks_{}'.format(self.schema_name, self.queue_id) @retry_pg_connection def add_task(self, task): From 4d537b0054b8339a94583beb15b467612059ef56 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Wed, 25 Sep 2019 13:29:42 +0300 Subject: [PATCH 142/221] Use SQL shcema per project --- AppTaskQueue/appscale/taskqueue/queue.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 4d62d04757..7893220f6f 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -343,9 +343,24 @@ def __init__(self, queue_info, app, pg_connection_wrapper): super(PostgresPullQueue, 
self).__init__(queue_info, app) self.connection_key = self.app self.pg_connection_wrapper = pg_connection_wrapper + self.ensure_project_schema_created() self.queue_id = self.ensure_queue_registered() self.ensure_tasks_table_created() + # When multiple TQ servers are notified by ZK about new queue + # they sometimes get IntegrityError despite 'IF NOT EXISTS' + @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) + def ensure_project_schema_created(self): + pg_connection = self.pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + logger.info('Ensuring "{schema_name}" schema is created' + .format(schema_name=self.schema_name)) + pg_cursor.execute( + 'CREATE SCHEMA IF NOT EXISTS "{schema_name}";' + .format(schema_name=self.schema_name) + ) + # When multiple TQ servers are notified by ZK about new queue # they sometimes get IntegrityError despite 'IF NOT EXISTS' @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) @@ -414,13 +429,17 @@ def ensure_tasks_table_created(self): .format(table_name=self.tasks_table_name) ) + @property + def schema_name(self): + return 'appscale_{}'.format(self.app) + @property def queues_table_name(self): - return 'appscale_queues_{}'.format(self.app) + return '{}.queues'.format(self.schema_name) @property def tasks_table_name(self): - return 'appscale_tasks_{}_{}'.format(self.app, self.queue_id) + return '{}.tasks_{}'.format(self.schema_name, self.queue_id) @retry_pg_connection def add_task(self, task): From e2bf66b7a9693778aa544616ba5bc3717594f253 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 26 Sep 2019 19:33:22 +0300 Subject: [PATCH 143/221] Modify TaskQueue tests to forget Cassandra --- AppTaskQueue/test/e2e/README.md | 5 +- AppTaskQueue/test/e2e/conftest.py | 24 ---- AppTaskQueue/test/e2e/test_protobuffer_api.py | 106 +--------------- .../test/helpers/prepare-cassandra.sh | 88 ------------- AppTaskQueue/test/helpers/prepare-postgres.sh | 47 +++---- .../test/helpers/prepare-zookeeper.sh | 5 +- .../test/helpers/restart-taskqueue.sh | 17 +-- AppTaskQueue/test/load/README.md | 23 +--- AppTaskQueue/test/load/prepare_queues.py | 1 - AppTaskQueue/test/load/worker_locust.py | 4 - AppTaskQueue/test/suites/layout-example.txt | 4 +- AppTaskQueue/test/suites/run-e2e-tests.sh | 37 +----- AppTaskQueue/test/suites/run-load-test.sh | 119 ++---------------- AppTaskQueue/test/unit/test_service_stats.py | 2 +- 14 files changed, 53 insertions(+), 429 deletions(-) delete mode 100755 AppTaskQueue/test/helpers/prepare-cassandra.sh diff --git a/AppTaskQueue/test/e2e/README.md b/AppTaskQueue/test/e2e/README.md index b80c24b570..d8f14e93a2 100644 --- a/AppTaskQueue/test/e2e/README.md +++ b/AppTaskQueue/test/e2e/README.md @@ -57,12 +57,9 @@ if you have a VM started from AppScale image with SSH access to it. 
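Patches 141 and 142 above move the pull-queue tables into a per-project Postgres schema. A condensed, illustration-only view of the naming scheme follows; the project ID `guestbook` is just an example and the helpers below are not the actual `queue.py` code.

```python
# Sketch of the per-project schema naming used by PostgresPullQueue.
def schema_name(project_id):
  return 'appscale_{}'.format(project_id)


def queues_table_name(project_id):
  return '{}.queues'.format(schema_name(project_id))


def tasks_table_name(project_id, queue_id):
  return '{}.tasks_{}'.format(schema_name(project_id), queue_id)


if __name__ == '__main__':
  # DDL issued once per project before queues are registered. IF NOT EXISTS
  # keeps it safe when several TaskQueue servers race; the patch also retries
  # on IntegrityError for that reason.
  print('CREATE SCHEMA IF NOT EXISTS "{}";'.format(schema_name('guestbook')))
  print(queues_table_name('guestbook'))    # appscale_guestbook.queues
  print(tasks_table_name('guestbook', 3))  # appscale_guestbook.tasks_3
```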
The script: - Ensures that needed version of python is installed on the local machine; - Creates virtualenv on the local machine and installs needed python packages; - - Starts Cassandra on the VM and creates needed appscale tables; - Starts Zookeeper on the VM and creates needed nodes there; - Installs, starts and configures Postgres on the VM; - - Starts TaskQueue on the VM using Cassandra as a backend for pull queues; - - Runs e2e tests; - - Restarts TaskQueue on the VM using Postgres as a backend for pull queues; + - Starts TaskQueue on the VM using Postgres as a backend for pull queues; - Runs e2e tests; diff --git a/AppTaskQueue/test/e2e/conftest.py b/AppTaskQueue/test/e2e/conftest.py index 574c22fb39..62463bdbf2 100644 --- a/AppTaskQueue/test/e2e/conftest.py +++ b/AppTaskQueue/test/e2e/conftest.py @@ -6,14 +6,11 @@ import kazoo.client import pytest -from kazoo.exceptions import NoNodeError from helpers import api_helper, taskqueue_service_pb2 TEST_PROJECT = os.environ['TEST_PROJECT'] -PG_DSN_NODE = f'/appscale/projects/{TEST_PROJECT}/postgres_dsn' -PROJECT_NODE = f'/appscale/projects/{TEST_PROJECT}' PROJECT_QUEUES_NODE = f'/appscale/projects/{TEST_PROJECT}/queues' PROJECT_QUEUES_CONFIG = { 'queue': { @@ -26,9 +23,6 @@ } PROJECT_QUEUES_CONFIG_BYTES = bytes(json.dumps(PROJECT_QUEUES_CONFIG), 'utf-8') -POSTGRES = 'postgres' -CASSANDRA = 'cassandra' - def async_test(test): @functools.wraps(test) @@ -55,24 +49,6 @@ def taskqueue(request): return api_helper.TaskQueue(tq_locations, TEST_PROJECT) -@pytest.fixture(scope='session') -def pull_queues_backend(request): - """ Reports what backend is used for Pull Queues - """ - # Configure Zookeeper client - zk_location = request.config.getoption('--zk-location') - zk_client = kazoo.client.KazooClient(hosts=zk_location) - zk_client.start() - # Check if PG_DSN node is specified for the project - try: - zk_client.get(PG_DSN_NODE) - except NoNodeError: - return CASSANDRA - finally: - zk_client.stop() - return POSTGRES - - @pytest.fixture(scope='session', autouse=True) def init_queues_config(request): # Configure Zookeeper client diff --git a/AppTaskQueue/test/e2e/test_protobuffer_api.py b/AppTaskQueue/test/e2e/test_protobuffer_api.py index 8f20fc2b7d..9c68c46a2e 100644 --- a/AppTaskQueue/test/e2e/test_protobuffer_api.py +++ b/AppTaskQueue/test/e2e/test_protobuffer_api.py @@ -3,7 +3,7 @@ import pytest -from conftest import async_test, TEST_PROJECT, POSTGRES, CASSANDRA +from conftest import async_test, TEST_PROJECT from helpers import taskqueue_service_pb2 from helpers.api_helper import timed @@ -89,10 +89,7 @@ async def test_add_lease_prolong_delete(taskqueue): @async_test -async def test_add_lease_retry_retry_delete_pg(taskqueue, pull_queues_backend): - if pull_queues_backend == CASSANDRA: - pytest.skip('Skipped for Cassandra backend') - +async def test_add_lease_retry_retry_delete_pg(taskqueue): # Initialize tasks queue_str = 'pull-queue-5-retry' queue_bytes = bytes(queue_str, 'utf8') @@ -185,102 +182,3 @@ async def test_add_lease_retry_retry_delete_pg(taskqueue, pull_queues_backend): # Verify that queue is empty listed = await taskqueue.rest('GET', path_suffix=f'/{queue_str}/tasks') assert listed.json == {'kind': 'taskqueues#tasks'} # items should be missing - - -@async_test -async def test_add_lease_retry_retry_delete_cassandra(taskqueue, pull_queues_backend): - if pull_queues_backend == POSTGRES: - pytest.skip('Skipped for Postgres backend') - - # Initialize tasks - queue_str = 'pull-queue-5-retry' - queue_bytes = bytes(queue_str, 'utf8') - 
add_tasks = [] - for n in range(4): - add_task = taskqueue_service_pb2.TaskQueueAddRequest() - add_task.app_id = bytes(TEST_PROJECT, 'utf8') - add_task.queue_name = queue_bytes - add_task.mode = taskqueue_service_pb2.TaskQueueMode.PULL - add_task.task_name = b'task-%d' % n - add_task.body = b'some-payload-%d' % n - add_task.eta_usec = 0 - add_tasks.append(add_task) - bulk_add = taskqueue_service_pb2.TaskQueueBulkAddRequest() - bulk_add.add_request.extend(add_tasks) - start_time = await taskqueue.remote_time_usec() - total_delay = 0 - - # Add tasks using bulk add - _, delay = await taskqueue.timed_protobuf('BulkAdd', bulk_add) - total_delay += delay - - # Lease 4 tasks for 2 seconds - lease_req = taskqueue_service_pb2.TaskQueueQueryAndOwnTasksRequest() - lease_req.queue_name = queue_bytes - lease_req.lease_seconds = 2 - lease_req.max_tasks = 4 - leased, delay = await taskqueue.timed_protobuf('QueryAndOwnTasks', lease_req) - total_delay += delay - assert [task.retry_count for task in leased.task] == [0, 0, 0, 0] - # Try to lease 4 tasks for 2 seconds - leased, delay = await taskqueue.timed_protobuf('QueryAndOwnTasks', lease_req) - total_delay += delay - assert [task.retry_count for task in leased.task] == [] - - # Give 5 seconds for lease to expire - time.sleep(5) - - # Lease 2 tasks for 2 seconds (retry) - lease_req.max_tasks = 2 - leased, delay = await taskqueue.timed_protobuf('QueryAndOwnTasks', lease_req) - total_delay += delay - assert [task.retry_count for task in leased.task] == [1, 1] - - # Lease 2 tasks for 2 seconds (retry) - leased, delay = await taskqueue.timed_protobuf('QueryAndOwnTasks', lease_req) - total_delay += delay - assert [task.retry_count for task in leased.task] == [1, 1] - - # Try to lease 2 tasks for 2 seconds - leased, delay = await taskqueue.timed_protobuf('QueryAndOwnTasks', lease_req) - total_delay += delay - assert [task.retry_count for task in leased.task] == [] - - # Give 5 seconds for lease to expire - time.sleep(5) - - # Try to lease 3 tasks for 2 seconds - lease_req.max_tasks = 3 - leased, delay = await taskqueue.timed_protobuf('QueryAndOwnTasks', lease_req) - total_delay += delay - assert [task.retry_count for task in leased.task] == [2, 2, 2] - - # Give 5 seconds for lease to expire - time.sleep(5) - - # Try to lease 3 tasks for 2 seconds - leased, delay = await taskqueue.timed_protobuf('QueryAndOwnTasks', lease_req) - total_delay += delay - assert [task.retry_count for task in leased.task] == [2, 3, 3] - - # Verify listed tasks - listed = await taskqueue.rest('GET', path_suffix=f'/{queue_str}/tasks') - sorting_key = lambda item: (item['retry_count'], int(item['leaseTimestamp'])) - tasks = sorted(listed.json['items'], key=sorting_key) - actual = [(task['retry_count'], int(task['leaseTimestamp'])) for task in tasks] - expected = [ - (3, pytest.approx(start_time + 12_000_000, abs=600_000 + total_delay)), - (3, pytest.approx(start_time + 17_000_000, abs=600_000 + total_delay)), - (4, pytest.approx(start_time + 17_000_000, abs=600_000 + total_delay)), - (4, pytest.approx(start_time + 17_000_000, abs=600_000 + total_delay)), - ] - assert actual == expected - - # Delete tasks - req = taskqueue_service_pb2.TaskQueueDeleteRequest() - req.queue_name = queue_bytes - req.task_name.extend([task.task_name for task in add_tasks]) - await taskqueue.protobuf('Delete', req) - # Verify that queue is empty - listed = await taskqueue.rest('GET', path_suffix=f'/{queue_str}/tasks') - assert listed.json == {'kind': 'taskqueues#tasks'} # items should be missing diff --git 
a/AppTaskQueue/test/helpers/prepare-cassandra.sh b/AppTaskQueue/test/helpers/prepare-cassandra.sh deleted file mode 100755 index 37946f4703..0000000000 --- a/AppTaskQueue/test/helpers/prepare-cassandra.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env bash -# -# Ensures that single-node Cassandra cluster is running on this machine. -# Creates AppScale-related tables in Cassandra. - -set -e -set -u - -usage() { - echo "Usage: ${0} --private-ip --zk-ip " - echo - echo "Options:" - echo " --private-ip Private IP of this machine" - echo " --zk-ip IP of the zookeeper machine" - exit 1 -} - -PRIVATE_IP= -ZK_IP= - -# Let's get the command line arguments. -while [ $# -gt 0 ]; do - if [ "${1}" = "--private-ip" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - PRIVATE_IP="${1}" - shift - continue - fi - if [ "${1}" = "--zk-ip" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - ZK_IP="${1}" - shift - continue - fi - usage -done - -log() { - LEVEL=${2:-INFO} - echo "$(date +'%a %b %d %T %Y'): $LEVEL $1" -} - -if [ -z ${PRIVATE_IP} ] || [ -z ${ZK_IP} ]; then - usage -fi - - -echo ${PRIVATE_IP} > /etc/appscale/masters -echo ${PRIVATE_IP} > /etc/appscale/slaves -echo ${ZK_IP} > /etc/appscale/zookeeper_locations - - -log "Configuring Cassandra" -/root/appscale/scripts/setup_cassandra_config_files.py --local-ip ${PRIVATE_IP} \ - --master-ip ${PRIVATE_IP} - -log "Starting Cassandra" -su -c '/opt/cassandra/cassandra/bin/cassandra -p cassandra.pid' cassandra -cassandra_wait_start=$(date +%s) -while ! (/opt/cassandra/cassandra/bin/nodetool status | grep UN); do - current_time=$(date +%s) - elapsed_time=$((current_time - cassandra_wait_start)) - if [ "${elapsed_time}" -gt 60 ] - then - log "Timed out waiting for Cassandra to start" "ERROR" - exit 1 - fi - sleep 1 -done - -log "Creating tables" -for i in 1 2 3 ; do - RESULT=FAILED - appscale-prime-cassandra --replication 1 && RESULT=OK && break - log "Failed to create Cassandra tables" "WARNING" - sleep 15 -done - -if [ ${RESULT} = FAILED ]; then - log "Failed to create Cassandra tables after 3 retries" "ERROR" - exit 1 -fi diff --git a/AppTaskQueue/test/helpers/prepare-postgres.sh b/AppTaskQueue/test/helpers/prepare-postgres.sh index c456b7bd68..09487230b4 100755 --- a/AppTaskQueue/test/helpers/prepare-postgres.sh +++ b/AppTaskQueue/test/helpers/prepare-postgres.sh @@ -91,28 +91,11 @@ do sleep ${attempt} done -log "Checking if DB and user already exist" -if psql --dbname ${DBNAME} --username ${USERNAME} --host ${HOST} \ - --command 'SELECT current_timestamp;' -then - log "DB and user are already configured" - exit 0 -fi - -log "Creating Database and Role" -CREATE_ROLE="CREATE ROLE \"${USERNAME}\" WITH LOGIN PASSWORD '${PASSWORD}';" -sudo -u postgres psql --command "${CREATE_ROLE}" -sudo -u postgres createdb --owner "${USERNAME}" "${DBNAME}" -echo "${HOST}:5432:${DBNAME}:${USERNAME}:${PASSWORD}" > ~/.pgpass -chmod 600 ~/.pgpass -cp ~/.pgpass /root/.pgpass - - log "Updating Postgres configs to accept host connections to the Database" PG_MAJOR_VER=$(psql --version | awk '{ print $3 }' | awk -F '.' '{ print $1 }') PG_VERSION=$(psql --version | awk '{ print $3 }' | awk -F '.' '{ print $1 "." 
$2 }') -if [ "${PG_MAJOR_VER}" = "10" ]; then +if (( "${PG_MAJOR_VER}" >= 10 )); then PG_CONFIG_DIR="/etc/postgresql/${PG_MAJOR_VER}" else PG_CONFIG_DIR="/etc/postgresql/${PG_VERSION}" @@ -132,10 +115,30 @@ fi # Allow host connections to the specified DB if grep -q -E "^host[ \t]+${DBNAME}[ \t]+${USERNAME}[ \t]+" "${PG_HBA}" then - sed -i "s|^host[ \t]+${DBNAME}[ \t]+${USERNAME}[ \t]+.*|host ${DBNAME} ${USERNAME} ${HOST}/0 md5|" "${PG_HBA}" + sed -i "s|^host[ \t]+${DBNAME}[ \t]+${USERNAME}[ \t]+.*|host ${DBNAME} ${USERNAME} 0.0.0.0/0 md5|" "${PG_HBA}" else - echo "host ${DBNAME} ${USERNAME} ${HOST}/0 md5" >> "${PG_HBA}" + echo "host ${DBNAME} ${USERNAME} 0.0.0.0/0 md5" >> "${PG_HBA}" +fi + +systemctl restart postgresql.service +systemctl enable postgresql.service +systemctl status postgresql.service + + +trap 'rm -f ~/.pgpass' EXIT +echo "${HOST}:5432:${DBNAME}:${USERNAME}:${PASSWORD}" > ~/.pgpass +chmod 600 ~/.pgpass + +log "Checking if DB and user already exist" +if psql --dbname ${DBNAME} --username ${USERNAME} --host ${HOST} \ + --command 'SELECT current_timestamp;' +then + log "DB and user are already configured" + exit 0 fi -log "Restarting Postgres" -service postgresql restart + +log "Creating Database and Role" +CREATE_ROLE="CREATE ROLE \"${USERNAME}\" WITH LOGIN PASSWORD '${PASSWORD}';" +sudo -u postgres psql --command "${CREATE_ROLE}" +sudo -u postgres createdb --owner "${USERNAME}" "${DBNAME}" diff --git a/AppTaskQueue/test/helpers/prepare-zookeeper.sh b/AppTaskQueue/test/helpers/prepare-zookeeper.sh index cc2abf1aa5..d83083de5d 100755 --- a/AppTaskQueue/test/helpers/prepare-zookeeper.sh +++ b/AppTaskQueue/test/helpers/prepare-zookeeper.sh @@ -24,9 +24,6 @@ while ! ${ZK_CLI} ls / ; do sleep 1 done -log "Set cassandra-related appscale configs in zookeeper" +log "Create root appscale nodes in zookeeper" /usr/share/zookeeper/bin/zkCli.sh create /appscale "" /usr/share/zookeeper/bin/zkCli.sh create /appscale/projects "" -/usr/share/zookeeper/bin/zkCli.sh create /appscale/config "" -/usr/share/zookeeper/bin/zkCli.sh create /appscale/config/cassandra \ - '{"num_tokens":1}' diff --git a/AppTaskQueue/test/helpers/restart-taskqueue.sh b/AppTaskQueue/test/helpers/restart-taskqueue.sh index 3a05d61e0a..b955b9676e 100755 --- a/AppTaskQueue/test/helpers/restart-taskqueue.sh +++ b/AppTaskQueue/test/helpers/restart-taskqueue.sh @@ -7,11 +7,10 @@ set -e set -u usage() { - echo "Usage: ${0} --ports --db-ip --zk-ip --lb-ip [--source-dir ]" + echo "Usage: ${0} --ports --zk-ip --lb-ip [--source-dir ]" echo echo "Options:" echo " --ports Comma-separated list of ports to start TQ on" - echo " --db-ip IP of the database machine (cassandra and datastore)" echo " --zk-ip IP of the zookeeper machine" echo " --lb-ip IP of the AppScale loadbalancer machine" echo " --source-dir TaskQueue sources path to use for upgrade" @@ -19,7 +18,6 @@ usage() { } PORTS= -DB_IP= ZK_IP= LB_IP= TQ_SOURCE_DIR= @@ -35,15 +33,6 @@ while [ $# -gt 0 ]; do shift continue fi - if [ "${1}" = "--db-ip" ]; then - shift - if [ -z "${1}" ]; then - usage - fi - DB_IP="${1}" - shift - continue - fi if [ "${1}" = "--zk-ip" ]; then shift if [ -z "${1}" ]; then @@ -79,7 +68,7 @@ log() { echo "$(date +'%a %b %d %T %Y'): $LEVEL $1" } -if [ -z ${PORTS} ] || [ -z ${DB_IP} ] || [ -z ${ZK_IP} ] || [ -z ${LB_IP} ]; then +if [ -z ${PORTS} ] || [ -z ${ZK_IP} ] || [ -z ${LB_IP} ]; then usage fi @@ -110,8 +99,6 @@ if [ ! 
-z ${TQ_SOURCE_DIR} ]; then fi log "Filling /etc/appscale/* files with addresses of required services" -echo ${DB_IP} > /etc/appscale/masters -echo ${DB_IP} > /etc/appscale/slaves echo ${ZK_IP} > /etc/appscale/zookeeper_locations echo ${LB_IP} > /etc/appscale/load_balancer_ips hostname -I > /etc/appscale/my_private_ip diff --git a/AppTaskQueue/test/load/README.md b/AppTaskQueue/test/load/README.md index 984d2f3ece..26959bd572 100644 --- a/AppTaskQueue/test/load/README.md +++ b/AppTaskQueue/test/load/README.md @@ -19,15 +19,11 @@ running behind a load balancer. `AppTaskQueue/test/suits/run-load-test.sh` script can run entire test automatically. It does number of things: 1. Provisions TaskQueue service on VMs (started from appscale image). - 2. Initializes two projects (one is based on Cassadra implementation - of pull queues, another on Postgres). - 3. Makes sure pull queue is defined for Cassandra project and is empty. - 4. Starts specified number of users to produce high load against Cassandra - project. + 2. Initializes test project. + 3. Makes sure pull queue is defined and is empty. + 4. Starts specified number of users to produce high load. 5. Analyses outcomes, reports result. -Steps from 3 to 5 are repeated for Postgres implementation of pull queues. - ### Running load test manually @@ -59,7 +55,6 @@ is 10.10.1.20:4000, Zookeeper is available at 10.10.1.25. mkdir ./logs export VALIDATION_LOG=./logs export TEST_PROJECT=tq-test-proj - export PULL_QUEUES_BACKEND=postgres # or cassandra ``` 5. Start producer and worker locusts, wait for processes to exit: ```bash @@ -108,7 +103,6 @@ privileges). ```bash cat > ./layout.txt << CONTENT ROLE PUBLIC_IP/HOST_NAME PRIVATE_IP - cassandra 192.168.100.40 10.10.8.20 postgres 192.168.100.41 10.10.8.21 zookeeper 192.168.100.42 10.10.8.22 loadbalancer 192.168.100.43 10.10.8.23 @@ -191,17 +185,6 @@ covered. Helper scripts need to be implemented for easier testing of different scenarios. -#### The test shouldn't be aware of backend - -Initially this test was introduced to make sure that Postgres backend for -Pull Queues works consistently under load. So it was (is yet) important -to also compare Postgres implementation with original Cassandra implementation. -That's why Pull Queues backend is explicitly configured in -`AppTaskQueue/test/suites/run-load-test.sh`. - -Whenever Postgres implementation becomes default, any mentions of Pull Queues -backend should be removed from the test. 
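For a concrete picture of steps 4-5 in the "Running load test manually" section above (whose own snippet is not visible in this hunk), here is a minimal sketch of one possible invocation. The TaskQueue address, client counts, and log paths are illustrative assumptions; the flags mirror the ones run-load-test.sh passes to locust, and that script additionally exports RUN_TIME before starting the locusts.

```bash
# Illustrative manual run; host and counts are assumptions, flags follow run-load-test.sh.
mkdir -p ./logs
export VALIDATION_LOG=./logs
export TEST_PROJECT=tq-test-proj

# Start producers in the background first; worker_locust.py reads PRODUCERS_PID
# to detect when the producers have finished.
venv/bin/locust --host http://10.10.1.20:4000 --no-web \
    --clients 10 --hatch-rate 4 \
    --locustfile ./producer_locust.py > ./logs/producers-out 2>&1 &
export PRODUCERS_PID=$!

venv/bin/locust --host http://10.10.1.20:4000 --no-web \
    --clients 20 --hatch-rate 3 \
    --locustfile ./worker_locust.py > ./logs/workers-out 2>&1 &
WORKERS_PID=$!

wait ${PRODUCERS_PID} ${WORKERS_PID}
```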
- #### Configure .gitignore to ignore test artifacts The test can be run outside particular folder, so it is currently problematic diff --git a/AppTaskQueue/test/load/prepare_queues.py b/AppTaskQueue/test/load/prepare_queues.py index 0ae244a8dc..ca78eec2bc 100644 --- a/AppTaskQueue/test/load/prepare_queues.py +++ b/AppTaskQueue/test/load/prepare_queues.py @@ -26,7 +26,6 @@ zk_client.start() TEST_PROJECT = os.environ['TEST_PROJECT'] - PROJECT_NODE = f'/appscale/projects/{TEST_PROJECT}' PROJECT_QUEUES_NODE = f'/appscale/projects/{TEST_PROJECT}/queues' PROJECT_QUEUES_CONFIG = { 'queue': { diff --git a/AppTaskQueue/test/load/worker_locust.py b/AppTaskQueue/test/load/worker_locust.py index fc140bf04d..55cd3c8684 100644 --- a/AppTaskQueue/test/load/worker_locust.py +++ b/AppTaskQueue/test/load/worker_locust.py @@ -17,7 +17,6 @@ LEASE_SECONDS = 30 PRODUCER_PID = int(os.environ['PRODUCERS_PID']) -PULL_QUEUES_BACKEND = os.environ['PULL_QUEUES_BACKEND'] class Worker(TaskQueueLocust): @@ -56,9 +55,6 @@ def lease(self): if not leased.task: return Worker.LAST_LEASE_TIME = time.time() - if PULL_QUEUES_BACKEND == 'cassandra': - for task in leased.task: - task.retry_count += 1 tasks_info = [self.get_task_info(task) for task in leased.task] # Assuming that virtual tasks can be run in parallel diff --git a/AppTaskQueue/test/suites/layout-example.txt b/AppTaskQueue/test/suites/layout-example.txt index e70d13e777..a3eea1ee06 100644 --- a/AppTaskQueue/test/suites/layout-example.txt +++ b/AppTaskQueue/test/suites/layout-example.txt @@ -1,17 +1,15 @@ # Layout table should contain: # - exactly one postgres, -# - exactly one cassandra, # - exactly one zookeeper, # - exactly one loadbalancer, # - at least one taskqueue. # # run-load-test.sh script will parse layout file command like this: -# CASSANDRA_PUBLIC=$(grep -E '^cassandra' LAYOUT_FILE | awk '{ print $2 }') +# POSTGRES_PUBLIC=$(grep -E '^postgres' LAYOUT_FILE | awk '{ print $2 }') # So comments like this and header are fine. 
# # ROLE PUBLIC_IP/HOST_NAME PRIVATE_IP -cassandra 192.168.100.40 10.10.8.20 postgres 192.168.100.41 10.10.8.21 zookeeper 192.168.100.42 10.10.8.22 loadbalancer 192.168.100.43 10.10.8.23 diff --git a/AppTaskQueue/test/suites/run-e2e-tests.sh b/AppTaskQueue/test/suites/run-e2e-tests.sh index f4c14a3ed4..8f1e2cc9a7 100755 --- a/AppTaskQueue/test/suites/run-e2e-tests.sh +++ b/AppTaskQueue/test/suites/run-e2e-tests.sh @@ -116,7 +116,6 @@ scp -o StrictHostKeyChecking=no \ -i "${KEY_LOCATION}" \ "${HELPERS_DIR}/prepare-postgres.sh" \ "${HELPERS_DIR}/prepare-zookeeper.sh" \ - "${HELPERS_DIR}/prepare-cassandra.sh" \ "${HELPERS_DIR}/restart-taskqueue.sh" \ "${USER}@${VM_ADDR}:/tmp/" ssh -o StrictHostKeyChecking=no \ @@ -133,8 +132,7 @@ scp -o StrictHostKeyChecking=no \ # Save DSN string and projects config to variables PG_DSN="dbname=appscale-test-project user=appscale password=appscale-pwd host=${VM_PRIVATE_IP}" -POSTGRES_PROJECT='postgres-test-project' -CASSANDRA_PROJECT='cassandra-test-project' +TEST_PROJECT='test-project' log "" log "==========================================================================" @@ -151,20 +149,15 @@ sudo /tmp/prepare-postgres.sh --host "${VM_PRIVATE_IP}" \ echo "=== Starting Zookeeper server ===" sudo /tmp/prepare-zookeeper.sh -echo "=== Starting and priming Cassandra ===" -sudo /tmp/prepare-cassandra.sh --private-ip ${VM_PRIVATE_IP} \ - --zk-ip ${VM_PRIVATE_IP} - echo "=== Creating project nodes in Zookeeper ===" sudo /usr/share/zookeeper/bin/zkCli.sh create \ - /appscale/projects/${CASSANDRA_PROJECT} "" + /appscale/projects/${TEST_PROJECT} "" sudo /usr/share/zookeeper/bin/zkCli.sh create \ - /appscale/projects/${POSTGRES_PROJECT} "" + /appscale/tasks "" sudo /usr/share/zookeeper/bin/zkCli.sh create \ - /appscale/projects/${POSTGRES_PROJECT}/postgres_dsn "${PG_DSN}" + /appscale/tasks/postgres_dsn "${PG_DSN}" sudo /tmp/restart-taskqueue.sh --ports 50001,50002 \ - --db-ip "${VM_PRIVATE_IP}" \ --zk-ip "${VM_PRIVATE_IP}" \ --lb-ip "${VM_PRIVATE_IP}" \ --source-dir /tmp/AppTaskQueue @@ -213,24 +206,6 @@ venv/bin/pip install ${HELPERS_DIR} venv/bin/pip install pytest venv/bin/pip install kazoo -STATUS=0 - -log "" -log "====================================================" -log "=== Test Cassandra implementation of Pull Queues ===" -log "====================================================" -export TEST_PROJECT="${CASSANDRA_PROJECT}" +export TEST_PROJECT venv/bin/pytest -vv --tq-locations ${VM_ADDR}:50001 ${VM_ADDR}:50002 \ - --zk-location "${VM_ADDR}" \ - || STATUS=1 - -log "" -log "===================================================" -log "=== Test Postgres implementation of Pull Queues ===" -log "===================================================" -export TEST_PROJECT="${POSTGRES_PROJECT}" -venv/bin/pytest -vv --tq-locations ${VM_ADDR}:50001 ${VM_ADDR}:50002 \ - --zk-location "${VM_ADDR}" \ - || STATUS=1 - -exit ${STATUS} + --zk-location "${VM_ADDR}" diff --git a/AppTaskQueue/test/suites/run-load-test.sh b/AppTaskQueue/test/suites/run-load-test.sh index 969529629d..70e0c0f376 100755 --- a/AppTaskQueue/test/suites/run-load-test.sh +++ b/AppTaskQueue/test/suites/run-load-test.sh @@ -144,8 +144,6 @@ log() { } log "Parsing layout file at ${LAYOUT_FILE}" -CASSANDRA_VM=$(grep -E "^cassandra" "${LAYOUT_FILE}" | awk '{ print $2 }') -CASSANDRA_VM_PRIVATE_IP=$(grep -E "^cassandra" "${LAYOUT_FILE}" | awk '{ print $3 }') POSTGRES_VM=$(grep -E "^postgres" "${LAYOUT_FILE}" | awk '{ print $2 }') POSTGRES_VM_PRIVATE_IP=$(grep -E "^postgres" "${LAYOUT_FILE}" | awk '{ print $3 }') 
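Condensing the run-e2e-tests.sh changes above: after this patch the provisioning flow boils down to a single test project plus one global DSN node. A rough sketch follows; the ${VM_PRIVATE_IP} placeholder, ports, and /tmp helper paths are the ones that script already uses, and the remaining prepare-postgres.sh flags are omitted here rather than guessed.

```bash
TEST_PROJECT='test-project'
PG_DSN="dbname=appscale-test-project user=appscale password=appscale-pwd host=${VM_PRIVATE_IP}"

sudo /tmp/prepare-postgres.sh --host "${VM_PRIVATE_IP}"   # plus the db/user/password flags it expects
sudo /tmp/prepare-zookeeper.sh                            # no Cassandra priming step anymore

# One project node and a global DSN node replace the per-project DSN nodes.
sudo /usr/share/zookeeper/bin/zkCli.sh create /appscale/projects/${TEST_PROJECT} ""
sudo /usr/share/zookeeper/bin/zkCli.sh create /appscale/tasks ""
sudo /usr/share/zookeeper/bin/zkCli.sh create /appscale/tasks/postgres_dsn "${PG_DSN}"

# restart-taskqueue.sh no longer takes --db-ip.
sudo /tmp/restart-taskqueue.sh --ports 50001,50002 \
    --zk-ip "${VM_PRIVATE_IP}" --lb-ip "${VM_PRIVATE_IP}" \
    --source-dir /tmp/AppTaskQueue
```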
ZOOKEEPER_VM=$(grep -E "^zookeeper" "${LAYOUT_FILE}" | awk '{ print $2 }') @@ -195,12 +193,6 @@ log "====================================================================" log "=== Sending provisioning scripts, sources and other files to VMs ===" log "====================================================================" -log "### Copying cassandra initialisation script to Cassandra machine ###" -scp -o StrictHostKeyChecking=no \ - -i "${KEY_LOCATION}" \ - "${HELPERS_DIR}/prepare-cassandra.sh" \ - "${USER}@${CASSANDRA_VM}:/tmp/prepare-cassandra.sh" - log "### Copying postgres initialisation script to Postgres machine ###" scp -o StrictHostKeyChecking=no \ -i "${KEY_LOCATION}" \ @@ -257,8 +249,7 @@ COMMAND # Save DSN string and projects config to variables PG_DSN="dbname=appscale-test-project user=appscale password=appscale-pwd host=${POSTGRES_VM_PRIVATE_IP}" -POSTGRES_PROJECT='postgres-test-project' -CASSANDRA_PROJECT='cassandra-test-project' +TEST_PROJECT='test-project' log "### Initializing Zookeeper at ${USER}@${ZOOKEEPER_VM} ###" ssh -o StrictHostKeyChecking=no -i ${KEY_LOCATION} ${USER}@${ZOOKEEPER_VM} << COMMANDS @@ -266,26 +257,15 @@ ssh -o StrictHostKeyChecking=no -i ${KEY_LOCATION} ${USER}@${ZOOKEEPER_VM} << CO # Run general zookeeper provisioning script sudo /tmp/prepare-zookeeper.sh - # Configure project with Cassandra as a backend for Pull Queues - sudo /usr/share/zookeeper/bin/zkCli.sh create \ - /appscale/projects/${CASSANDRA_PROJECT} "" - # Configure project with Postgres as a backend for Pull Queues sudo /usr/share/zookeeper/bin/zkCli.sh create \ - /appscale/projects/${POSTGRES_PROJECT} "" - sudo /usr/share/zookeeper/bin/zkCli.sh delete \ - /appscale/projects/${POSTGRES_PROJECT}/postgres_dsn + /appscale/projects/${TEST_PROJECT} "" sudo /usr/share/zookeeper/bin/zkCli.sh create \ - /appscale/projects/${POSTGRES_PROJECT}/postgres_dsn "${PG_DSN}" + /appscale/tasks "" + sudo /usr/share/zookeeper/bin/zkCli.sh create \ + /appscale/tasks/postgres_dsn "${PG_DSN}" COMMANDS -log "### Initializing Cassandra at ${USER}@${CASSANDRA_VM} ###" -ssh -o StrictHostKeyChecking=no -i ${KEY_LOCATION} ${USER}@${CASSANDRA_VM} << CMD - set -e - sudo /tmp/prepare-cassandra.sh --private-ip ${CASSANDRA_VM_PRIVATE_IP} \ - --zk-ip ${ZOOKEEPER_VM} -CMD - # Generate comma-separated list of ports from (50000) to (50000 + TQ_PER_VM) TQ_PORTS=$(echo $(seq 50000 $((50000 + TQ_PER_VM - 1)))) # Start all TaskQueue servers @@ -295,7 +275,6 @@ do ssh -o StrictHostKeyChecking=no -i ${KEY_LOCATION} ${USER}@${tq_vm} << COMMAND set -e sudo /tmp/restart-taskqueue.sh --ports "${TQ_PORTS// /,}" \ - --db-ip "${CASSANDRA_VM_PRIVATE_IP}" \ --zk-ip "${ZOOKEEPER_VM}" \ --lb-ip "${LOADBALANCER_VM}" \ --source-dir /tmp/AppTaskQueue @@ -363,93 +342,17 @@ venv/bin/pip install tabulate status=0 -log "" -log "====================================================" -log "=== Test Cassandra implementation of Pull Queues ===" -log "====================================================" -LOCUST_LOGS="${LOGS_DIR}/locust-cassandra" -mkdir "${LOCUST_LOGS}" -VALIDATION_LOG="${LOGS_DIR}/validation-cassandra" -mkdir "${VALIDATION_LOG}" -export VALIDATION_LOG -export PULL_QUEUES_BACKEND="cassandra" -export TEST_PROJECT="${CASSANDRA_PROJECT}" -export RUN_TIME - -log "Ensuring queues are configured and empty" -venv/bin/python ./prepare_queues.py --zookeeper-location ${ZOOKEEPER_VM} \ - --taskqueue-location ${TQ_LOCATION} - -log "Starting task producers with timeout ${LOCUST_TIMEOUT}s" -timeout "${LOCUST_TIMEOUT}" \ - venv/bin/locust --host 
"${TQ_LOCATION}" --no-web \ - --clients ${PRODUCERS} \ - --hatch-rate $((PRODUCERS/3 + 1)) \ - --csv-base-name "${LOCUST_LOGS}/producers" \ - --logfile "${LOCUST_LOGS}/producers-log" \ - --locustfile ./producer_locust.py \ - > "${LOCUST_LOGS}/producers-out" 2>&1 & -PRODUCERS_PID=$! -export PRODUCERS_PID # let workers know when producers are terminated - -log "Starting workers with timeout ${LOCUST_TIMEOUT}s" -timeout "${LOCUST_TIMEOUT}" \ - venv/bin/locust --host "${TQ_LOCATION}" --no-web \ - --clients ${WORKERS} \ - --hatch-rate $((WORKERS/10 + 1)) \ - --csv-base-name "${LOCUST_LOGS}/workers" \ - --logfile "${LOCUST_LOGS}/workers-log" \ - --locustfile ./worker_locust.py \ - > "${LOCUST_LOGS}/workers-out" 2>&1 & -WORKERS_PID=$! - -set +e - -log "Waiting for producers to finish work or timeout..." -wait ${PRODUCERS_PID} -PRODUCERS_STATUS=$? -if [ ${PRODUCERS_STATUS} == 124 ]; then - log "Producers timed out to finish work in ${LOCUST_TIMEOUT}s" "ERROR" - status=1 -elif [ ${PRODUCERS_STATUS} != 0 ]; then - log "Producers exited with non-zero status (${PRODUCERS_STATUS})" "WARNING" - log "It's probably because some requests were failed. Ignoring it." -fi - -log "Waiting for workers to finish work or timeout..." -wait ${WORKERS_PID} -WORKERS_STATUS=$? -if [ ${WORKERS_STATUS} == 124 ]; then - log "Workers timed out to finish work in ${LOCUST_TIMEOUT}s" "ERROR" - status=1 -elif [ ${WORKERS_STATUS} != 0 ]; then - log "Workers exited with non-zero status (${WORKERS_STATUS})" "WARNING" - log "It's probably because some requests were failed. Ignoring it." -fi - -set -e - -log "Verifying consistency of taskqueue activity log" -venv/bin/python ./check_consistency.py --validation-log ${VALIDATION_LOG} \ - --taskqueue-location ${TQ_LOCATION} \ - --ignore-exceeded-retry-limit \ - || status=1 - -log "Verifying performance reported by locust" -venv/bin/python ./check_performance.py --locust-log ${LOCUST_LOGS} || status=1 - log "" -log "===================================================" -log "=== Test Postgres implementation of Pull Queues ===" -log "===================================================" -LOCUST_LOGS="${LOGS_DIR}/locust-postgres" +log "===============================" +log "=== Run TaskQueue load test ===" +log "===============================" +LOCUST_LOGS="${LOGS_DIR}/locust" mkdir "${LOCUST_LOGS}" -VALIDATION_LOG="${LOGS_DIR}/validation-postgres" +VALIDATION_LOG="${LOGS_DIR}/validation" mkdir "${VALIDATION_LOG}" export VALIDATION_LOG -export PULL_QUEUES_BACKEND="postgres" -export TEST_PROJECT="${POSTGRES_PROJECT}" +export TEST_PROJECT export RUN_TIME log "Ensuring queues are configured and empty" diff --git a/AppTaskQueue/test/unit/test_service_stats.py b/AppTaskQueue/test/unit/test_service_stats.py index a5da6b9b6d..04b59babc0 100644 --- a/AppTaskQueue/test/unit/test_service_stats.py +++ b/AppTaskQueue/test/unit/test_service_stats.py @@ -24,7 +24,7 @@ def get_app(self): def setUp(self): """ Patches handlers of Taskqueue application in order - to prevent real calls to Cassandra and Datastore because only + to prevent real calls to Postgres and Datastore because only service statistics matters for this test. 
""" super(TestServiceStatistics, self).setUp() From ec9b7fe77f4c8945752651ae3381fe97c601b828 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 26 Sep 2019 19:36:24 +0300 Subject: [PATCH 144/221] Fix Issue: Save DSN to Zookeeper on startup --- AppController/djinn.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 90448ab662..b866b91150 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3561,6 +3561,10 @@ def stop_search2_role end def start_taskqueue_master + if @options.key?('postgres_dsn') + ZKInterface.set_postgres_dsn(@options['postgres_dsn']) + end + verbose = @options['verbose'].downcase == "true" TaskQueue.start_master(false, verbose) return true @@ -3571,6 +3575,10 @@ def stop_taskqueue end def start_taskqueue_slave + if @options.key?('postgres_dsn') + ZKInterface.set_postgres_dsn(@options['postgres_dsn']) + end + # All slaves connect to the master to start master_ip = nil @state_change_lock.synchronize { From 51ea6b9fb13b8e3ec055427d598f323df89a8a8b Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Thu, 26 Sep 2019 19:40:46 +0300 Subject: [PATCH 145/221] Remove Cassandra backend support for PullQueues --- .../appscale/taskqueue/appscale_taskqueue.py | 3 +- .../appscale/taskqueue/datastore_client.py | 6 +- .../appscale/taskqueue/distributed_tq.py | 21 +- .../taskqueue/pg_connection_wrapper.py | 71 +- AppTaskQueue/appscale/taskqueue/queue.py | 1228 +---------------- .../appscale/taskqueue/queue_manager.py | 55 +- AppTaskQueue/appscale/taskqueue/rest_api.py | 21 +- AppTaskQueue/setup.py | 1 - 8 files changed, 136 insertions(+), 1270 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py b/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py index c5a3dea371..5f96d96af1 100644 --- a/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py +++ b/AppTaskQueue/appscale/taskqueue/appscale_taskqueue.py @@ -16,7 +16,7 @@ from appscale.common.constants import ZK_PERSISTENT_RECONNECTS from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.taskqueue import distributed_tq +from appscale.taskqueue import distributed_tq, pg_connection_wrapper from appscale.taskqueue.constants import SHUTTING_DOWN_TIMEOUT from .protocols import taskqueue_service_pb2 from .protocols import remote_api_pb2 @@ -331,6 +331,7 @@ def main(): connection_retry=ZK_PERSISTENT_RECONNECTS) zk_client.start() + pg_connection_wrapper.start_postgres_dsn_watch(zk_client) register_location(zk_client, appscale_info.get_private_ip(), args.port) # Initialize tornado server diff --git a/AppTaskQueue/appscale/taskqueue/datastore_client.py b/AppTaskQueue/appscale/taskqueue/datastore_client.py index dadf52f3b2..a2d1f4c004 100644 --- a/AppTaskQueue/appscale/taskqueue/datastore_client.py +++ b/AppTaskQueue/appscale/taskqueue/datastore_client.py @@ -329,16 +329,16 @@ def _make_request(self, project_id, method, body): response.raise_for_status() except exceptions.ConnectionError as e: raise DatastoreTransientError( - 'Connection error occurred with message: {}'.format(e.message)) + 'Connection error occurred with message: {}'.format(e)) except exceptions.Timeout: raise DatastoreTransientError( 'Operation timed out after {} seconds.'.format(timeout)) except exceptions.HTTPError as e: raise DatastoreTransientError( - 'HTTP error occurred with message: {}'.format(e.message)) + 'HTTP error occurred with message: {}'.format(e)) except socket.error as e: raise DatastoreTransientError( - 'Socket error occurred with 
message: {}'.format(e.message)) + 'Socket error occurred with message: {}'.format(e)) api_response = remote_api_pb2.Response() api_response.ParseFromString(response.content) diff --git a/AppTaskQueue/appscale/taskqueue/distributed_tq.py b/AppTaskQueue/appscale/taskqueue/distributed_tq.py index 50069a99f7..99b54edb97 100644 --- a/AppTaskQueue/appscale/taskqueue/distributed_tq.py +++ b/AppTaskQueue/appscale/taskqueue/distributed_tq.py @@ -34,9 +34,7 @@ from .queue import ( InvalidLeaseRequest, PostgresPullQueue, - PullQueue, - PushQueue, - TransientError + PushQueue ) from .queue_manager import GlobalQueueManager from .service_manager import GlobalServiceManager @@ -156,7 +154,7 @@ def fetch_queue_stats(self, app_id, http_data): stats_response = response.queuestats.add() - if isinstance(queue, (PullQueue, PostgresPullQueue)): + if isinstance(queue, PostgresPullQueue): num_tasks = queue.total_tasks() oldest_eta = queue.oldest_eta() else: @@ -243,12 +241,9 @@ def query_and_own_tasks(self, app_id, http_data): tag = None if request.HasField("tag"): tag = request.tag.decode('utf-8') - try: - tasks = queue.lease_tasks(request.max_tasks, request.lease_seconds, - group_by_tag=request.group_by_tag, tag=tag) - except TransientError as lease_error: - pb_error = TaskQueueServiceError.TRANSIENT_ERROR - return response.Encode(), pb_error, str(lease_error) + + tasks = queue.lease_tasks(request.max_tasks, request.lease_seconds, + group_by_tag=request.group_by_tag, tag=tag) for task in tasks: task_pb = response.task.add() @@ -277,8 +272,6 @@ def add(self, source_info, http_data): try: self.__bulk_add(source_info, bulk_request, bulk_response) - except TransientError as error: - return b'', TaskQueueServiceError.TRANSIENT_ERROR, str(error) except QueueNotFound as error: return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) except DatastorePermanentError as error: @@ -317,8 +310,6 @@ def bulk_add(self, source_info, http_data): self.__bulk_add(source_info, request, response) except QueueNotFound as error: return b'', TaskQueueServiceError.UNKNOWN_QUEUE, str(error) - except TransientError as error: - return b'', TaskQueueServiceError.TRANSIENT_ERROR, str(error) except DatastorePermanentError as error: return b'', TaskQueueServiceError.INTERNAL_ERROR, str(error) except BadFilterConfiguration as error: @@ -352,7 +343,7 @@ def __bulk_add(self, source_info, request, response): add_request.mode == taskqueue_service_pb2.TaskQueueMode.PULL): queue = self.get_queue(add_request.app_id.decode('utf-8'), add_request.queue_name.decode('utf-8')) - if not isinstance(queue, (PullQueue, PostgresPullQueue)): + if not isinstance(queue, PostgresPullQueue): task_result.result = TaskQueueServiceError.INVALID_QUEUE_MODE error_found = True continue diff --git a/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py b/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py index 194301149d..aa23b1a844 100644 --- a/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py +++ b/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py @@ -1,25 +1,84 @@ """ Postgres connection wrapper with autoreconnect functionality. """ -import functools import psycopg2 -from psycopg2 import errorcodes +from tornado.ioloop import IOLoop from appscale.taskqueue.utils import logger +class NoDSNSpecified(Exception): + pass + + class PostgresConnectionWrapper(object): + """ Implements automatic reconnection to Postgresql server. 
""" - def __init__(self, *args, **kwargs): - self._args = args - self._kwargs = kwargs + def __init__(self, dsn=None): + self._dsn = dsn self._connection = None + def set_dsn(self, dsn): + """ Resets PostgresConnectionWrapper to use new DSN string. + + Args: + dsn: a str representing Postgres DSN string. + """ + if self._connection and not self._connection.closed: + self.close() + self._connection = None + self._dsn = dsn + def get_connection(self): + """ Provides postgres connection. It can either return existing + working connection or establish new one. + + Returns: + An instance of psycopg2 connection. + """ if not self._connection or self._connection.closed: logger.info('Establishing new connection to Postgres server') - self._connection = psycopg2.connect(*self._args, **self._kwargs) + self._connection = psycopg2.connect(dsn=self._dsn) return self._connection def close(self): + """ Closes psycopg2 connection. + """ return self._connection.close() + + +def start_postgres_dsn_watch(zk_client): + """ Created zookeeper DataWatch for updating pg_wrapper + when Postgres DSN string is updated. + + Args: + zk_client: an instance of zookeeper client. + """ + zk_client.ensure_path('/appscale/tasks') + zk_client.DataWatch('/appscale/tasks/postgres_dsn', _update_dsn_watch) + + +def _update_dsn(new_dsn): + """ Updates Postgres DSN string to be used + for establishing connection to Postgresql server. + + Args: + new_dsn: A bytes array representing new DSN string. + """ + if not new_dsn: + raise NoDSNSpecified('No DSN string was found at zookeeper node ' + '"/appscale/tasks/postgres_dsn"') + pg_wrapper.set_dsn(new_dsn.decode('utf-8')) + + +def _update_dsn_watch(new_dsn, _): + """ Schedules update of Postgres DSN to be executed in tornado IO loop. + + Args: + new_dsn: A bytes array representing new DSN string. + """ + main_io_loop = IOLoop.instance() + main_io_loop.add_callback(_update_dsn, new_dsn) + + +pg_wrapper = PostgresConnectionWrapper() diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 3a0d5bf962..1caa4d45a9 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -3,31 +3,16 @@ import base64 import json import sys -import time -import uuid import psycopg2 -from appscale.common import appscale_info from appscale.common import retrying from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -import cassandra -from cassandra.cluster import Cluster -from cassandra.query import BatchStatement -from cassandra.query import ConsistencyLevel -from cassandra.query import SimpleStatement -from cassandra.policies import ( - DCAwareRoundRobinPolicy, - FallthroughRetryPolicy, - RetryPolicy -) -from collections import deque -from threading import Lock + +from .pg_connection_wrapper import pg_wrapper from .constants import AGE_LIMIT_REGEX -from .constants import EmptyQueue from .constants import InvalidQueueConfiguration from .constants import RATE_REGEX -from .constants import TaskNotFound from .task import InvalidTaskInfo from .task import Task from .utils import logger @@ -35,76 +20,6 @@ sys.path.append(APPSCALE_PYTHON_APPSERVER) -# The number of times to retry idempotent statements. -BASIC_RETRY_COUNT = 5 - - -class IdempotentRetryPolicy(RetryPolicy): - """ A policy used for retrying idempotent statements. """ - def on_read_timeout(self, query, consistency, required_responses, - received_responses, data_retrieved, retry_num): - """ This is called when a ReadTimeout occurs. 
- - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_responses: The number of responses required. - received_responses: The number of responses received. - data_retrieved: Indicates whether any responses contained data. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - def on_write_timeout(self, query, consistency, write_type, - required_responses, received_responses, retry_num): - """ This is called when a WriteTimeout occurs. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_responses: The number of responses required. - received_responses: The number of responses received. - data_retrieved: Indicates whether any responses contained data. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - def on_unavailable(self, query, consistency, required_replicas, - alive_replicas, retry_num): - """ The coordinator has detected an insufficient number of live replicas. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_replicas: The number of replicas required to complete query. - alive_replicas: The number of replicas that are ready to complete query. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - -# A basic policy that retries idempotent operations. -BASIC_RETRIES = IdempotentRetryPolicy() - -# A policy that does not retry statements. -NO_RETRIES = FallthroughRetryPolicy() - -TRANSIENT_CASSANDRA_ERRORS = ( - cassandra.Unavailable, cassandra.Timeout, cassandra.CoordinationFailure, - cassandra.OperationTimedOut, cassandra.cluster.NoHostAvailable) - -# The load balancing policy to use when connecting to a cluster. -LB_POLICY = DCAwareRoundRobinPolicy() - # This format is used when returning the long name of a queue as # part of a leased task. This is to mimic a GCP oddity/bug. LONG_QUEUE_FORM = 'projects/{app}/taskqueues/{queue}' @@ -127,41 +42,10 @@ def on_unavailable(self, query, consistency, required_replicas, } -def current_time_ms(): - """ Gets the current time with millisecond precision. This allows the server - to return exactly what Cassandra will store. - - Returns: - A datetime object with the current time. - """ - now = datetime.datetime.utcnow() - new_microsecond = int(now.microsecond / 1000) * 1000 - return now.replace(microsecond=new_microsecond) - - -def next_key(key): - """ Calculates the next partition value of a key. Note: Cassandra BOP orders - 'b' before 'aa'. - - Args: - key: A string containing a Cassandra key. - Returns: - A string containing the next partition value. - """ - mutable_key = list(key) - mutable_key[-1] = chr(ord(key[-1]) + 1) - return ''.join(mutable_key) - - class InvalidLeaseRequest(Exception): pass -class TransientError(Exception): - """ Indicates that the queue was unable to complete an operation. """ - pass - - class Queue(object): """ Represents a queue created by an App Engine application. 
""" @@ -332,17 +216,27 @@ class PostgresPullQueue(Queue): TTL_INTERVAL_AFTER_DELETED = '7 days' - def __init__(self, queue_info, app, pg_connection_wrapper): + # The maximum number of tasks that can be leased at a time. + MAX_LEASE_AMOUNT = 1000 + + # Tasks can be leased for up to a week. + MAX_LEASE_TIME = 60 * 60 * 24 * 7 + + # The maximum number of index entries to cache. + MAX_CACHE_SIZE = 500 + + # The number of seconds to keep the index cache. + MAX_CACHE_DURATION = 30 + + def __init__(self, queue_info, app): """ Create a PostgresPullQueue object. Args: queue_info: A dictionary containing queue info. app: A string containing the application ID. - pg_connection_wrapper: A psycopg2 connection wrapper. """ super(PostgresPullQueue, self).__init__(queue_info, app) self.connection_key = self.app - self.pg_connection_wrapper = pg_connection_wrapper self.ensure_project_schema_created() self.queue_id = self.ensure_queue_registered() self.ensure_tasks_table_created() @@ -351,13 +245,13 @@ def __init__(self, queue_info, app, pg_connection_wrapper): # they sometimes get IntegrityError despite 'IF NOT EXISTS' @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) def ensure_project_schema_created(self): - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: logger.info('Ensuring "{schema_name}" schema is created' .format(schema_name=self.schema_name)) pg_cursor.execute( - 'CREATE SCHEMA IF NOT EXISTS "{schema_name};' + 'CREATE SCHEMA IF NOT EXISTS "{schema_name}";' .format(schema_name=self.schema_name) ) @@ -365,7 +259,7 @@ def ensure_project_schema_created(self): # they sometimes get IntegrityError despite 'IF NOT EXISTS' @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) def ensure_queue_registered(self): - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: logger.info('Ensuring "{}" table is created' @@ -404,7 +298,7 @@ def ensure_queue_registered(self): # they sometimes get IntegrityError despite 'IF NOT EXISTS' @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) def ensure_tasks_table_created(self): - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: logger.info('Ensuring "{}" table is created' @@ -469,7 +363,7 @@ def add_task(self, task): except AttributeError: lease_expires = 'current_timestamp' - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() try: with pg_connection: with pg_connection.cursor() as pg_cursor: @@ -514,7 +408,7 @@ def get_task(self, task, omit_payload=False): else: columns = ['payload', 'task_name', 'time_enqueued', 'lease_expires', 'lease_count', 'tag'] - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -540,7 +434,7 @@ def delete_task(self, task): Args: task: A Task object. """ - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -564,7 +458,7 @@ def update_lease(self, task, new_lease_seconds): Returns: A Task object. 
""" - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -629,7 +523,7 @@ def update_task(self, task, new_lease_seconds): else: old_eta_verification = '' - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -658,7 +552,7 @@ def list_tasks(self, limit=100): """ columns = ['task_name', 'time_enqueued', 'lease_expires', 'lease_count', 'tag'] - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -687,13 +581,13 @@ def lease_tasks(self, num_tasks, lease_seconds, group_by_tag=False, Returns: A list of Task objects. """ - if num_tasks > PullQueue.MAX_LEASE_AMOUNT: + if num_tasks > self.MAX_LEASE_AMOUNT: raise InvalidLeaseRequest('Only {} tasks can be leased at a time' - .format(PullQueue.MAX_LEASE_AMOUNT)) + .format(self.MAX_LEASE_AMOUNT)) - if lease_seconds > PullQueue.MAX_LEASE_TIME: + if lease_seconds > self.MAX_LEASE_TIME: raise InvalidLeaseRequest('Tasks can only be leased for up to {} seconds' - .format(PullQueue.MAX_LEASE_TIME)) + .format(self.MAX_LEASE_TIME)) start_time = datetime.datetime.utcnow() logger.debug('Leasing {} tasks for {} sec. group_by_tag={}, tag={}'. @@ -720,7 +614,7 @@ def lease_tasks(self, num_tasks, lease_seconds, group_by_tag=False, '"{table}".{col}'.format(table=self.tasks_table_name, col=column) for column in columns ] - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -761,7 +655,7 @@ def lease_tasks(self, num_tasks, lease_seconds, group_by_tag=False, def purge(self): """ Remove all tasks from queue. """ - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -809,7 +703,7 @@ def total_tasks(self): Returns: An integer specifying the number of tasks in the queue. """ - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -827,7 +721,7 @@ def oldest_eta(self): A datetime object specifying the oldest ETA or None if there are no tasks. """ - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -842,7 +736,7 @@ def oldest_eta(self): def flush_deleted(self): """ Removes all tasks which were deleted more than week ago. """ - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -895,7 +789,7 @@ def _get_earliest_tag(self): Returns: A string containing a tag or None. 
""" - pg_connection = self.pg_connection_wrapper.get_connection() + pg_connection = pg_wrapper.get_connection() with pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( @@ -945,1055 +839,3 @@ def __repr__(self): """ return ''.format( self.name, self.app, self.task_retry_limit) - - -class PullQueue(Queue): - - # The maximum number of tasks that can be leased at a time. - MAX_LEASE_AMOUNT = 1000 - - # Tasks can be leased for up to a week. - MAX_LEASE_TIME = 60 * 60 * 24 * 7 - - # The maximum number of index entries to cache. - MAX_CACHE_SIZE = 500 - - # The number of seconds to keep the index cache. - MAX_CACHE_DURATION = 30 - - # The seconds to wait after fetching 0 index results before retrying. - EMPTY_RESULTS_COOLDOWN = 5 - - # The number of times to retry connecting to Cassandra. - INITIAL_CONNECT_RETRIES = 20 - - # The keyspace used for all tables - KEYSPACE = "Keyspace1" - - def __init__(self, queue_info, app): - """ Create a PullQueue object. - - Args: - queue_info: A dictionary containing queue info. - app: A string containing the application ID. - """ - self.index_cache = {'global': {}, 'by_tag': {}} - self.index_cache_lock = Lock() - - hosts = appscale_info.get_db_ips() - remaining_retries = self.INITIAL_CONNECT_RETRIES - while True: - try: - self.cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES, - load_balancing_policy=LB_POLICY) - self.session = self.cluster.connect(self.KEYSPACE) - break - except cassandra.cluster.NoHostAvailable as connection_error: - remaining_retries -= 1 - if remaining_retries < 0: - raise connection_error - time.sleep(3) - - self.session.default_consistency_level = ConsistencyLevel.QUORUM - - super(PullQueue, self).__init__(queue_info, app) - - def add_task(self, task, retries=5): - """ Adds a task to the queue. - - Args: - task: A Task object. - retries: The number of times to retry adding the task. - Raises: - InvalidTaskInfo if the task ID already exists in the queue. - """ - if not hasattr(task, 'payloadBase64'): - raise InvalidTaskInfo('{} is missing a payload.'.format(task)) - - enqueue_time = datetime.datetime.utcnow() - try: - lease_expires = task.leaseTimestamp - except AttributeError: - lease_expires = datetime.datetime.utcfromtimestamp(0) - - parameters = { - 'app': self.app, - 'queue': self.name, - 'id': task.id, - 'payload': task.payloadBase64, - 'enqueued': enqueue_time, - 'retry_count': 0, - 'lease_expires': lease_expires, - 'op_id': uuid.uuid4() - } - - try: - parameters['tag'] = task.tag - except AttributeError: - parameters['tag'] = None - - self._insert_task(parameters, retries) - - task.queueName = self.name - task.enqueueTimestamp = enqueue_time - task.leaseTimestamp = lease_expires - - # Create index entries so the task can be queried by ETA and (tag, ETA). - # This can't be done in a batch because the payload from the previous - # insert can be up to 1MB, and Cassandra does not approve of large batches. - try: - tag = task.tag - except AttributeError: - # The API does not differentiate between empty and unspecified tags. 
- tag = '' - - insert_eta_index = SimpleStatement(""" - INSERT INTO pull_queue_eta_index (app, queue, eta, id, tag) - VALUES (%(app)s, %(queue)s, %(eta)s, %(id)s, %(tag)s) - """, retry_policy=BASIC_RETRIES) - parameters = { - 'app': self.app, - 'queue': self.name, - 'eta': task.get_eta(), - 'id': task.id, - 'tag': tag - } - self.session.execute(insert_eta_index, parameters) - - insert_tag_index = SimpleStatement(""" - INSERT INTO pull_queue_tags_index (app, queue, tag, eta, id) - VALUES (%(app)s, %(queue)s, %(tag)s, %(eta)s, %(id)s) - """, retry_policy=BASIC_RETRIES) - self.session.execute(insert_tag_index, parameters) - - logger.debug('Added task: {}'.format(task)) - - def get_task(self, task, omit_payload=False): - """ Gets a task from the queue. - - Args: - task: A Task object. - omit_payload: A boolean indicating that the payload should not be - fetched. - Returns: - A task object or None. - """ - payload = 'payload,' - if omit_payload: - payload = '' - select_task = """ - SELECT {payload} enqueued, lease_expires, retry_count, tag - FROM pull_queue_tasks - WHERE app = %(app)s AND queue = %(queue)s AND id = %(id)s - """.format(payload=payload) - statement = SimpleStatement(select_task, - consistency_level=ConsistencyLevel.SERIAL) - parameters = {'app': self.app, 'queue': self.name, 'id': task.id} - try: - response = self.session.execute(statement, parameters)[0] - except IndexError: - return None - - task_info = { - 'id': task.id, - 'queueName': self.name, - 'enqueueTimestamp': response.enqueued, - 'leaseTimestamp': response.lease_expires, - 'retry_count': response.retry_count, - } - - if response.tag is not None: - task_info['tag'] = response.tag - - if not omit_payload: - task_info['payloadBase64'] = response.payload - - return Task(task_info) - - def delete_task(self, task): - """ Deletes a task from the queue. - - Args: - task: A Task object. - """ - # Retrieve the ETA info so that the index can also be deleted. - task = self.get_task(task, omit_payload=True) - if task is not None: - self._delete_task_and_index(task) - - logger.debug('Deleted task: {}'.format(task)) - - def update_lease(self, task, new_lease_seconds, retries=5): - """ Updates the duration of a task lease. - - Args: - task: A Task object. - new_lease_seconds: An integer specifying when to set the new ETA. It - represents the number of seconds from now. - retries: The number of times to try the update. - Returns: - A Task object. - """ - new_eta = current_time_ms() + datetime.timedelta(seconds=new_lease_seconds) - parameters = { - 'app': self.app, - 'queue': self.name, - 'id': task.id, - 'old_eta': task.get_eta(), - 'new_eta': new_eta, - 'current_time': datetime.datetime.utcnow(), - 'op_id': uuid.uuid4() - } - self._update_lease(parameters, retries) - - task.leaseTimestamp = new_eta - return task - - def update_task(self, task, new_lease_seconds, retries=5): - """ Updates leased tasks. - - Args: - task: A task object. - new_lease_seconds: An integer specifying when to set the new ETA. It - represents the number of seconds from now. - retries: The number of times to try the update. 
- """ - new_eta = current_time_ms() + datetime.timedelta(seconds=new_lease_seconds) - parameters = { - 'app': self.app, - 'queue': self.name, - 'id': task.id, - 'new_eta': new_eta, - 'current_time': datetime.datetime.utcnow(), - 'op_id': uuid.uuid4() - } - - try: - old_eta = task.leaseTimestamp - except AttributeError: - old_eta = None - if old_eta == datetime.datetime.utcfromtimestamp(0): - old_eta = None - - if old_eta is not None: - parameters['old_eta'] = old_eta - self._update_lease(parameters, retries) - else: - self._update_lease(parameters, retries, check_lease=False) - - task.leaseTimestamp = new_eta - return task - - def list_tasks(self, limit=100): - """ List all non-deleted tasks in the queue. - - Args: - limit: An integer specifying the maximum number of tasks to list. - Returns: - A list of Task objects. - """ - session = self.session - - tasks = [] - start_date = datetime.datetime.utcfromtimestamp(0) - task_id = '' - while True: - query_tasks = """ - SELECT eta, id, tag FROM pull_queue_eta_index - WHERE token(app, queue, eta, id) > token(%(app)s, %(queue)s, %(eta)s, %(id)s) - AND token(app, queue, eta, id) < token(%(app)s, %(next_queue)s, 0, '') - LIMIT {limit} - """.format(limit=limit) - parameters = {'app': self.app, 'queue': self.name, 'eta': start_date, - 'id': task_id, 'next_queue': next_key(self.name)} - results = [result for result in session.execute(query_tasks, parameters)] - - if not results: - break - - satisfied_request = False - for result in results: - task = self.get_task(Task({'id': result.id}), omit_payload=True) - if task is None: - self._delete_index(result.eta, result.id, result.tag) - continue - - tasks.append(task) - if len(tasks) >= limit: - satisfied_request = True - break - if satisfied_request: - break - - # Update the cursor. - start_date = results[-1].eta - task_id = results[-1].id - - return tasks - - def lease_tasks(self, num_tasks, lease_seconds, group_by_tag=False, - tag=None): - """ Acquires a lease on tasks from the queue. - - Args: - num_tasks: An integer specifying the number of tasks to lease. - lease_seconds: An integer specifying how long to lease the tasks. - group_by_tag: A boolean indicating that only tasks of one tag should - be leased. - tag: A string containing the tag for the task. - Returns: - A list of Task objects. - """ - if num_tasks > self.MAX_LEASE_AMOUNT: - raise InvalidLeaseRequest( - 'Only {} tasks can be leased at a time'.format(self.MAX_LEASE_AMOUNT)) - - if lease_seconds > self.MAX_LEASE_TIME: - raise InvalidLeaseRequest('Tasks can only be leased for up to {} seconds' - .format(self.MAX_LEASE_TIME)) - - start_time = datetime.datetime.utcnow() - logger.debug('Leasing {} tasks for {} sec. group_by_tag={}, tag={}'. - format(num_tasks, lease_seconds, group_by_tag, tag)) - # If not specified, the tag is assumed to be that of the oldest task. - if group_by_tag and tag is None: - try: - tag = self._get_earliest_tag() - except EmptyQueue: - return [] - - # Fetch available tasks and try to lease them until the requested number - # has been leased or until the index has been exhausted. - leased = [] - leased_ids = set() - indices_seen = set() - new_eta = None - while True: - tasks_needed = num_tasks - len(leased) - if tasks_needed < 1: - break - - try: - index_results = self._query_available_tasks( - tasks_needed, group_by_tag, tag) - except TRANSIENT_CASSANDRA_ERRORS: - raise TransientError('Unable to query available tasks') - - # The following prevents any task from being leased multiple times in the - # same request. 
If the lease time is very small, it's possible for the - # lease to expire while results are still being fetched. - index_results = [result for result in index_results - if result.id not in leased_ids] - - # If there are no more available tasks, return whatever has been leased. - if not index_results: - break - - # Determine new_eta when the first index_results are received - if new_eta is None: - new_eta = current_time_ms() + datetime.timedelta(seconds=lease_seconds) - - lease_results = self._lease_batch(index_results, new_eta) - for index_num, index_result in enumerate(index_results): - task = lease_results[index_num] - if task is None: - # If this lease request has previously encountered this index, it's - # likely that either the index is invalid or that the task has - # exceeded its retry_count. - if index_result.id in indices_seen: - self._resolve_task(index_result) - indices_seen.add(index_result.id) - continue - - leased.append(task) - leased_ids.add(task.id) - - time_elapsed = datetime.datetime.utcnow() - start_time - logger.debug('Leased {} tasks [time elapsed: {}]'.format(len(leased), str(time_elapsed))) - logger.debug('IDs leased: {}'.format([task.id for task in leased])) - return leased - - def total_tasks(self): - """ Get the total number of tasks in the queue. - - Returns: - An integer specifying the number of tasks in the queue. - """ - select_count = """ - SELECT COUNT(*) FROM pull_queue_tasks - WHERE token(app, queue, id) >= token(%(app)s, %(queue)s, '') - AND token(app, queue, id) < token(%(app)s, %(next_queue)s, '') - """ - parameters = {'app': self.app, 'queue': self.name, - 'next_queue': next_key(self.name)} - return self.session.execute(select_count, parameters)[0].count - - def oldest_eta(self): - """ Get the ETA of the oldest task - - Returns: - A datetime object specifying the oldest ETA or None if there are no - tasks. - """ - session = self.session - select_oldest = """ - SELECT eta FROM pull_queue_eta_index - WHERE token(app, queue, eta, id) >= token(%(app)s, %(queue)s, 0, '') - AND token(app, queue, eta, id) < token(%(app)s, %(next_queue)s, 0, '') - LIMIT 1 - """ - parameters = {'app': self.app, 'queue': self.name, - 'next_queue': next_key(self.name)} - try: - return session.execute(select_oldest, parameters)[0].eta - except IndexError: - return None - - def purge(self): - """ Remove all tasks from queue. - - Cassandra cannot perform a range scan during a delete, so this function - selects all the tasks before deleting them one at a time. - """ - select_tasks = """ - SELECT id, enqueued, lease_expires, tag FROM pull_queue_tasks - WHERE token(app, queue, id) >= token(%(app)s, %(queue)s, '') - AND token(app, queue, id) < token(%(app)s, %(next_queue)s, '') - """ - parameters = {'app': self.app, 'queue': self.name, - 'next_queue': next_key(self.name)} - results = self.session.execute(select_tasks, parameters) - - for result in results: - task_info = {'id': result.id, - 'enqueueTimestamp': result.enqueued, - 'leaseTimestamp': result.lease_expires} - if result.tag: - task_info['tag'] = result.tag - - self._delete_task_and_index(Task(task_info)) - - def to_json(self, include_stats=False, fields=None): - """ Generate a JSON representation of the queue. - - Args: - include_stats: A boolean indicating whether or not to include stats. - fields: A tuple of fields to include in the output. - Returns: - A string in JSON format representing the queue. 
- """ - if fields is None: - fields = QUEUE_FIELDS - - queue = {} - if 'kind' in fields: - queue['kind'] = 'taskqueues#taskqueue' - - if 'id' in fields: - queue['id'] = self.name - - if 'maxLeases' in fields: - queue['maxLeases'] = self.task_retry_limit - - stat_fields = () - for field in fields: - if isinstance(field, dict) and 'stats' in field: - stat_fields = field['stats'] - - if stat_fields and include_stats: - queue['stats'] = self._get_stats(fields=stat_fields) - - return json.dumps(queue) - - def _task_mutated_by_id(self, task_id, op_id): - """ Checks if the task entry was last mutated with the given ID. - - Args: - task_id: A string specifying the task ID. - op_id: A uuid identifying a process that tried to mutate the task. - Returns: - A boolean indicating that the task was last mutated with the ID. - """ - select_statement = SimpleStatement(""" - SELECT op_id FROM pull_queue_tasks - WHERE app = %(app)s AND queue = %(queue)s AND id = %(id)s - """, consistency_level=ConsistencyLevel.SERIAL) - parameters = { - 'app': self.app, - 'queue': self.name, - 'id': task_id, - 'op_id': op_id - } - try: - result = self.session.execute(select_statement, parameters)[0] - except IndexError: - raise TaskNotFound('Task does not exist: {}'.format(task_id)) - - return result.op_id == op_id - - def _insert_task(self, parameters, retries): - """ Insert task entry into pull_queue_tasks. - - Args: - parameters: A dictionary specifying the task parameters. - retries: The number of times to try the insert. - Raises: - InvalidTaskInfo if the task ID already exists in the queue. - """ - insert_statement = SimpleStatement(""" - INSERT INTO pull_queue_tasks ( - app, queue, id, payload, - enqueued, lease_expires, retry_count, tag, op_id - ) - VALUES ( - %(app)s, %(queue)s, %(id)s, %(payload)s, - %(enqueued)s, %(lease_expires)s, %(retry_count)s, %(tag)s, %(op_id)s - ) - IF NOT EXISTS - """, retry_policy=NO_RETRIES) - try: - parameters['payload'] = parameters['payload'].decode('utf-8') - result = self.session.execute(insert_statement, parameters) - except TRANSIENT_CASSANDRA_ERRORS as error: - retries_left = retries - 1 - if retries_left <= 0: - raise - logger.warning( - 'Encountered error while inserting task: {}. Retrying.'.format(error)) - return self._insert_task(parameters, retries_left) - - if result.was_applied: - return - - try: - success = self._task_mutated_by_id(parameters['id'], parameters['op_id']) - except TaskNotFound: - raise TransientError('Unable to insert task') - - if not success: - error = InvalidTaskInfo() - error.message = 'Task name already taken: {}'.format(parameters['id']) - raise error - - def _update_lease(self, parameters, retries, check_lease=True): - """ Update lease expiration on a task entry. - - Args: - parameters: A dictionary specifying the new parameters. - retries: The number of times to try the update. - check_lease: A boolean specifying that the old lease_expires field must - match the one provided. - Raises: - InvalidLeaseRequest if the lease has already expired. - """ - update_task = """ - UPDATE pull_queue_tasks - SET lease_expires = %(new_eta)s, op_id = %(op_id)s - WHERE app = %(app)s AND queue = %(queue)s AND id = %(id)s - IF lease_expires > %(current_time)s - """ - - # When reporting errors, GCP does not differentiate between a lease - # expiration and the client providing the wrong old_eta. 
- if check_lease: - update_task += 'AND lease_expires = %(old_eta)s' - - update_statement = SimpleStatement(update_task, retry_policy=NO_RETRIES) - try: - result = self.session.execute(update_statement, parameters) - except TRANSIENT_CASSANDRA_ERRORS as error: - retries_left = retries - 1 - if retries_left <= 0: - raise - logger.warning( - 'Encountered error while updating lease: {}. Retrying.'.format(error)) - return self._update_lease(parameters, retries_left, - check_lease=check_lease) - - if result.was_applied: - return - - if not self._task_mutated_by_id(parameters['id'], parameters['op_id']): - raise InvalidLeaseRequest('The task lease has expired.') - - def _query_index(self, num_tasks, group_by_tag, tag): - """ Query the index table for available tasks. - - Args: - num_tasks: An integer specifying the number of tasks to lease. - group_by_tag: A boolean indicating that only tasks of one tag should - be leased. - tag: A string containing the tag for the task. - - Returns: - A list of results from the index table. - """ - if group_by_tag: - query_tasks = """ - SELECT tag, eta, id FROM pull_queue_tags_index - WHERE token(app, queue, tag, eta, id) >= token(%(app)s, %(queue)s, %(tag)s, 0, '') - AND token(app, queue, tag, eta, id) <= token(%(app)s, %(queue)s, %(tag)s, dateof(now()), '') - LIMIT {limit} - """.format(limit=num_tasks) - parameters = {'app': self.app, 'queue': self.name, 'tag': tag} - results = self.session.execute(query_tasks, parameters) - else: - query_tasks = """ - SELECT eta, id, tag FROM pull_queue_eta_index - WHERE token(app, queue, eta, id) >= token(%(app)s, %(queue)s, 0, '') - AND token(app, queue, eta, id) <= token(%(app)s, %(queue)s, dateof(now()), '') - LIMIT {limit} - """.format(limit=num_tasks) - parameters = {'app': self.app, 'queue': self.name} - results = self.session.execute(query_tasks, parameters) - return results - - def _query_available_tasks(self, num_tasks, group_by_tag, tag): - """ Query the cache or index table for available tasks. - - Args: - num_tasks: An integer specifying the number of tasks to lease. - group_by_tag: A boolean indicating that only tasks of one tag should - be leased. - tag: A string containing the tag for the task. - - Returns: - A list of index results. - """ - # If the request is larger than the max cache size, don't use the cache. - if num_tasks > self.MAX_CACHE_SIZE: - return self._query_index(num_tasks, group_by_tag, tag) - - with self.index_cache_lock: - if group_by_tag: - if tag not in self.index_cache['by_tag']: - self.index_cache['by_tag'][tag] = {} - tag_cache = self.index_cache['by_tag'][tag] - else: - tag_cache = self.index_cache['global'] - - # If results have never been fetched, populate the cache. - if not tag_cache: - results = self._query_index(self.MAX_CACHE_SIZE, group_by_tag, tag) - tag_cache['queue'] = deque(results) - tag_cache['last_fetch'] = datetime.datetime.now() - tag_cache['last_results'] = len(tag_cache['queue']) - - # If 0 results were fetched recently, don't try fetching again. - recently = datetime.datetime.now() - datetime.timedelta( - seconds=self.EMPTY_RESULTS_COOLDOWN) - if (not tag_cache['queue'] and tag_cache['last_results'] == 0 and - tag_cache['last_fetch'] > recently): - return [] - - # If the cache is outdated or insufficient, update it. 
- outdated = datetime.datetime.now() - datetime.timedelta( - seconds=self.MAX_CACHE_DURATION) - if (num_tasks > len(tag_cache['queue']) or - tag_cache['last_fetch'] < outdated): - results = self._query_index(self.MAX_CACHE_SIZE, group_by_tag, tag) - tag_cache['queue'] = deque(results) - tag_cache['last_fetch'] = datetime.datetime.now() - tag_cache['last_results'] = len(tag_cache['queue']) - - results = [] - for _ in range(num_tasks): - try: - results.append(tag_cache['queue'].popleft()) - except IndexError: - # The queue is empty. - break - - return results - - def _get_earliest_tag(self): - """ Get the tag with the earliest ETA. - - Returns: - A string containing a tag. - Raises: - EmptyQueue if there are no tasks. - """ - get_earliest_tag = """ - SELECT tag FROM pull_queue_eta_index - WHERE token(app, queue, eta, id) > token(%(app)s, %(queue)s, 0, '') - LIMIT 1 - """ - parameters = {'app': self.app, 'queue': self.name} - try: - tag = self.session.execute(get_earliest_tag, parameters)[0].tag - except IndexError: - raise EmptyQueue('No entries in queue index') - return tag - - def _increment_count_async(self, task): - """ Update retry count for a task. - - Args: - task: A Task object. - """ - session = self.session - - statement = """ - UPDATE pull_queue_tasks - SET retry_count=? - WHERE app=? AND queue=? AND id=? - IF retry_count=? - """ - if statement not in self.prepared_statements: - self.prepared_statements[statement] = session.prepare(statement) - update_count = self.prepared_statements[statement] - - old_count = task.retry_count - new_count = task.retry_count + 1 - params = [new_count, self.app, self.name, task.id, old_count] - bound_update = update_count.bind(params) - bound_update.retry_policy = NO_RETRIES - self.session.execute_async(bound_update) - - def _lease_batch(self, indexes, new_eta): - """ Acquires a lease on tasks in the queue. - - Args: - indexes: An iterable containing results from the index table. - new_eta: A datetime object containing the new lease expiration. - - Returns: - A list of task objects or None if unable to acquire a lease. - """ - leased = [None for _ in indexes] - session = self.session - op_id = uuid.uuid4() - - lease_statement = """ - UPDATE pull_queue_tasks - SET lease_expires = ?, op_id = ? - WHERE app = ? AND queue = ? AND id = ? - IF lease_expires < ? - """ - if self.task_retry_limit != 0: - lease_statement += 'AND retry_count < {}'.format(self.task_retry_limit) - lease_task = session.prepare(lease_statement) - lease_task.retry_policy = NO_RETRIES - current_time = datetime.datetime.utcnow() - - update_futures = [] - for index in indexes: - params = (new_eta, op_id, self.app, self.name, index.id, current_time) - update_futures.append(session.execute_async(lease_task, params)) - - # Check which lease operations succeeded. - statement = """ - SELECT payload, enqueued, retry_count, tag, op_id - FROM pull_queue_tasks - WHERE app=? AND queue=? AND id=? - """ - if statement not in self.prepared_statements: - self.prepared_statements[statement] = session.prepare(statement) - select = self.prepared_statements[statement] - - futures = {} - for result_num, update_future in enumerate(update_futures): - try: - result = update_future.result() - success = True - except cassandra.DriverException: - result = None - success = False - - if success and not result.was_applied: - # The lease operation failed, so keep this index as None. 
- continue - - index = indexes[result_num] - bound_select = select.bind([self.app, self.name, index.id]) - bound_select.consistency_level = ConsistencyLevel.SERIAL - future = session.execute_async(bound_select) - futures[result_num] = (future, not success) - - index_update_futures = [] - for result_num, (future, lease_timed_out) in futures.items(): - index = indexes[result_num] - try: - read_result = future.result()[0] - except (TRANSIENT_CASSANDRA_ERRORS, IndexError): - raise TransientError('Unable to read task {}'.format(index.id)) - - # If the operation IDs do not match, the lease was not successful. - if lease_timed_out and read_result.op_id != op_id: - continue - - task_info = { - 'queueName': self.name, - 'id': index.id, - 'payloadBase64': read_result.payload, - 'enqueueTimestamp': read_result.enqueued, - 'leaseTimestamp': new_eta, - 'retry_count': read_result.retry_count - } - if read_result.tag: - task_info['tag'] = read_result.tag - task = Task(task_info) - leased[result_num] = task - - self._increment_count_async(task) - index_update_futures.append(self._update_index_async(index, task)) - self._update_stats() - - # Make sure all of the index updates complete successfully. - for index_update in index_update_futures: - index_update.result() - - return leased - - def _update_index_async(self, old_index, task): - """ Updates the index table after leasing a task. - - Args: - old_index: The row to remove from the index table. - task: A Task object to create a new index entry for. - Returns: - A cassandra-driver future. - """ - session = self.session - - old_eta = old_index.eta - update_index = BatchStatement(retry_policy=BASIC_RETRIES) - - statement = """ - DELETE FROM pull_queue_eta_index - WHERE app=? - AND queue=? - AND eta=? - AND id=? - """ - if statement not in self.prepared_statements: - self.prepared_statements[statement] = session.prepare(statement) - delete_old_eta_index = self.prepared_statements[statement] - - parameters = [self.app, self.name, old_eta, task.id] - update_index.add(delete_old_eta_index, parameters) - - statement = """ - DELETE FROM pull_queue_tags_index - WHERE app=? - AND queue=? - AND tag=? - AND eta=? - AND id=? - """ - if statement not in self.prepared_statements: - self.prepared_statements[statement] = session.prepare(statement) - delete_old_tag_index = self.prepared_statements[statement] - - parameters = [self.app, self.name, old_index.tag, old_eta, task.id] - update_index.add(delete_old_tag_index, parameters) - - try: - tag = task.tag - except AttributeError: - tag = '' - - statement = """ - INSERT INTO pull_queue_eta_index (app, queue, eta, id, tag) - VALUES (?, ?, ?, ?, ?) - """ - if statement not in self.prepared_statements: - self.prepared_statements[statement] = session.prepare(statement) - create_new_eta_index = self.prepared_statements[statement] - - parameters = [self.app, self.name, task.leaseTimestamp, task.id, tag] - update_index.add(create_new_eta_index, parameters) - - statement = """ - INSERT INTO pull_queue_tags_index (app, queue, tag, eta, id) - VALUES (?, ?, ?, ?, ?) - """ - if statement not in self.prepared_statements: - self.prepared_statements[statement] = session.prepare(statement) - create_new_tag_index = self.prepared_statements[statement] - - parameters = [self.app, self.name, tag, task.leaseTimestamp, task.id] - update_index.add(create_new_tag_index, parameters) - - return self.session.execute_async(update_index) - - def _delete_index(self, eta, task_id, tag): - """ Deletes an index entry for a task. 
- - Args: - eta: A datetime object. - task_id: A string containing the task ID. - tag: A string containing the task tag. - """ - delete_eta_index = """ - DELETE FROM pull_queue_eta_index - WHERE app = %(app)s - AND queue = %(queue)s - AND eta = %(eta)s - AND id = %(id)s - """ - parameters = {'app': self.app, 'queue': self.name, 'eta': eta, - 'id': task_id} - self.session.execute(delete_eta_index, parameters) - - delete_tag_index = """ - DELETE FROM pull_queue_tags_index - WHERE app = %(app)s - AND queue = %(queue)s - AND tag = %(tag)s - AND eta = %(eta)s - AND id = %(id)s - """ - parameters = {'app': self.app, 'queue': self.name, 'tag': tag, 'eta': eta, - 'id': task_id} - self.session.execute(delete_tag_index, parameters) - - def _delete_task_and_index(self, task, retries=5): - """ Deletes a task and its index. - - Args: - task: A Task object. - """ - delete_task = SimpleStatement(""" - DELETE FROM pull_queue_tasks - WHERE app = %(app)s AND queue = %(queue)s AND id = %(id)s - IF EXISTS - """, retry_policy=NO_RETRIES) - parameters = {'app': self.app, 'queue': self.name, 'id': task.id} - try: - self.session.execute(delete_task, parameters=parameters) - except TRANSIENT_CASSANDRA_ERRORS as error: - retries_left = retries - 1 - if retries_left <= 0: - raise - logger.warning( - 'Encountered error while deleting task: {}. Retrying.'.format(error)) - return self._delete_task_and_index(task, retries=retries_left) - - delete_task_eta_index = SimpleStatement(""" - DELETE FROM pull_queue_eta_index - WHERE app = %(app)s - AND queue = %(queue)s - AND eta = %(eta)s - AND id = %(id)s - """) - parameters = { - 'app': self.app, - 'queue': self.name, - 'eta': task.get_eta(), - 'id': task.id - } - self.session.execute(delete_task_eta_index, parameters=parameters) - - try: - tag = task.tag - except AttributeError: - tag = '' - - delete_task_tag_index = SimpleStatement(""" - DELETE FROM pull_queue_tags_index - WHERE app = %(app)s - AND queue = %(queue)s - AND tag = %(tag)s - AND eta = %(eta)s - AND id = %(id)s - """) - parameters = { - 'app': self.app, - 'queue': self.name, - 'tag': tag, - 'eta': task.get_eta(), - 'id': task.id - } - self.session.execute(delete_task_tag_index, parameters=parameters) - - def _resolve_task(self, index): - """ Cleans up expired tasks and indices. - - Args: - index: An index result. - """ - task = self.get_task(Task({'id': index.id}), omit_payload=True) - if task is None: - self._delete_index(index.eta, index.id, index.tag) - return - - if self.task_retry_limit != 0 and task.expired(self.task_retry_limit): - self._delete_task_and_index(task) - return - - # If the index does not match the task, update it. - if task.leaseTimestamp != index.eta: - self._update_index_async(index, task).result() - - def _update_stats(self): - """ Write queue metadata for keeping track of statistics. """ - session = self.session - # Stats are only kept for one hour. - ttl = 60 * 60 - statement = """ - INSERT INTO pull_queue_leases (app, queue, leased) - VALUES (?, ?, ?) - USING TTL {ttl} - """.format(ttl=ttl) - if statement not in self.prepared_statements: - self.prepared_statements[statement] = session.prepare(statement) - record_lease = self.prepared_statements[statement] - - parameters = [self.app, self.name, datetime.datetime.utcnow()] - self.session.execute_async(record_lease, parameters) - - def _get_stats(self, fields): - """ Fetch queue statistics. - - Args: - fields: A tuple of fields to include in the results. - Returns: - A dictionary containing queue statistics. 
- """ - session = self.session - stats = {} - - if 'totalTasks' in fields: - stats['totalTasks'] = self.total_tasks() - - if 'oldestTask' in fields: - epoch = datetime.datetime.utcfromtimestamp(0) - oldest_eta = self.oldest_eta() or epoch - stats['oldestTask'] = int((oldest_eta - epoch).total_seconds()) - - if 'leasedLastMinute' in fields: - select_count = """ - SELECT COUNT(*) from pull_queue_leases - WHERE token(app, queue, leased) > token(%(app)s, %(queue)s, %(ts)s) - AND token(app, queue, leased) <= - token(%(app)s, %(queue)s, dateof(now())) - """ - start_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=60) - parameters = {'app': self.app, 'queue': self.name, 'ts': start_time} - leased_last_minute = session.execute(select_count, parameters)[0].count - stats['leasedLastMinute'] = leased_last_minute - - if 'leasedLastHour' in fields: - select_count = """ - SELECT COUNT(*) from pull_queue_leases - WHERE token(app, queue, leased) > token(%(app)s, %(queue)s, %(ts)s) - AND token(app, queue, leased) <= - token(%(app)s, %(queue)s, dateof(now())) - """ - start_time = datetime.datetime.utcnow() - datetime.timedelta(minutes=60) - parameters = {'app': self.app, 'queue': self.name, 'ts': start_time} - leased_last_hour = session.execute(select_count, parameters)[0].count - stats['leasedLastHour'] = leased_last_hour - - return stats - - def __repr__(self): - """ Generates a string representation of the queue. - - Returns: - A string representing the PullQueue. - """ - return ''.format( - self.name, self.app, self.task_retry_limit) diff --git a/AppTaskQueue/appscale/taskqueue/queue_manager.py b/AppTaskQueue/appscale/taskqueue/queue_manager.py index e578eaeb04..acf56aaa8a 100644 --- a/AppTaskQueue/appscale/taskqueue/queue_manager.py +++ b/AppTaskQueue/appscale/taskqueue/queue_manager.py @@ -1,23 +1,20 @@ """ Keeps track of queue configuration details for producer connections. """ import json +import random from kazoo.exceptions import ZookeeperError +from tornado import gen from tornado.ioloop import IOLoop, PeriodicCallback -from appscale.taskqueue.pg_connection_wrapper import PostgresConnectionWrapper -from appscale.taskqueue.queue import PostgresPullQueue -from appscale.taskqueue.utils import create_celery_for_app -from .queue import PullQueue -from .queue import PushQueue -from .utils import logger +from .queue import PushQueue, PostgresPullQueue +from .utils import logger, create_celery_for_app class ProjectQueueManager(dict): """ Keeps track of queue configuration details for a single project. """ FLUSH_DELETED_INTERVAL = 3 * 60 * 60 # 3h - MAX_POSTGRES_BACKED_PROJECTS = 20 def __init__(self, zk_client, project_id): """ Creates a new ProjectQueueManager. 
@@ -29,31 +26,7 @@ def __init__(self, zk_client, project_id): super(ProjectQueueManager, self).__init__() self.zk_client = zk_client self.project_id = project_id - - project_dsn_node = '/appscale/projects/{}/postgres_dsn'.format(project_id) - global_dsn_node = '/appscale/tasks/postgres_dsn' - if self.zk_client.exists(project_dsn_node): - pg_dsn = self.zk_client.get(project_dsn_node) - logger.info('Using project-specific PostgreSQL as a backend for ' - 'Pull Queues of project "{}" '.format(project_id)) - elif self.zk_client.exists(global_dsn_node): - pg_dsn = self.zk_client.get(global_dsn_node) - logger.info('Using deployment-wide PostgreSQL as a backend for ' - 'Pull Queues"'.format(project_id)) - else: - pg_dsn = None - logger.info('Using Cassandra as a backend for Pull Queues of "{}"' - .format(project_id)) - - if pg_dsn: - # TODO: PostgresConnectionWrapper may need an update when - # TaskQueue becomes concurrent - self.pg_connection_wrapper = PostgresConnectionWrapper( - dsn=pg_dsn[0].decode('utf-8') - ) - self._configure_periodical_flush() - else: - self.pg_connection_wrapper = None + self._configure_periodical_flush() self.queues_node = '/appscale/projects/{}/queues'.format(project_id) self.watch = zk_client.DataWatch(self.queues_node, @@ -88,11 +61,8 @@ def update_queues(self, queue_config): queue_info['name'] = queue_name if 'mode' not in queue_info or queue_info['mode'] == 'push': self[queue_name] = PushQueue(queue_info, self.project_id) - elif self.pg_connection_wrapper: - self[queue_name] = PostgresPullQueue(queue_info, self.project_id, - self.pg_connection_wrapper) else: - self[queue_name] = PullQueue(queue_info, self.project_id) + self[queue_name] = PostgresPullQueue(queue_info, self.project_id) # Establish a new Celery connection based on the new queues, and close the # old one. @@ -120,8 +90,6 @@ def stop(self): """ Close the Celery and Postgres connections if they still exist. """ if self.celery is not None: self.celery.close() - if self.pg_connection_wrapper is not None: - self.pg_connection_wrapper.close() def _update_queues_watch(self, queue_config, _): """ Handles updates to a queue configuration node. @@ -152,14 +120,23 @@ def _update_queues_watch(self, queue_config, _): def _configure_periodical_flush(self): """ Creates and starts periodical callback to clear old deleted tasks. """ + @gen.coroutine def flush_deleted(): - """ Calls flush_deleted method for all PostgresPullQueues. + """ Calls flush_deleted method for all PostgresPullQueues + with asynchronous delay to avoid concentration of flush queries + to SQL server during short period of time. 
""" + yield gen.sleep(random.random() * self.FLUSH_DELETED_INTERVAL / 2) postgres_pull_queues = (q for q in self.values() if isinstance(q, PostgresPullQueue)) for q in postgres_pull_queues: + yield gen.sleep(3) q.flush_deleted() + # def schedule_flush_deleted(): + # main_io_loop = IOLoop.instance() + # main_io_loop.add_callback(flush_deleted) + PeriodicCallback(flush_deleted, self.FLUSH_DELETED_INTERVAL * 1000).start() diff --git a/AppTaskQueue/appscale/taskqueue/rest_api.py b/AppTaskQueue/appscale/taskqueue/rest_api.py index c78a73936a..ea5e20bf94 100644 --- a/AppTaskQueue/appscale/taskqueue/rest_api.py +++ b/AppTaskQueue/appscale/taskqueue/rest_api.py @@ -15,9 +15,8 @@ from .task import InvalidTaskInfo, Task, TASK_FIELDS from .queue import (InvalidLeaseRequest, LONG_QUEUE_FORM, - PullQueue, PostgresPullQueue, - QUEUE_FIELDS, - TransientError) + PostgresPullQueue, + QUEUE_FIELDS) # The prefix for all of the handlers of the pull queue REST API. REST_PREFIX = '/taskqueue/v1beta2/projects/(?:.~)?([a-z0-9-]+)/taskqueues' @@ -105,7 +104,7 @@ def get(self, project_id): return pull_queues = [queue_name for queue_name, queue in project_queues.items() - if isinstance(queue, PullQueue)] + if isinstance(queue, PostgresPullQueue)] json.dump(pull_queues, self) @@ -130,7 +129,7 @@ def get(self, project, queue): write_error(self, HTTPCodes.NOT_FOUND, 'Queue not found.') return - if not isinstance(queue, (PullQueue, PostgresPullQueue)): + if not isinstance(queue, PostgresPullQueue): write_error(self, HTTPCodes.BAD_REQUEST, 'The REST API is only applicable to pull queues.') return @@ -229,10 +228,8 @@ def post(self, project, queue): try: queue.add_task(task) except InvalidTaskInfo as insert_error: - write_error(self, HTTPCodes.BAD_REQUEST, insert_error.message) + write_error(self, HTTPCodes.BAD_REQUEST, str(insert_error)) return - except TransientError as error: - write_error(self, HTTPCodes.INTERNAL_ERROR, str(error)) self.write(json.dumps(task.json_safe_dict(fields=fields))) @@ -294,7 +291,7 @@ def post(self, project, queue): try: tasks = queue.lease_tasks(num_tasks, lease_seconds, group_by_tag, tag) except InvalidLeaseRequest as lease_error: - write_error(self, HTTPCodes.BAD_REQUEST, lease_error.message) + write_error(self, HTTPCodes.BAD_REQUEST, str(lease_error)) return except TransientError as lease_error: write_error(self, HTTPCodes.INTERNAL_ERROR, str(lease_error)) @@ -403,7 +400,7 @@ def post(self, project, queue, task): try: task = queue.update_lease(provided_task, new_lease_seconds) except InvalidLeaseRequest as lease_error: - write_error(self, HTTPCodes.BAD_REQUEST, lease_error.message) + write_error(self, HTTPCodes.BAD_REQUEST, str(lease_error)) return except TaskNotFound as error: write_error(self, HTTPCodes.NOT_FOUND, str(error)) @@ -459,7 +456,7 @@ def patch(self, project, queue, task): try: new_task = Task(task_info) except InvalidTaskInfo as task_error: - write_error(self, HTTPCodes.BAD_REQUEST, task_error.message) + write_error(self, HTTPCodes.BAD_REQUEST, str(task_error)) return try: @@ -487,7 +484,7 @@ def patch(self, project, queue, task): try: task = queue.update_task(new_task, new_lease_seconds) except InvalidLeaseRequest as lease_error: - write_error(self, HTTPCodes.BAD_REQUEST, lease_error.message) + write_error(self, HTTPCodes.BAD_REQUEST, str(lease_error)) return except TaskNotFound as error: write_error(self, HTTPCodes.NOT_FOUND, str(error)) diff --git a/AppTaskQueue/setup.py b/AppTaskQueue/setup.py index 0576da1801..0b3b8c8d50 100644 --- a/AppTaskQueue/setup.py +++ 
b/AppTaskQueue/setup.py @@ -11,7 +11,6 @@ platforms='Posix', install_requires=[ 'appscale-common', - 'cassandra-driver<3.18.0', 'celery>=3.1,<4.0.0', 'eventlet==0.22', 'kazoo', From e0ac086a916ae0a88888d64b4be0f8089f787cf9 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 26 Sep 2019 13:17:30 -0700 Subject: [PATCH 146/221] Updates for systemd, app controller service helper comments --- AppController/lib/service_helper.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/AppController/lib/service_helper.rb b/AppController/lib/service_helper.rb index 34f55b9e63..b95a2c72d1 100644 --- a/AppController/lib/service_helper.rb +++ b/AppController/lib/service_helper.rb @@ -43,6 +43,11 @@ def self.is_running?(name, port = nil) end # This function returns a list of running services + # + # If name is a "template" (e.g. appscale-myservice@) then multiple + # services could be matched. + # + # If name is a regular service there will be at most one running service. def self.running(name) services = [] service_name_match = if name.end_with?('@') then "#{name.to_s}*" else name.to_s end From 29ad801c6928ce827277a82dcbf50361c312d1ee Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 26 Sep 2019 16:24:42 -0700 Subject: [PATCH 147/221] Updates for systemd, common helper service reload --- common/appscale/common/service_helper.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/common/appscale/common/service_helper.py b/common/appscale/common/service_helper.py index 4b0d101ae7..3778f515aa 100644 --- a/common/appscale/common/service_helper.py +++ b/common/appscale/common/service_helper.py @@ -122,6 +122,23 @@ def restart(name, background=False, start=True): background=background)) +def reload(name, background=False, start=True): + """ Reload the given service(s). + + Args: + name: A str representing the name of the service(s) to reload. + background: True to start without blocking + start: True to start services if not already running (use False with name pattern) + """ + logger.info('Reloading service(s) {0}'.format(name)) + command = 'try-reload-or-restart' + if start: + command = 'reload-or-restart' + __safe_systemctl_run(__build_command(command, + __name_match(name), + background=background)) + + def list(running=False): """ List appscale service(s). 
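Note on the reload() helper added above: it wraps systemctl's reload-or-restart verbs. With start=True it issues 'reload-or-restart' (starting the unit if it is stopped); otherwise it issues 'try-reload-or-restart', which leaves stopped units alone and is the safer choice when the name is a template pattern such as 'appscale-haproxy@'. The sketch below only illustrates the resulting systemctl calls under those assumptions; it does not reproduce the real __build_command/__name_match internals, and the reload_unit name is hypothetical.

import subprocess

def reload_unit(name, start=True):
    # 'reload-or-restart' also starts stopped units; 'try-reload-or-restart'
    # only touches units that are already running.
    verb = 'reload-or-restart' if start else 'try-reload-or-restart'
    # A trailing '@' marks a systemd template unit, so match all of its
    # instances, mirroring the "#{name}*" matching in the Ruby service_helper
    # shown earlier in this series.
    unit = name + '*' if name.endswith('@') else name
    subprocess.check_call(['systemctl', verb, unit])

# Example: reload every running haproxy instance without starting new ones.
# reload_unit('appscale-haproxy@', start=False)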
From a0d6c95fb9bcb2569a96c0d03bb4618bb8789826 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 26 Sep 2019 16:28:17 -0700 Subject: [PATCH 148/221] Updates for systemd, admin server routing manager haproxy update --- AdminServer/appscale/admin/routing/haproxy.py | 59 +++++-------------- .../appscale/admin/routing/routing_manager.py | 8 +-- debian/appscale_install_functions.sh | 6 +- system/units/appscale-admin.service | 1 + system/units/appscale-haproxy.service | 18 ------ system/units/appscale-haproxy@.service | 18 ++++++ 6 files changed, 40 insertions(+), 70 deletions(-) delete mode 100644 system/units/appscale-haproxy.service create mode 100644 system/units/appscale-haproxy@.service diff --git a/AdminServer/appscale/admin/routing/haproxy.py b/AdminServer/appscale/admin/routing/haproxy.py index 89c84ff817..b484def6c3 100644 --- a/AdminServer/appscale/admin/routing/haproxy.py +++ b/AdminServer/appscale/admin/routing/haproxy.py @@ -4,11 +4,10 @@ import monotonic import os import pkgutil -import signal -import subprocess from tornado import gen +from appscale.common import service_helper from appscale.common.appscale_info import get_private_ip logger = logging.getLogger(__name__) @@ -22,11 +21,11 @@ # The location of the combined HAProxy config file for AppScale services. SERVICE_CONFIG = os.path.join(CONFIG_DIR, 'service-haproxy.cfg') -# The location of the pidfile for instance-related HAProxy processes. -APP_PID = os.path.join('/', 'run', 'appscale', 'app-haproxy.pid') +# The instance name for instance-related HAProxy processes. +APP_INSTANCE = 'app' -# The location of the pidfile for service-related HAProxy processes. -SERVICE_PID = os.path.join('/', 'run', 'appscale', 'service-haproxy.pid') +# The instance name for service-related HAProxy processes. +SERVICE_INSTANCE = 'service' # The location of the unix socket used for reporting application stats. APP_STATS_SOCKET = os.path.join(CONFIG_DIR, 'stats') @@ -112,7 +111,7 @@ class HAProxy(object): # The minimum number of seconds to wait between each reload operation. RELOAD_COOLDOWN = .1 - def __init__(self, config_location, pid_location, stats_socket): + def __init__(self, instance, config_location, stats_socket): """ Creates a new HAProxy operator. """ self.connect_timeout_ms = self.DEFAULT_CONNECT_TIMEOUT * 1000 self.client_timeout_ms = self.DEFAULT_CLIENT_TIMEOUT * 1000 @@ -120,8 +119,8 @@ def __init__(self, config_location, pid_location, stats_socket): self.blocks = {} self.reload_future = None + self._instance = instance self._config_location = config_location - self._pid_location = pid_location self._stats_socket = stats_socket # Given the arbitrary base of the monotonic clock, it doesn't make sense @@ -164,37 +163,12 @@ def reload(self): yield self.reload_future - def _get_pid(self): - try: - with open(self._pid_location) as pid_file: - pid = int(pid_file.read()) - except IOError as error: - if error.errno != errno.ENOENT: - raise - - pid = None - - # Check if the process is running. 
- if pid is not None: - try: - os.kill(pid, 0) - except OSError as error: - if error.errno == errno.ESRCH: - pid = None - else: - logger.warning('Encountered unexpected error when checking haproxy ' - 'process: {}'.format(str(error))) - - return pid + @property + def _service(self): + return 'appscale-haproxy@{}.service'.format(self._instance) def _stop(self): - pid = self._get_pid() - if pid is not None: - try: - os.kill(pid, signal.SIGUSR1) - except OSError as error: - if error.errno != errno.ESRCH: - logger.error('Unable to stop haproxy process') + service_helper.stop(self._service) try: os.remove(self._config_location) @@ -219,6 +193,7 @@ def _reload(self): # Ensure process is not running if there is nothing to route. if new_content is None: self._stop() + return try: with open(self._config_location, 'r') as config_file: @@ -235,12 +210,6 @@ def _reload(self): with open(self._config_location, 'w') as config_file: config_file.write(new_content) - pid = self._get_pid() - if pid is None: - subprocess.check_call(['haproxy', '-f', self._config_location, '-D', - '-p', self._pid_location]) - else: - subprocess.check_call(['haproxy', '-f', self._config_location, '-D', - '-p', self._pid_location, '-sf', str(pid)]) + service_helper.reload(self._service) - logger.info('Updated HAProxy config') + logger.info('Updated {} HAProxy config'.format(self._instance)) diff --git a/AdminServer/appscale/admin/routing/routing_manager.py b/AdminServer/appscale/admin/routing/routing_manager.py index c87421f362..d7539d8877 100644 --- a/AdminServer/appscale/admin/routing/routing_manager.py +++ b/AdminServer/appscale/admin/routing/routing_manager.py @@ -9,8 +9,8 @@ from appscale.admin.constants import CONTROLLER_STATE_NODE from appscale.admin.routing.haproxy import ( - APP_CONFIG, APP_PID, APP_STATS_SOCKET, HAProxy, HAProxyListenBlock, - SERVICE_CONFIG, SERVICE_PID, SERVICE_STATS_SOCKET) + APP_CONFIG, APP_INSTANCE, APP_STATS_SOCKET, HAProxy, HAProxyListenBlock, + SERVICE_CONFIG, SERVICE_INSTANCE, SERVICE_STATS_SOCKET) from appscale.common.async_retrying import ( retry_children_watch_coroutine, retry_data_watch_coroutine) from appscale.common.constants import ( @@ -166,8 +166,8 @@ def __init__(self, zk_client): Args: zk_client: A KazooClient. 
""" - self._app_haproxy = HAProxy(APP_CONFIG, APP_PID, APP_STATS_SOCKET) - self._service_haproxy = HAProxy(SERVICE_CONFIG, SERVICE_PID, + self._app_haproxy = HAProxy(APP_INSTANCE, APP_CONFIG, APP_STATS_SOCKET) + self._service_haproxy = HAProxy(SERVICE_INSTANCE, SERVICE_CONFIG, SERVICE_STATS_SOCKET) self._versions = {} self._zk_client = zk_client diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index a2e50596e8..1c0dcb83d3 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -252,13 +252,13 @@ postinstallhaproxy() # Pre 1.8 uses wrapper with systemd if [ -f "/usr/sbin/haproxy-systemd-wrapper" ] ; then - HAPROXY_UNITD_DIR="${DESTDIR}/lib/systemd/system/appscale-haproxy.service.d" + HAPROXY_UNITD_DIR="${DESTDIR}/lib/systemd/system/appscale-haproxy@.service.d" [ -d "${HAPROXY_UNITD_DIR}" ] || mkdir -p "${HAPROXY_UNITD_DIR}" - cat <<"EOF" > "${DESTDIR}/lib/systemd/system/appscale-haproxy.service.d/10-appscale-haproxy.conf" + cat <<"EOF" > "${DESTDIR}/lib/systemd/system/appscale-haproxy@.service.d/10-appscale-haproxy.conf" [Service] Type=simple ExecStart= -ExecStart=/usr/sbin/haproxy-systemd-wrapper -f ${CONFIG} -p /run/appscale/service-haproxy.pid $EXTRAOPTS +ExecStart=/usr/sbin/haproxy-systemd-wrapper -f ${CONFIG_DIR}%i${CONFIG_SUFFIX} -p /run/appscale/%i-haproxy.pid $EXTRAOPTS EOF fi } diff --git a/system/units/appscale-admin.service b/system/units/appscale-admin.service index c952e82de2..b1e58d3846 100644 --- a/system/units/appscale-admin.service +++ b/system/units/appscale-admin.service @@ -2,6 +2,7 @@ Description=AppScale Admin API Before=appscale-control.target PartOf=appscale-control.target +Wants=appscale-haproxy@service.service appscale-haproxy@app.service [Service] Environment=APPSCALE_ADMIN_OPTION_PORT=17442 diff --git a/system/units/appscale-haproxy.service b/system/units/appscale-haproxy.service deleted file mode 100644 index e018ab2be4..0000000000 --- a/system/units/appscale-haproxy.service +++ /dev/null @@ -1,18 +0,0 @@ -[Unit] -Description=AppScale Services HAProxy Load Balancer -Before=appscale-service.target -PartOf=appscale-service.target - -[Service] -Type=notify -Environment=CONFIG=/etc/haproxy/service-haproxy.cfg -ExecStartPre=/usr/sbin/haproxy -f ${CONFIG} -c -q -ExecStart=/usr/sbin/haproxy -Ws -f ${CONFIG} -p /run/appscale/service-haproxy.pid $EXTRAOPTS -ExecReload=/usr/sbin/haproxy -f ${CONFIG} -c -q -ExecReload=/bin/kill -USR2 $MAINPID -SuccessExitStatus=0 143 -KillMode=mixed -SyslogIdentifier=%p - -[Install] -WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-haproxy@.service b/system/units/appscale-haproxy@.service new file mode 100644 index 0000000000..7e8f7dd29d --- /dev/null +++ b/system/units/appscale-haproxy@.service @@ -0,0 +1,18 @@ +[Unit] +Description=AppScale HAProxy Load Balancer (%i) +Before=appscale-service.target +PartOf=appscale-service.target + +[Service] +Type=notify +Environment=CONFIG_DIR=/etc/haproxy/ CONFIG_SUFFIX=-haproxy.cfg +ExecStartPre=/usr/sbin/haproxy -f ${CONFIG_DIR}%i${CONFIG_SUFFIX} -c -q +ExecStart=/usr/sbin/haproxy -Ws -f ${CONFIG_DIR}%i${CONFIG_SUFFIX} -p /run/appscale/%i-haproxy.pid $EXTRAOPTS +ExecReload=/usr/sbin/haproxy -f ${CONFIG_DIR}%i${CONFIG_SUFFIX} -c -q +ExecReload=/bin/kill -USR2 $MAINPID +SuccessExitStatus=0 143 +KillMode=mixed +SyslogIdentifier=%p-%i + +[Install] +WantedBy=appscale-service.target \ No newline at end of file From 1630c50f202cbcd5548bfb82b742065f2f540fc7 Mon Sep 17 00:00:00 2001 
From: Steve Jones Date: Thu, 26 Sep 2019 18:07:25 -0700 Subject: [PATCH 149/221] Updates for systemd, taskqueue unit now python 3 --- system/units/appscale-taskqueue@.service | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/system/units/appscale-taskqueue@.service b/system/units/appscale-taskqueue@.service index 7b914e986e..a20f626c71 100644 --- a/system/units/appscale-taskqueue@.service +++ b/system/units/appscale-taskqueue@.service @@ -4,7 +4,7 @@ Before=appscale-service.target PartOf=appscale-service.target [Service] -ExecStart=/opt/appscale_venvs/appscale_taskqueue/bin/python2 /opt/appscale_venvs/appscale_taskqueue/bin/appscale-taskqueue -p %i $APPSCALE_OPTION_VERBOSE +ExecStart=/opt/appscale_venvs/appscale_taskqueue/bin/python3 /opt/appscale_venvs/appscale_taskqueue/bin/appscale-taskqueue -p %i $APPSCALE_OPTION_VERBOSE SyslogIdentifier=%p-%i [Install] From cdaaed8a8c15778578457d8b3359a10c7721a946 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 26 Sep 2019 20:38:48 -0700 Subject: [PATCH 150/221] Updates for systemd, hermes test fix --- Hermes/tests/test_process.py | 8 ++++---- Hermes/tests/test_unified_service_names.py | 8 ++------ 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/Hermes/tests/test_process.py b/Hermes/tests/test_process.py index 4ff7652a0b..1822b7c910 100644 --- a/Hermes/tests/test_process.py +++ b/Hermes/tests/test_process.py @@ -5,9 +5,9 @@ from appscale.hermes.unified_service_names import ServicesEnum from appscale.hermes.producers import process_stats -+SYSTEMCTL_SHOW = b""" +SYSTEMCTL_SHOW = b""" MainPID=8466 -Id=appscale-haproxy.service +Id=appscale-haproxy@service.service MainPID=5045 Id=appscale-instance-run@appscaledashboard_default_v1_1566168050028-20000.service @@ -17,7 +17,7 @@ @patch('appscale.common.appscale_info.get_private_ip') @patch('appscale.hermes.producers.process_stats._process_stats') @patch('subprocess.check_output') -def test_reading_systemd_status(self, mock_check_output, mock_process_stats, +def test_reading_systemd_status(mock_check_output, mock_process_stats, mock_get_private_ip): # Mocking `systemctl show` output and appscale_info.get_private_ip mock_check_output.return_value = SYSTEMCTL_SHOW @@ -28,7 +28,7 @@ def test_reading_systemd_status(self, mock_check_output, mock_process_stats, # Checking expectations mock_process_stats.assert_has_calls([ - call(8466, ServicesEnum.SERVICE_HAPROXY, 'appscale-haproxy.service', '1.1.1.1'), + call(8466, ServicesEnum.SERVICE_HAPROXY, 'appscale-haproxy@service.service', '1.1.1.1'), call(5045, ServicesEnum.APPLICATION, 'appscale-instance-run@appscaledashboard_default_v1_1566168050028-20000.service', '1.1.1.1') ]) assert isinstance(snapshot, process_stats.ProcessesStatsSnapshot) diff --git a/Hermes/tests/test_unified_service_names.py b/Hermes/tests/test_unified_service_names.py index 2f7e7d9a2b..738fc5bf1b 100644 --- a/Hermes/tests/test_unified_service_names.py +++ b/Hermes/tests/test_unified_service_names.py @@ -18,12 +18,12 @@ def test_search_for_known_service(self): 'appscale-logserver.service': ServicesEnum.LOG_SERVICE, 'appscale-infrastructure@basic.service': ServicesEnum.IAAS_MANAGER, 'appscale-infrastructure@shadow.service': ServicesEnum.IAAS_MANAGER, - 'appscale-haproxy.service': ServicesEnum.SERVICE_HAPROXY, + 'appscale-haproxy@service.service': ServicesEnum.SERVICE_HAPROXY, 'ejabberd.service': ServicesEnum.EJABBERD, 'appscale-celery@snowmachineapp.service': ServicesEnum.CELERY, 'appscale-instance-manager.service': ServicesEnum.APPMANAGER, } - for external_name, 
expected in external_name_to_expectation.iteritems(): + for external_name, expected in external_name_to_expectation.items(): assert find_service_by_external_name(external_name) == expected def test_search_for_unknown_service(self): @@ -41,10 +41,6 @@ def test_parsing_application_id(self): assert app == 'appppa' def test_parsing_port(self): - # Celery service - celery = ServicesEnum.CELERY - port = celery.get_port_by_external_name('appscale-celery@ppa-9999.service') - assert port == 9999 # Application service application = ServicesEnum.APPLICATION port = application.get_port_by_external_name('appscale-instance-run@appppa-20008.service') From 311cb8cf5d59cfeeb63275201498c6b1bf8a088d Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 27 Sep 2019 01:39:20 -0700 Subject: [PATCH 151/221] Updates for systemd, hermes update for haproxy service instance --- Hermes/appscale/hermes/unified_service_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Hermes/appscale/hermes/unified_service_names.py b/Hermes/appscale/hermes/unified_service_names.py index a7aaa655d2..c8be3f6291 100644 --- a/Hermes/appscale/hermes/unified_service_names.py +++ b/Hermes/appscale/hermes/unified_service_names.py @@ -164,7 +164,7 @@ class ServicesEnum(object): CRON = Service(name='crond', name_matcher=r'^cron.service$') APPMANAGER = Service(name='appmanager', name_matcher='^appscale-instance-manager.service$') - SERVICE_HAPROXY = Service(name='service_haproxy', name_matcher='^appscale-haproxy.service$') + SERVICE_HAPROXY = Service(name='service_haproxy', name_matcher='^appscale-haproxy@service.service$') KNOWN_SERVICES = [ From fefef68d14e4c1e634946ceaff674e2425366723 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 27 Sep 2019 14:28:52 +0300 Subject: [PATCH 152/221] Don't create PullQueue tables in Cassandra --- .../datastore/cassandra_env/schema.py | 176 +----------------- 1 file changed, 1 insertion(+), 175 deletions(-) diff --git a/AppDB/appscale/datastore/cassandra_env/schema.py b/AppDB/appscale/datastore/cassandra_env/schema.py index f5af998243..1cbad7e508 100644 --- a/AppDB/appscale/datastore/cassandra_env/schema.py +++ b/AppDB/appscale/datastore/cassandra_env/schema.py @@ -14,7 +14,7 @@ from appscale.common.constants import SCHEMA_CHANGE_TIMEOUT from appscale.common.datastore_index import DatastoreIndex, merge_indexes from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from cassandra import ConsistencyLevel, OperationTimedOut +from cassandra import ConsistencyLevel from cassandra.cluster import Cluster from cassandra.cluster import SimpleStatement from cassandra.policies import FallthroughRetryPolicy, RetryPolicy @@ -251,179 +251,6 @@ def create_entity_ids_table(session): raise -def rebuild_task_indexes(session): - """ Creates index entries for all pull queue tasks. - - Args: - session: A cassandra-driver session. 
- """ - logger.info('Rebuilding task indexes') - batch_size = 100 - total_tasks = 0 - app = '' - queue = '' - id_ = '' - while True: - results = session.execute(""" - SELECT app, queue, id, lease_expires, tag FROM pull_queue_tasks - WHERE token(app, queue, id) > token(%(app)s, %(queue)s, %(id)s) - LIMIT {} - """.format(batch_size), {'app': app, 'queue': queue, 'id': id_}) - results_list = list(results) - for result in results_list: - parameters = {'app': result.app, 'queue': result.queue, - 'eta': result.lease_expires, 'id': result.id, - 'tag': result.tag or ''} - - insert_eta_index = SimpleStatement(""" - INSERT INTO pull_queue_eta_index (app, queue, eta, id, tag) - VALUES (%(app)s, %(queue)s, %(eta)s, %(id)s, %(tag)s) - """, retry_policy=BASIC_RETRIES) - session.execute(insert_eta_index, parameters) - - insert_tag_index = SimpleStatement(""" - INSERT INTO pull_queue_tags_index (app, queue, tag, eta, id) - VALUES (%(app)s, %(queue)s, %(tag)s, %(eta)s, %(id)s) - """, retry_policy=BASIC_RETRIES) - session.execute(insert_tag_index, parameters) - - total_tasks += len(results_list) - if len(results_list) < batch_size: - break - - app = results_list[-1].app - queue = results_list[-1].queue - id_ = results_list[-1].id - - logger.info('Created entries for {} tasks'.format(total_tasks)) - - -def create_pull_queue_tables(cluster, session): - """ Create the required tables for pull queues. - - Args: - cluster: A cassandra-driver cluster. - session: A cassandra-driver session. - """ - logger.info('Trying to create pull_queue_tasks') - create_table = """ - CREATE TABLE IF NOT EXISTS pull_queue_tasks ( - app text, - queue text, - id text, - payload text, - enqueued timestamp, - lease_expires timestamp, - retry_count int, - tag text, - op_id uuid, - PRIMARY KEY ((app, queue, id)) - ) - """ - statement = SimpleStatement(create_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating pull_queue_tasks. ' - 'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - keyspace_metadata = cluster.metadata.keyspaces[KEYSPACE] - if 'op_id' not in keyspace_metadata.tables['pull_queue_tasks'].columns: - try: - session.execute('ALTER TABLE pull_queue_tasks ADD op_id uuid', - timeout=SCHEMA_CHANGE_TIMEOUT) - except OperationTimedOut: - logger.warning( - 'Encountered a timeout when altering pull_queue_tasks. 
Waiting {} ' - 'seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - rebuild_indexes = False - if ('pull_queue_tasks_index' in keyspace_metadata.tables and - 'tag_exists' in keyspace_metadata.tables['pull_queue_tasks_index'].columns): - rebuild_indexes = True - logger.info('Dropping outdated pull_queue_tags index') - session.execute('DROP INDEX IF EXISTS pull_queue_tags', - timeout=SCHEMA_CHANGE_TIMEOUT) - - logger.info('Dropping outdated pull_queue_tag_exists index') - session.execute('DROP INDEX IF EXISTS pull_queue_tag_exists', - timeout=SCHEMA_CHANGE_TIMEOUT) - - logger.info('Dropping outdated pull_queue_tasks_index table') - session.execute('DROP TABLE pull_queue_tasks_index', - timeout=SCHEMA_CHANGE_TIMEOUT) - - logger.info('Trying to create pull_queue_eta_index') - create_index_table = """ - CREATE TABLE IF NOT EXISTS pull_queue_eta_index ( - app text, - queue text, - eta timestamp, - id text, - tag text, - PRIMARY KEY ((app, queue, eta, id)) - ) WITH gc_grace_seconds = 120 - """ - statement = SimpleStatement(create_index_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating pull_queue_eta_index.' - ' Waiting {} seconds for schema to settle.' - .format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - logger.info('Trying to create pull_queue_tags_index') - create_tags_index_table = """ - CREATE TABLE IF NOT EXISTS pull_queue_tags_index ( - app text, - queue text, - tag text, - eta timestamp, - id text, - PRIMARY KEY ((app, queue, tag, eta, id)) - ) WITH gc_grace_seconds = 120 - """ - statement = SimpleStatement(create_tags_index_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating pull_queue_tags_index.' - ' Waiting {} seconds for schema to settle.' - .format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - if rebuild_indexes: - rebuild_task_indexes(session) - - logger.info('Trying to create pull_queue_leases') - create_leases_table = """ - CREATE TABLE IF NOT EXISTS pull_queue_leases ( - app text, - queue text, - leased timestamp, - PRIMARY KEY ((app, queue, leased)) - ) WITH gc_grace_seconds = 120 - """ - statement = SimpleStatement(create_leases_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating pull_queue_leases. ' - 'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - def current_datastore_version(session): """ Retrieves the existing datastore version value. 
@@ -571,7 +398,6 @@ def prime_cassandra(replication): create_batch_tables(cluster, session) create_groups_table(session) create_transactions_table(session) - create_pull_queue_tables(cluster, session) create_entity_ids_table(session) first_entity = session.execute( From 3b28748160ee75fb06b8c64817e3e1c96f6ad938 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 27 Sep 2019 15:48:03 +0300 Subject: [PATCH 153/221] Set postgres_dsn as soon as zk client is initialized --- AppController/djinn.rb | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index b866b91150..760959102c 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3268,6 +3268,12 @@ def start_stop_api_services pick_zookeeper(@zookeeper_data) set_custom_config start_log_server + if @options.key?('postgres_dsn') + ZKInterface.set_postgres_dsn(@options['postgres_dsn']) + end + if @options.key?('fdb_clusterfile_content') + ZKInterface.set_fdb_clusterfile_content(@options['fdb_clusterfile_content']) + end else stop_log_server end @@ -3561,10 +3567,6 @@ def stop_search2_role end def start_taskqueue_master - if @options.key?('postgres_dsn') - ZKInterface.set_postgres_dsn(@options['postgres_dsn']) - end - verbose = @options['verbose'].downcase == "true" TaskQueue.start_master(false, verbose) return true @@ -3575,10 +3577,6 @@ def stop_taskqueue end def start_taskqueue_slave - if @options.key?('postgres_dsn') - ZKInterface.set_postgres_dsn(@options['postgres_dsn']) - end - # All slaves connect to the master to start master_ip = nil @state_change_lock.synchronize { @@ -3665,7 +3663,6 @@ def assign_datastore_processes backend = 'cassandra' if @options.key?('fdb_clusterfile_content') - ZKInterface.set_fdb_clusterfile_content(@options['fdb_clusterfile_content']) backend = 'fdb' end From 89b59b5e741c9ae162b5b321169b960b608466bb Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Fri, 27 Sep 2019 16:02:17 +0300 Subject: [PATCH 154/221] Verbose debug output in TQ e2e test --- AppTaskQueue/test/helpers/prepare-postgres.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AppTaskQueue/test/helpers/prepare-postgres.sh b/AppTaskQueue/test/helpers/prepare-postgres.sh index 09487230b4..cec4eb831e 100755 --- a/AppTaskQueue/test/helpers/prepare-postgres.sh +++ b/AppTaskQueue/test/helpers/prepare-postgres.sh @@ -141,4 +141,6 @@ fi log "Creating Database and Role" CREATE_ROLE="CREATE ROLE \"${USERNAME}\" WITH LOGIN PASSWORD '${PASSWORD}';" sudo -u postgres psql --command "${CREATE_ROLE}" +echo "Creating DB" sudo -u postgres createdb --owner "${USERNAME}" "${DBNAME}" +echo "Done - $?" 
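Note on the djinn.rb change above: it publishes postgres_dsn (and fdb_clusterfile_content) to ZooKeeper as soon as the ZooKeeper client is ready, instead of waiting for the taskqueue or datastore roles to start. A minimal kazoo sketch of the consumer side follows; the '/appscale/tasks/postgres_dsn' node path is taken from the TaskQueue queue_manager code earlier in this series, and the assumption that ZKInterface.set_postgres_dsn writes to that exact node (rather than a project-specific one) is mine, not something the patch states.

from kazoo.client import KazooClient

def read_postgres_dsn(zk_hosts='localhost:2181'):
    # Connect to ZooKeeper and read the deployment-wide Postgres DSN, if any.
    zk_client = KazooClient(hosts=zk_hosts)
    zk_client.start()
    try:
        dsn_node = '/appscale/tasks/postgres_dsn'
        if not zk_client.exists(dsn_node):
            return None
        data, _ = zk_client.get(dsn_node)
        return data.decode('utf-8')
    finally:
        zk_client.stop()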
From 0cc0c55b3004a0b6c0885cdc3c4d3aedd76799d9 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 27 Sep 2019 08:44:44 -0700 Subject: [PATCH 155/221] Updates for systemd, admin server instance manager test fix --- AdminServer/tests/test_instance_manager.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AdminServer/tests/test_instance_manager.py b/AdminServer/tests/test_instance_manager.py index 31ac99e173..c489fe1369 100644 --- a/AdminServer/tests/test_instance_manager.py +++ b/AdminServer/tests/test_instance_manager.py @@ -118,6 +118,7 @@ def test_start_app_goodconfig_java(self): instance_manager = InstanceManager( None, None, None, None, None, None, None, None, None) + instance_manager._login_server = '192.168.33.10' instance_manager._projects_manager = { 'test': {'default': {'v1': version_manager}}} instance_manager._deployment_config = flexmock( @@ -190,6 +191,7 @@ def test_start_app_failed_copy_java(self): instance_manager = InstanceManager( None, None, None, None, None, None, None, None, None) + instance_manager._login_server = '192.168.33.10' instance_manager._projects_manager = { 'test': {'default': {'v1': version_manager}}} instance_manager._deployment_config = flexmock( From cc1ce036b02643983ededa3c7a282015b9ad3728 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 27 Sep 2019 15:24:51 -0700 Subject: [PATCH 156/221] Move index directories to separate module This allows multiple modules to import the index directory codecs. --- .../datastore/fdb/index_directories.py | 681 ++++++++++++++++++ AppDB/appscale/datastore/fdb/indexes.py | 680 +---------------- 2 files changed, 687 insertions(+), 674 deletions(-) create mode 100644 AppDB/appscale/datastore/fdb/index_directories.py diff --git a/AppDB/appscale/datastore/fdb/index_directories.py b/AppDB/appscale/datastore/fdb/index_directories.py new file mode 100644 index 0000000000..b709345fd2 --- /dev/null +++ b/AppDB/appscale/datastore/fdb/index_directories.py @@ -0,0 +1,681 @@ +import itertools +import sys + +import six + +from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.datastore.dbconstants import BadRequest, InternalError +from appscale.datastore.fdb.codecs import ( + decode_value, encode_value, encode_versionstamp_index, Path) +from appscale.datastore.fdb.utils import ( + DS_ROOT, fdb, format_prop_val, VERSIONSTAMP_SIZE) + +sys.path.append(APPSCALE_PYTHON_APPSERVER) +from google.appengine.datastore import datastore_pb, entity_pb +from google.appengine.datastore.datastore_pb import Query_Filter, Query_Order + +first_gt_or_equal = fdb.KeySelector.first_greater_or_equal + +KEY_PROP = u'__key__' + + +class IndexEntry(object): + """ Encapsulates details for an index entry. 
""" + __slots__ = ['project_id', 'namespace', 'path', 'commit_versionstamp', + 'deleted_versionstamp'] + + def __init__(self, project_id, namespace, path, commit_versionstamp, + deleted_versionstamp): + self.project_id = project_id + self.namespace = namespace + self.path = path + self.commit_versionstamp = commit_versionstamp + self.deleted_versionstamp = deleted_versionstamp + + @property + def kind(self): + return self.path[-2] + + @property + def key(self): + key = entity_pb.Reference() + key.set_app(self.project_id) + key.set_name_space(self.namespace) + key.mutable_path().MergeFrom(Path.decode(self.path)) + return key + + @property + def group(self): + group = entity_pb.Path() + group.add_element().MergeFrom(Path.decode_element(self.path[:2])) + return group + + def __repr__(self): + return u'IndexEntry(%r, %r, %r, %r, %r)' % ( + self.project_id, self.namespace, self.path, self.commit_versionstamp, + self.deleted_versionstamp) + + def key_result(self): + entity = entity_pb.EntityProto() + entity.mutable_key().MergeFrom(self.key) + entity.mutable_entity_group() + return entity + + def cursor_result(self, ordered_props): + compiled_cursor = datastore_pb.CompiledCursor() + position = compiled_cursor.add_position() + position.mutable_key().MergeFrom(self.key) + position.set_start_inclusive(False) + return compiled_cursor + + +class PropertyEntry(IndexEntry): + """ Encapsulates details for a single-property index entry. """ + __slots__ = ['prop_name', 'value'] + + def __init__(self, project_id, namespace, path, prop_name, value, + commit_versionstamp, deleted_versionstamp): + super(PropertyEntry, self).__init__( + project_id, namespace, path, commit_versionstamp, deleted_versionstamp) + self.prop_name = prop_name + self.value = value + + def __repr__(self): + return u'PropertyEntry(%r, %r, %r, %r, %r, %r, %r)' % ( + self.project_id, self.namespace, self.path, self.prop_name, self.value, + self.commit_versionstamp, self.deleted_versionstamp) + + def __str__(self): + return u'PropertyEntry(%s, %r, %s, %s, %s, %r, %r)' % ( + self.project_id, self.namespace, self.path, self.prop_name, + format_prop_val(self.value), self.commit_versionstamp, + self.deleted_versionstamp) + + def prop_result(self): + entity = entity_pb.EntityProto() + entity.mutable_key().MergeFrom(self.key) + entity.mutable_entity_group().MergeFrom(self.group) + prop = entity.add_property() + prop.set_name(self.prop_name) + prop.set_meaning(entity_pb.Property.INDEX_VALUE) + prop.set_multiple(False) + prop.mutable_value().MergeFrom(self.value) + return entity + + def cursor_result(self, ordered_props): + compiled_cursor = datastore_pb.CompiledCursor() + position = compiled_cursor.add_position() + position.mutable_key().MergeFrom(self.key) + position.set_start_inclusive(False) + if self.prop_name in ordered_props: + index_value = position.add_indexvalue() + index_value.set_property(self.prop_name) + index_value.mutable_value().MergeFrom(self.value) + + return compiled_cursor + + +class CompositeEntry(IndexEntry): + """ Encapsulates details for a composite index entry. 
""" + __slots__ = ['properties'] + + def __init__(self, project_id, namespace, path, properties, + commit_versionstamp, deleted_versionstamp): + super(CompositeEntry, self).__init__( + project_id, namespace, path, commit_versionstamp, deleted_versionstamp) + self.properties = properties + + def __repr__(self): + return u'CompositeEntry(%r, %r, %r, %r, %r, %r)' % ( + self.project_id, self.namespace, self.path, self.properties, + self.commit_versionstamp, self.deleted_versionstamp) + + def prop_result(self): + entity = entity_pb.EntityProto() + entity.mutable_key().MergeFrom(self.key) + entity.mutable_entity_group().MergeFrom(self.group) + for prop_name, value in self.properties: + prop = entity.add_property() + prop.set_name(prop_name) + prop.set_meaning(entity_pb.Property.INDEX_VALUE) + # TODO: Check if this is sometimes True. + prop.set_multiple(False) + prop.mutable_value().MergeFrom(value) + + return entity + + def cursor_result(self, ordered_props): + compiled_cursor = datastore_pb.CompiledCursor() + position = compiled_cursor.add_position() + position.mutable_key().MergeFrom(self.key) + position.set_start_inclusive(False) + for prop_name, value in self.properties: + if prop_name not in ordered_props: + continue + + index_value = position.add_indexvalue() + index_value.set_property(prop_name) + index_value.mutable_value().MergeFrom(value) + + return compiled_cursor + + +class IndexSlice(object): + """ Encapsulates details about an index range in a way that's mutable. """ + __slots__ = ['_directory_prefix', '_order_info', '_ancestor', '_start_parts', + '_stop_parts'] + + def __init__(self, directory_prefix, order_info, ancestor=False): + self._directory_prefix = directory_prefix + self._order_info = order_info + self._ancestor = ancestor + + self._start_parts = [self._directory_prefix] + self._stop_parts = [self._directory_prefix, b'\xFF'] + + @property + def start(self): + return first_gt_or_equal(b''.join(self._start_parts)) + + @property + def stop(self): + return first_gt_or_equal(b''.join(self._stop_parts)) + + @property + def _expected_parts(self): + total = 1 # directory prefix + if self._ancestor: + total += 1 + + total += len(self._order_info) + total += 1 # path + total += 1 # commit versionstamp + return total + + def set_ancestor(self, ancestor_path): + if not ancestor_path: + return + + index = 1 if self._ancestor else -2 + if self._ancestor: + self._set_start(index, Path.pack(ancestor_path)) + self._set_stop(index, Path.pack(ancestor_path)) + self._set_stop(index + 1, b'\xFF') + else: + prefix = Path.pack(ancestor_path, omit_terminator=True) + self._set_start(index, prefix) + self._set_stop(index, prefix + b'\xFF') + + def apply_prop_filter(self, prop_name, op, value): + index, direction = self._prop_details(prop_name) + prop_reverse = direction == Query_Order.DESCENDING + encoded_value = encode_value(value, prop_reverse) + if op == Query_Filter.EQUAL: + self._set_start(index, encoded_value) + self._set_stop(index, encoded_value) + self._set_stop(index + 1, b'\xFF') + return + + if (op == Query_Filter.GREATER_THAN_OR_EQUAL and not prop_reverse or + op == Query_Filter.LESS_THAN_OR_EQUAL and prop_reverse): + self._set_start(index, encoded_value) + elif (op == Query_Filter.GREATER_THAN and not prop_reverse or + op == Query_Filter.LESS_THAN and prop_reverse): + self._set_start(index, encoded_value + b'\xFF') + elif (op == Query_Filter.LESS_THAN_OR_EQUAL and not prop_reverse or + op == Query_Filter.GREATER_THAN_OR_EQUAL and prop_reverse): + self._set_stop(index, encoded_value) 
+ self._set_stop(index + 1, b'\xFF') + elif (op == Query_Filter.LESS_THAN and not prop_reverse or + op == Query_Filter.GREATER_THAN and prop_reverse): + self._set_stop(index, encoded_value) + else: + raise BadRequest(u'Unexpected filter operation') + + def apply_path_filter(self, op, path, ancestor_path=()): + if not isinstance(path, tuple): + path = Path.flatten(path) + + remaining_path = path[len(ancestor_path):] if self._ancestor else path + if not remaining_path: + raise InternalError(u'Path filter must be within ancestor') + + start = Path.pack(remaining_path, omit_terminator=True) + # Since the commit versionstamp could potentially start with 0xFF, this + # selection scans up to the next possible path value. + stop = start + six.int2byte(Path.MIN_ID_MARKER) + index = -2 + if op == Query_Filter.EQUAL: + self._set_start(index, start) + self._set_stop(index, stop) + self._set_stop(index + 1, b'\xFF') + return + + if op == Query_Filter.GREATER_THAN_OR_EQUAL: + self._set_start(index, start) + elif op == Query_Filter.GREATER_THAN: + self._set_start(index, stop) + elif op == Query_Filter.LESS_THAN_OR_EQUAL: + self._set_stop(index, stop) + elif op == Query_Filter.LESS_THAN: + self._set_stop(index, start) + else: + raise BadRequest(u'Unexpected filter operation') + + def apply_cursor(self, op, cursor, ancestor_path): + if op in (Query_Filter.GREATER_THAN_OR_EQUAL, Query_Filter.GREATER_THAN): + existing_parts = self._start_parts + else: + existing_parts = self._stop_parts + + for prop_name, direction in self._order_info: + cursor_prop = next((prop for prop in cursor.property_list() + if prop.name() == prop_name), None) + if cursor_prop is not None: + index = self._prop_details(prop_name)[0] + encoded_value = encode_value(cursor_prop.value(), + direction == Query_Order.DESCENDING) + self._update_parts(existing_parts, index, encoded_value) + + self.apply_path_filter(op, cursor.key().path(), ancestor_path) + + def _prop_details(self, prop_name): + prop_index = next( + (index for index, (name, direction) in enumerate(self._order_info) + if name == prop_name), None) + if prop_index is None: + raise InternalError(u'{} is not in index'.format(prop_name)) + + index = prop_index + 1 # Account for directory prefix. + if self._ancestor: + index += 1 + + return index, self._order_info[prop_index][1] + + def _update_parts(self, parts, index, new_value): + if index < 0: + index = self._expected_parts + index + + # Ensure fields are set in order. + if len(parts) < index: + raise BadRequest(u'Invalid filter combination') + + if len(parts) == index: + parts.append(new_value) + return + + if new_value == parts[index]: + return + + # If this field has already been set, ensure the new range is smaller. + candidate = parts[:index] + [new_value] + if parts is self._start_parts: + if b''.join(candidate) < b''.join(parts): + raise BadRequest(u'Invalid filter combination') + + self._start_parts = candidate + elif parts is self._stop_parts: + if b''.join(candidate) > b''.join(parts): + raise BadRequest(u'Invalid filter combination') + + self._stop_parts = candidate + + def _set_start(self, index, new_value): + return self._update_parts(self._start_parts, index, new_value) + + def _set_stop(self, index, new_value): + return self._update_parts(self._stop_parts, index, new_value) + + +class Index(object): + """ The base class for different datastore index types. 
""" + __slots__ = ['directory'] + + def __init__(self, directory): + self.directory = directory + + @property + def project_id(self): + return self.directory.get_path()[len(DS_ROOT)] + + @property + def versionstamp_slice(self): + """ The portion of keys that contain the commit versionstamp. """ + return slice(-VERSIONSTAMP_SIZE, None) + + @property + def prop_names(self): + return NotImplementedError() + + def get_slice(self, filter_props, ancestor_path=tuple(), start_cursor=None, + end_cursor=None, reverse_scan=False): + has_ancestor_field = getattr(self, 'ancestor', False) + order_info = getattr( + self, 'order_info', tuple((prop_name, Query_Order.ASCENDING) + for prop_name in self.prop_names)) + index_slice = IndexSlice( + self.directory.rawPrefix, order_info, ancestor=has_ancestor_field) + + # First, apply the ancestor filter if it comes first in the index. + if has_ancestor_field: + index_slice.set_ancestor(ancestor_path) + + # Second, apply property filters in the index's definition order. + ordered_filter_props = [] + for prop_name in self.prop_names: + filter_prop = next((filter_prop for filter_prop in filter_props + if filter_prop.name == prop_name), None) + if filter_prop is not None: + ordered_filter_props.append(filter_prop) + + for filter_prop in ordered_filter_props: + for op, value in filter_prop.filters: + index_slice.apply_prop_filter(filter_prop.name, op, value) + + # Third, apply the ancestor filter if it hasn't been applied yet. + if not has_ancestor_field: + index_slice.set_ancestor(ancestor_path) + + # Fourth, apply key property filters. + key_filter_props = [filter_prop for filter_prop in filter_props + if filter_prop.name == KEY_PROP] + for filter_prop in key_filter_props: + for op, path in filter_prop.filters: + index_slice.apply_path_filter(op, path, ancestor_path) + + # Finally, apply cursors. + if start_cursor is not None: + op = (Query_Filter.LESS_THAN if reverse_scan + else Query_Filter.GREATER_THAN) + index_slice.apply_cursor(op, start_cursor, ancestor_path) + + if end_cursor is not None: + op = (Query_Filter.GREATER_THAN_OR_EQUAL if reverse_scan + else Query_Filter.LESS_THAN_OR_EQUAL) + index_slice.apply_cursor(op, end_cursor, ancestor_path) + + return slice(index_slice.start, index_slice.stop) + + +class KindlessIndex(Index): + """ + A KindlessIndex handles the encoding and decoding details for kind index + entries. These are paths that point to entity keys. + + The FDB directory for a kindless index looks like + (, 'kindless-indexes', ). + + Within this directory, keys are encoded as + . + + The contains the entity path. See codecs.Path for encoding details. + + The is a 10-byte versionstamp that specifies the commit + version of the transaction that wrote the index entry. 
+ """ + DIR_NAME = u'kindless-indexes' + + @property + def prop_names(self): + return () + + @property + def namespace(self): + return self.directory.get_path()[-1] + + def __repr__(self): + return u'KindlessIndex(%r)' % self.directory + + @classmethod + def directory_path(cls, project_id, namespace): + return project_id, cls.DIR_NAME, namespace + + def encode_key(self, path, commit_versionstamp): + key = b''.join([self.directory.rawPrefix, Path.pack(path), + commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE]) + if not commit_versionstamp: + key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) + + return key + + def decode(self, kv): + path = Path.unpack(kv.key, len(self.directory.rawPrefix))[0] + commit_versionstamp = kv.key[self.versionstamp_slice] + deleted_versionstamp = kv.value or None + return IndexEntry(self.project_id, self.namespace, path, + commit_versionstamp, deleted_versionstamp) + + +class KindIndex(Index): + """ + A KindIndex handles the encoding and decoding details for kind index entries. + These are paths grouped by kind that point to entity keys. + + The FDB directory for a kind index looks like + (, 'kind-indexes', , ). + + Within this directory, keys are encoded as + . + + The contains the entity path. See codecs.Path for encoding details. + + The is a 10-byte versionstamp that specifies the commit + version of the transaction that wrote the index entry. + """ + DIR_NAME = u'kind-indexes' + + @property + def namespace(self): + return self.directory.get_path()[-2] + + @property + def kind(self): + return self.directory.get_path()[-1] + + def __repr__(self): + return u'KindIndex(%r)' % self.directory + + @classmethod + def directory_path(cls, project_id, namespace, kind): + return project_id, cls.DIR_NAME, namespace, kind + + @classmethod + def section_path(cls, project_id): + return project_id, cls.DIR_NAME + + @property + def prop_names(self): + return () + + def encode_key(self, path, commit_versionstamp): + key = b''.join([self.directory.rawPrefix, Path.pack(path), + commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE]) + if not commit_versionstamp: + key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) + + return key + + def decode(self, kv): + path = Path.unpack(kv.key, len(self.directory.rawPrefix))[0] + commit_versionstamp = kv.key[self.versionstamp_slice] + deleted_versionstamp = kv.value or None + return IndexEntry(self.project_id, self.namespace, path, + commit_versionstamp, deleted_versionstamp) + + +class SinglePropIndex(Index): + """ + A SinglePropIndex handles the encoding and decoding details for single-prop + index entries. These are property values for a particular kind that point to + entity keys. + + The FDB directory for a single-prop index looks like + (, 'single-property-indexes', , , ). + + Within this directory, keys are encoded as + (, , ). + + The contains a property value. See the codecs module for encoding + details. + + The contains the entity path. See codecs.Path for encoding details. + + The is a 10-byte versionstamp that specifies the commit + version of the transaction that wrote the index entry. 
+ """ + DIR_NAME = u'single-property-indexes' + + @property + def namespace(self): + return self.directory.get_path()[-3] + + @property + def kind(self): + return self.directory.get_path()[-2] + + @property + def prop_name(self): + return self.directory.get_path()[-1] + + @property + def prop_names(self): + return (self.directory.get_path()[-1],) + + def __repr__(self): + return u'SinglePropIndex(%r)' % self.directory + + @classmethod + def directory_path(cls, project_id, namespace, kind, prop_name): + return project_id, cls.DIR_NAME, namespace, kind, prop_name + + def encode_key(self, value, path, commit_versionstamp): + key = b''.join([self.directory.rawPrefix, encode_value(value), + Path.pack(path), + commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE]) + if not commit_versionstamp: + key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) + + return key + + def decode(self, kv): + value, pos = decode_value(kv.key, len(self.directory.rawPrefix)) + path = Path.unpack(kv.key, pos)[0] + commit_versionstamp = kv.key[self.versionstamp_slice] + deleted_versionstamp = kv.value or None + return PropertyEntry(self.project_id, self.namespace, path, self.prop_name, + value, commit_versionstamp, deleted_versionstamp) + + +class CompositeIndex(Index): + """ + A CompositeIndex handles the encoding and decoding details for composite + index entries. + + The FDB directory for a composite index looks like + (, 'composite-indexes', , ). + + Within this directory, keys are encoded as + (, , , + ). + + If the index definition requires an ancestor, the + contains an encoded tuple specifying the full or partial path of the entity's + ancestor. The number of entries written for ancestor composite indexes is + equal to the number of ancestor path elements. For example, an entity with + three path elements would be encoded with the following two entries: + (('Kind1', 'key1'), , ('Kind2', 'key2', 'key3'), ) + (('Kind1', 'key1, 'Kind2', 'key2'), , ('key3',), ) + + The portion contains the property values as defined by the + index. See the codecs module for encoding details. + + The is an encoded tuple containing the portion of the entity + path that isn't specified by the . If the index definition + does not require an ancestor, this contains the full path. + + The is a 10-byte versionstamp that specifies the commit + version of the transaction that wrote the index entry. 
+ """ + __slots__ = ['kind', 'ancestor', 'order_info'] + + DIR_NAME = u'composite-indexes' + + def __init__(self, directory, kind, ancestor, order_info): + super(CompositeIndex, self).__init__(directory) + self.kind = kind + self.ancestor = ancestor + self.order_info = order_info + + @property + def id(self): + return int(self.directory.get_path()[-2]) + + @property + def namespace(self): + return self.directory.get_path()[-1] + + @property + def prop_names(self): + return tuple(prop_name for prop_name, _ in self.order_info) + + def __repr__(self): + return u'CompositeIndex(%r, %r, %r, %r)' % ( + self.directory, self.kind, self.ancestor, self.order_info) + + @classmethod + def directory_path(cls, project_id, index_id, namespace): + return project_id, cls.DIR_NAME, six.text_type(index_id), namespace + + def encode_key(self, ancestor_path, encoded_values, remaining_path, + commit_versionstamp): + ancestor_path = Path.pack(ancestor_path) if ancestor_path else b'' + remaining_path = Path.pack(remaining_path) + key = b''.join( + (self.directory.rawPrefix, ancestor_path) + tuple(encoded_values) + + (remaining_path, commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE)) + if not commit_versionstamp: + key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) + + return key + + def encode_keys(self, prop_list, path, commit_versionstamp): + encoded_values_by_prop = [] + for index_prop_name, direction in self.order_info: + reverse = direction == Query_Order.DESCENDING + encoded_values_by_prop.append( + tuple(encode_value(prop.value(), reverse) for prop in prop_list + if prop.name() == index_prop_name)) + + encoded_value_combos = itertools.product(*encoded_values_by_prop) + if not self.ancestor: + return tuple(self.encode_key((), values, path, commit_versionstamp) + for values in encoded_value_combos) + + keys = [] + for index in range(2, len(path), 2): + ancestor_path = path[:index] + remaining_path = path[index:] + keys.extend( + [self.encode_key(ancestor_path, values, remaining_path, + commit_versionstamp) + for values in encoded_value_combos]) + + return tuple(keys) + + def decode(self, kv): + pos = len(self.directory.rawPrefix) + properties = [] + if self.ancestor: + ancestor_path, pos = Path.unpack(kv.key, pos) + else: + ancestor_path = () + + for prop_name, direction in self.order_info: + value, pos = decode_value(kv.key, pos, + direction == Query_Order.DESCENDING) + properties.append((prop_name, value)) + + remaining_path = Path.unpack(kv.key, pos)[0] + path = ancestor_path + remaining_path + commit_versionstamp = kv.key[self.versionstamp_slice] + deleted_versionstamp = kv.value or None + return CompositeEntry(self.project_id, self.namespace, path, properties, + commit_versionstamp, deleted_versionstamp) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 8c5b585e85..010c46e14d 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -5,34 +5,28 @@ """ from __future__ import division -import itertools import logging -import monotonic import sys -import six from tornado import gen from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER from appscale.datastore.fdb.codecs import ( - decode_str, decode_value, encode_value, encode_versionstamp_index, Path) + decode_str, encode_versionstamp_index, Path) +from appscale.datastore.fdb.composite_indexes import CompositeIndexManager +from appscale.datastore.fdb.index_directories import ( + CompositeEntry, CompositeIndex, IndexEntry, KEY_PROP, 
KindIndex, + KindlessIndex, PropertyEntry, SinglePropIndex) from appscale.datastore.fdb.sdk import FindIndexToUse, ListCursor from appscale.datastore.fdb.utils import ( - format_prop_val, DS_ROOT, fdb, get_scatter_val, MAX_FDB_TX_DURATION, - ResultIterator, SCATTER_PROP, VERSIONSTAMP_SIZE) + fdb, get_scatter_val, ResultIterator, SCATTER_PROP, VERSIONSTAMP_SIZE) from appscale.datastore.dbconstants import BadRequest, InternalError -from appscale.datastore.index_manager import IndexInaccessible sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore import datastore_pb, entity_pb from google.appengine.datastore.datastore_pb import Query_Filter, Query_Order logger = logging.getLogger(__name__) -KEY_PROP = u'__key__' - -first_gt_or_equal = fdb.KeySelector.first_greater_or_equal - class FilterProperty(object): """ Encapsulates details for a FilterProperty that came from a query. """ @@ -141,147 +135,6 @@ def get_scan_direction(query, index): return Query_Order.DESCENDING -class IndexEntry(object): - """ Encapsulates details for an index entry. """ - __slots__ = ['project_id', 'namespace', 'path', 'commit_versionstamp', - 'deleted_versionstamp'] - - def __init__(self, project_id, namespace, path, commit_versionstamp, - deleted_versionstamp): - self.project_id = project_id - self.namespace = namespace - self.path = path - self.commit_versionstamp = commit_versionstamp - self.deleted_versionstamp = deleted_versionstamp - - @property - def kind(self): - return self.path[-2] - - @property - def key(self): - key = entity_pb.Reference() - key.set_app(self.project_id) - key.set_name_space(self.namespace) - key.mutable_path().MergeFrom(Path.decode(self.path)) - return key - - @property - def group(self): - group = entity_pb.Path() - group.add_element().MergeFrom(Path.decode_element(self.path[:2])) - return group - - def __repr__(self): - return u'IndexEntry(%r, %r, %r, %r, %r)' % ( - self.project_id, self.namespace, self.path, self.commit_versionstamp, - self.deleted_versionstamp) - - def key_result(self): - entity = entity_pb.EntityProto() - entity.mutable_key().MergeFrom(self.key) - entity.mutable_entity_group() - return entity - - def cursor_result(self, ordered_props): - compiled_cursor = datastore_pb.CompiledCursor() - position = compiled_cursor.add_position() - position.mutable_key().MergeFrom(self.key) - position.set_start_inclusive(False) - return compiled_cursor - - -class PropertyEntry(IndexEntry): - """ Encapsulates details for a single-property index entry. 
""" - __slots__ = ['prop_name', 'value'] - - def __init__(self, project_id, namespace, path, prop_name, value, - commit_versionstamp, deleted_versionstamp): - super(PropertyEntry, self).__init__( - project_id, namespace, path, commit_versionstamp, deleted_versionstamp) - self.prop_name = prop_name - self.value = value - - def __repr__(self): - return u'PropertyEntry(%r, %r, %r, %r, %r, %r, %r)' % ( - self.project_id, self.namespace, self.path, self.prop_name, self.value, - self.commit_versionstamp, self.deleted_versionstamp) - - def __str__(self): - return u'PropertyEntry(%s, %r, %s, %s, %s, %r, %r)' % ( - self.project_id, self.namespace, self.path, self.prop_name, - format_prop_val(self.value), self.commit_versionstamp, - self.deleted_versionstamp) - - def prop_result(self): - entity = entity_pb.EntityProto() - entity.mutable_key().MergeFrom(self.key) - entity.mutable_entity_group().MergeFrom(self.group) - prop = entity.add_property() - prop.set_name(self.prop_name) - prop.set_meaning(entity_pb.Property.INDEX_VALUE) - prop.set_multiple(False) - prop.mutable_value().MergeFrom(self.value) - return entity - - def cursor_result(self, ordered_props): - compiled_cursor = datastore_pb.CompiledCursor() - position = compiled_cursor.add_position() - position.mutable_key().MergeFrom(self.key) - position.set_start_inclusive(False) - if self.prop_name in ordered_props: - index_value = position.add_indexvalue() - index_value.set_property(self.prop_name) - index_value.mutable_value().MergeFrom(self.value) - - return compiled_cursor - - -class CompositeEntry(IndexEntry): - """ Encapsulates details for a composite index entry. """ - __slots__ = ['properties'] - - def __init__(self, project_id, namespace, path, properties, - commit_versionstamp, deleted_versionstamp): - super(CompositeEntry, self).__init__( - project_id, namespace, path, commit_versionstamp, deleted_versionstamp) - self.properties = properties - - def __repr__(self): - return u'CompositeEntry(%r, %r, %r, %r, %r, %r)' % ( - self.project_id, self.namespace, self.path, self.properties, - self.commit_versionstamp, self.deleted_versionstamp) - - def prop_result(self): - entity = entity_pb.EntityProto() - entity.mutable_key().MergeFrom(self.key) - entity.mutable_entity_group().MergeFrom(self.group) - for prop_name, value in self.properties: - prop = entity.add_property() - prop.set_name(prop_name) - prop.set_meaning(entity_pb.Property.INDEX_VALUE) - # TODO: Check if this is sometimes True. - prop.set_multiple(False) - prop.mutable_value().MergeFrom(value) - - return entity - - def cursor_result(self, ordered_props): - compiled_cursor = datastore_pb.CompiledCursor() - position = compiled_cursor.add_position() - position.mutable_key().MergeFrom(self.key) - position.set_start_inclusive(False) - for prop_name, value in self.properties: - if prop_name not in ordered_props: - continue - - index_value = position.add_indexvalue() - index_value.set_property(prop_name) - index_value.mutable_value().MergeFrom(value) - - return compiled_cursor - - class IndexIterator(object): """ Returns pages of index entry results. It ignores Key-Values that do not apply @@ -495,527 +348,6 @@ def _usable(self, entry): return entry.deleted_versionstamp is None -class IndexSlice(object): - """ Encapsulates details about an index range in a way that's mutable. 
""" - __slots__ = ['_directory_prefix', '_order_info', '_ancestor', '_start_parts', - '_stop_parts'] - - def __init__(self, directory_prefix, order_info, ancestor=False): - self._directory_prefix = directory_prefix - self._order_info = order_info - self._ancestor = ancestor - - self._start_parts = [self._directory_prefix] - self._stop_parts = [self._directory_prefix, b'\xFF'] - - @property - def start(self): - return first_gt_or_equal(b''.join(self._start_parts)) - - @property - def stop(self): - return first_gt_or_equal(b''.join(self._stop_parts)) - - @property - def _expected_parts(self): - total = 1 # directory prefix - if self._ancestor: - total += 1 - - total += len(self._order_info) - total += 1 # path - total += 1 # commit versionstamp - return total - - def set_ancestor(self, ancestor_path): - if not ancestor_path: - return - - index = 1 if self._ancestor else -2 - if self._ancestor: - self._set_start(index, Path.pack(ancestor_path)) - self._set_stop(index, Path.pack(ancestor_path)) - self._set_stop(index + 1, b'\xFF') - else: - prefix = Path.pack(ancestor_path, omit_terminator=True) - self._set_start(index, prefix) - self._set_stop(index, prefix + b'\xFF') - - def apply_prop_filter(self, prop_name, op, value): - index, direction = self._prop_details(prop_name) - prop_reverse = direction == Query_Order.DESCENDING - encoded_value = encode_value(value, prop_reverse) - if op == Query_Filter.EQUAL: - self._set_start(index, encoded_value) - self._set_stop(index, encoded_value) - self._set_stop(index + 1, b'\xFF') - return - - if (op == Query_Filter.GREATER_THAN_OR_EQUAL and not prop_reverse or - op == Query_Filter.LESS_THAN_OR_EQUAL and prop_reverse): - self._set_start(index, encoded_value) - elif (op == Query_Filter.GREATER_THAN and not prop_reverse or - op == Query_Filter.LESS_THAN and prop_reverse): - self._set_start(index, encoded_value + b'\xFF') - elif (op == Query_Filter.LESS_THAN_OR_EQUAL and not prop_reverse or - op == Query_Filter.GREATER_THAN_OR_EQUAL and prop_reverse): - self._set_stop(index, encoded_value) - self._set_stop(index + 1, b'\xFF') - elif (op == Query_Filter.LESS_THAN and not prop_reverse or - op == Query_Filter.GREATER_THAN and prop_reverse): - self._set_stop(index, encoded_value) - else: - raise BadRequest(u'Unexpected filter operation') - - def apply_path_filter(self, op, path, ancestor_path=()): - if not isinstance(path, tuple): - path = Path.flatten(path) - - remaining_path = path[len(ancestor_path):] if self._ancestor else path - if not remaining_path: - raise InternalError(u'Path filter must be within ancestor') - - start = Path.pack(remaining_path, omit_terminator=True) - # Since the commit versionstamp could potentially start with 0xFF, this - # selection scans up to the next possible path value. 
- stop = start + six.int2byte(Path.MIN_ID_MARKER) - index = -2 - if op == Query_Filter.EQUAL: - self._set_start(index, start) - self._set_stop(index, stop) - self._set_stop(index + 1, b'\xFF') - return - - if op == Query_Filter.GREATER_THAN_OR_EQUAL: - self._set_start(index, start) - elif op == Query_Filter.GREATER_THAN: - self._set_start(index, stop) - elif op == Query_Filter.LESS_THAN_OR_EQUAL: - self._set_stop(index, stop) - elif op == Query_Filter.LESS_THAN: - self._set_stop(index, start) - else: - raise BadRequest(u'Unexpected filter operation') - - def apply_cursor(self, op, cursor, ancestor_path): - if op in (Query_Filter.GREATER_THAN_OR_EQUAL, Query_Filter.GREATER_THAN): - existing_parts = self._start_parts - else: - existing_parts = self._stop_parts - - for prop_name, direction in self._order_info: - cursor_prop = next((prop for prop in cursor.property_list() - if prop.name() == prop_name), None) - if cursor_prop is not None: - index = self._prop_details(prop_name)[0] - encoded_value = encode_value(cursor_prop.value(), - direction == Query_Order.DESCENDING) - self._update_parts(existing_parts, index, encoded_value) - - self.apply_path_filter(op, cursor.key().path(), ancestor_path) - - def _prop_details(self, prop_name): - prop_index = next( - (index for index, (name, direction) in enumerate(self._order_info) - if name == prop_name), None) - if prop_index is None: - raise InternalError(u'{} is not in index'.format(prop_name)) - - index = prop_index + 1 # Account for directory prefix. - if self._ancestor: - index += 1 - - return index, self._order_info[prop_index][1] - - def _update_parts(self, parts, index, new_value): - if index < 0: - index = self._expected_parts + index - - # Ensure fields are set in order. - if len(parts) < index: - raise BadRequest(u'Invalid filter combination') - - if len(parts) == index: - parts.append(new_value) - return - - if new_value == parts[index]: - return - - # If this field has already been set, ensure the new range is smaller. - candidate = parts[:index] + [new_value] - if parts is self._start_parts: - if b''.join(candidate) < b''.join(parts): - raise BadRequest(u'Invalid filter combination') - - self._start_parts = candidate - elif parts is self._stop_parts: - if b''.join(candidate) > b''.join(parts): - raise BadRequest(u'Invalid filter combination') - - self._stop_parts = candidate - - def _set_start(self, index, new_value): - return self._update_parts(self._start_parts, index, new_value) - - def _set_stop(self, index, new_value): - return self._update_parts(self._stop_parts, index, new_value) - - -class Index(object): - """ The base class for different datastore index types. """ - __slots__ = ['directory'] - - def __init__(self, directory): - self.directory = directory - - @property - def project_id(self): - return self.directory.get_path()[len(DS_ROOT)] - - @property - def versionstamp_slice(self): - """ The portion of keys that contain the commit versionstamp. """ - return slice(-VERSIONSTAMP_SIZE, None) - - @property - def prop_names(self): - return NotImplementedError() - - def get_slice(self, filter_props, ancestor_path=tuple(), start_cursor=None, - end_cursor=None, reverse_scan=False): - has_ancestor_field = getattr(self, 'ancestor', False) - order_info = getattr( - self, 'order_info', tuple((prop_name, Query_Order.ASCENDING) - for prop_name in self.prop_names)) - index_slice = IndexSlice( - self.directory.rawPrefix, order_info, ancestor=has_ancestor_field) - - # First, apply the ancestor filter if it comes first in the index. 
- if has_ancestor_field: - index_slice.set_ancestor(ancestor_path) - - # Second, apply property filters in the index's definition order. - ordered_filter_props = [] - for prop_name in self.prop_names: - filter_prop = next((filter_prop for filter_prop in filter_props - if filter_prop.name == prop_name), None) - if filter_prop is not None: - ordered_filter_props.append(filter_prop) - - for filter_prop in ordered_filter_props: - for op, value in filter_prop.filters: - index_slice.apply_prop_filter(filter_prop.name, op, value) - - # Third, apply the ancestor filter if it hasn't been applied yet. - if not has_ancestor_field: - index_slice.set_ancestor(ancestor_path) - - # Fourth, apply key property filters. - key_filter_props = [filter_prop for filter_prop in filter_props - if filter_prop.name == KEY_PROP] - for filter_prop in key_filter_props: - for op, path in filter_prop.filters: - index_slice.apply_path_filter(op, path, ancestor_path) - - # Finally, apply cursors. - if start_cursor is not None: - op = (Query_Filter.LESS_THAN if reverse_scan - else Query_Filter.GREATER_THAN) - index_slice.apply_cursor(op, start_cursor, ancestor_path) - - if end_cursor is not None: - op = (Query_Filter.GREATER_THAN_OR_EQUAL if reverse_scan - else Query_Filter.LESS_THAN_OR_EQUAL) - index_slice.apply_cursor(op, end_cursor, ancestor_path) - - return slice(index_slice.start, index_slice.stop) - - -class KindlessIndex(Index): - """ - A KindlessIndex handles the encoding and decoding details for kind index - entries. These are paths that point to entity keys. - - The FDB directory for a kindless index looks like - (, 'kindless-indexes', ). - - Within this directory, keys are encoded as + . - - The contains the entity path. See codecs.Path for encoding details. - - The is a 10-byte versionstamp that specifies the commit - version of the transaction that wrote the index entry. - """ - DIR_NAME = u'kindless-indexes' - - @property - def prop_names(self): - return () - - @property - def namespace(self): - return self.directory.get_path()[-1] - - def __repr__(self): - return u'KindlessIndex(%r)' % self.directory - - @classmethod - def directory_path(cls, project_id, namespace): - return project_id, cls.DIR_NAME, namespace - - def encode_key(self, path, commit_versionstamp): - key = b''.join([self.directory.rawPrefix, Path.pack(path), - commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE]) - if not commit_versionstamp: - key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) - - return key - - def decode(self, kv): - path = Path.unpack(kv.key, len(self.directory.rawPrefix))[0] - commit_versionstamp = kv.key[self.versionstamp_slice] - deleted_versionstamp = kv.value or None - return IndexEntry(self.project_id, self.namespace, path, - commit_versionstamp, deleted_versionstamp) - - -class KindIndex(Index): - """ - A KindIndex handles the encoding and decoding details for kind index entries. - These are paths grouped by kind that point to entity keys. - - The FDB directory for a kind index looks like - (, 'kind-indexes', , ). - - Within this directory, keys are encoded as + . - - The contains the entity path. See codecs.Path for encoding details. - - The is a 10-byte versionstamp that specifies the commit - version of the transaction that wrote the index entry. 
- """ - DIR_NAME = u'kind-indexes' - - @property - def namespace(self): - return self.directory.get_path()[-2] - - @property - def kind(self): - return self.directory.get_path()[-1] - - def __repr__(self): - return u'KindIndex(%r)' % self.directory - - @classmethod - def directory_path(cls, project_id, namespace, kind): - return project_id, cls.DIR_NAME, namespace, kind - - @classmethod - def section_path(cls, project_id): - return project_id, cls.DIR_NAME - - @property - def prop_names(self): - return () - - def encode_key(self, path, commit_versionstamp): - key = b''.join([self.directory.rawPrefix, Path.pack(path), - commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE]) - if not commit_versionstamp: - key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) - - return key - - def decode(self, kv): - path = Path.unpack(kv.key, len(self.directory.rawPrefix))[0] - commit_versionstamp = kv.key[self.versionstamp_slice] - deleted_versionstamp = kv.value or None - return IndexEntry(self.project_id, self.namespace, path, - commit_versionstamp, deleted_versionstamp) - - -class SinglePropIndex(Index): - """ - A SinglePropIndex handles the encoding and decoding details for single-prop - index entries. These are property values for a particular kind that point to - entity keys. - - The FDB directory for a single-prop index looks like - (, 'single-property-indexes', , , ). - - Within this directory, keys are encoded as - (, , ). - - The contains a property value. See the codecs module for encoding - details. - - The contains the entity path. See codecs.Path for encoding details. - - The is a 10-byte versionstamp that specifies the commit - version of the transaction that wrote the index entry. - """ - DIR_NAME = u'single-property-indexes' - - @property - def namespace(self): - return self.directory.get_path()[-3] - - @property - def kind(self): - return self.directory.get_path()[-2] - - @property - def prop_name(self): - return self.directory.get_path()[-1] - - @property - def prop_names(self): - return (self.directory.get_path()[-1],) - - def __repr__(self): - return u'SinglePropIndex(%r)' % self.directory - - @classmethod - def directory_path(cls, project_id, namespace, kind, prop_name): - return project_id, cls.DIR_NAME, namespace, kind, prop_name - - def encode_key(self, value, path, commit_versionstamp): - key = b''.join([self.directory.rawPrefix, encode_value(value), - Path.pack(path), - commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE]) - if not commit_versionstamp: - key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) - - return key - - def decode(self, kv): - value, pos = decode_value(kv.key, len(self.directory.rawPrefix)) - path = Path.unpack(kv.key, pos)[0] - commit_versionstamp = kv.key[self.versionstamp_slice] - deleted_versionstamp = kv.value or None - return PropertyEntry(self.project_id, self.namespace, path, self.prop_name, - value, commit_versionstamp, deleted_versionstamp) - - -class CompositeIndex(Index): - """ - A CompositeIndex handles the encoding and decoding details for composite - index entries. - - The FDB directory for a composite index looks like - (, 'composite-indexes', , ). - - Within this directory, keys are encoded as - (, , , - ). - - If the index definition requires an ancestor, the - contains an encoded tuple specifying the full or partial path of the entity's - ancestor. The number of entries written for ancestor composite indexes is - equal to the number of ancestor path elements. 
For example, an entity with - three path elements would be encoded with the following two entries: - (('Kind1', 'key1'), , ('Kind2', 'key2', 'key3'), ) - (('Kind1', 'key1, 'Kind2', 'key2'), , ('key3',), ) - - The portion contains the property values as defined by the - index. See the codecs module for encoding details. - - The is an encoded tuple containing the portion of the entity - path that isn't specified by the . If the index definition - does not require an ancestor, this contains the full path. - - The is a 10-byte versionstamp that specifies the commit - version of the transaction that wrote the index entry. - """ - __slots__ = ['kind', 'ancestor', 'order_info'] - - DIR_NAME = u'composite-indexes' - - def __init__(self, directory, kind, ancestor, order_info): - super(CompositeIndex, self).__init__(directory) - self.kind = kind - self.ancestor = ancestor - self.order_info = order_info - - @property - def id(self): - return int(self.directory.get_path()[-2]) - - @property - def namespace(self): - return self.directory.get_path()[-1] - - @property - def prop_names(self): - return tuple(prop_name for prop_name, _ in self.order_info) - - def __repr__(self): - return u'CompositeIndex(%r, %r, %r, %r)' % ( - self.directory, self.kind, self.ancestor, self.order_info) - - @classmethod - def directory_path(cls, project_id, index_id, namespace): - return project_id, cls.DIR_NAME, six.text_type(index_id), namespace - - def encode_key(self, ancestor_path, encoded_values, remaining_path, - commit_versionstamp): - ancestor_path = Path.pack(ancestor_path) if ancestor_path else b'' - remaining_path = Path.pack(remaining_path) - key = b''.join( - (self.directory.rawPrefix, ancestor_path) + tuple(encoded_values) + - (remaining_path, commit_versionstamp or b'\x00' * VERSIONSTAMP_SIZE)) - if not commit_versionstamp: - key += encode_versionstamp_index(len(key) - VERSIONSTAMP_SIZE) - - return key - - def encode_keys(self, prop_list, path, commit_versionstamp): - encoded_values_by_prop = [] - for index_prop_name, direction in self.order_info: - reverse = direction == Query_Order.DESCENDING - encoded_values_by_prop.append( - tuple(encode_value(prop.value(), reverse) for prop in prop_list - if prop.name() == index_prop_name)) - - encoded_value_combos = itertools.product(*encoded_values_by_prop) - if not self.ancestor: - return tuple(self.encode_key((), values, path, commit_versionstamp) - for values in encoded_value_combos) - - keys = [] - for index in range(2, len(path), 2): - ancestor_path = path[:index] - remaining_path = path[index:] - keys.extend( - [self.encode_key(ancestor_path, values, remaining_path, - commit_versionstamp) - for values in encoded_value_combos]) - - return tuple(keys) - - def decode(self, kv): - pos = len(self.directory.rawPrefix) - properties = [] - if self.ancestor: - ancestor_path, pos = Path.unpack(kv.key, pos) - else: - ancestor_path = () - - for prop_name, direction in self.order_info: - value, pos = decode_value(kv.key, pos, - direction == Query_Order.DESCENDING) - properties.append((prop_name, value)) - - remaining_path = Path.unpack(kv.key, pos)[0] - path = ancestor_path + remaining_path - commit_versionstamp = kv.key[self.versionstamp_slice] - deleted_versionstamp = kv.value or None - return CompositeEntry(self.project_id, self.namespace, path, properties, - commit_versionstamp, deleted_versionstamp) - - class IndexManager(object): """ The IndexManager is the main interface that clients can use to interact with From acdf086b6e549fc71e26008a2eb37dbd3938d8c2 Mon Sep 17 
00:00:00 2001 From: Chris Donati Date: Fri, 27 Sep 2019 15:27:20 -0700 Subject: [PATCH 157/221] Create common functions for validating cache This allows other modules to implement their own cache based on the global metadata key. --- AppDB/appscale/datastore/fdb/cache.py | 45 +++++++++++++++++---------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/cache.py b/AppDB/appscale/datastore/fdb/cache.py index 1d5a2a36ef..2ef70c2d67 100644 --- a/AppDB/appscale/datastore/fdb/cache.py +++ b/AppDB/appscale/datastore/fdb/cache.py @@ -12,14 +12,31 @@ logger = logging.getLogger(__name__) +# The location of the metadata version key. The value of this key is passed +# to FDB clients at the start of every transaction. +METADATA_KEY = b'\xff/metadataVersion' + + +@fdb.transactional +def ensure_metadata_key(tr): + current_version = tr[METADATA_KEY] + if not current_version.present(): + logger.info(u'Setting metadata key for the first time') + tr.set_versionstamped_value(METADATA_KEY, b'\x00' * 14) + + +@gen.coroutine +def current_metadata_version(tr, tornado_fdb): + current_version = yield tornado_fdb.get(tr, METADATA_KEY) + if not current_version.present(): + raise InternalError(u'The FDB cluster metadata key is missing') + + raise gen.Return(current_version.value) + class DirectoryCache(object): """ A simple cache that keeps track of directory prefixes. """ - # The location of the metadata version key. The value of this key is passed - # to FDB clients at the start of every transaction. - METADATA_KEY = b'\xff/metadataVersion' - # The number of items to keep in the cache. SIZE = 2048 @@ -48,16 +65,13 @@ def __contains__(self, item): return item in self._directory_dict def initialize(self): - self._ensure_metadata_key(self._db) + ensure_metadata_key(self._db) @gen.coroutine def get(self, tr, key): - current_version = yield self._tornado_fdb.get(tr, self.METADATA_KEY) - if not current_version.present(): - raise InternalError(u'The FDB cluster metadata key is missing') - - if current_version.value != self._metadata_version: - self._metadata_version = current_version.value + current_version = yield current_metadata_version(tr, self._tornado_fdb) + if current_version != self._metadata_version: + self._metadata_version = current_version self._directory_dict.clear() self._directory_keys.clear() @@ -71,9 +85,6 @@ def get(self, tr, key): raise gen.Return(self[full_key]) - @fdb.transactional - def _ensure_metadata_key(self, tr): - current_version = tr[self.METADATA_KEY] - if not current_version.present(): - logger.info(u'Setting metadata key for the first time') - tr.set_versionstamped_value(self.METADATA_KEY, b'\x00' * 14) + @staticmethod + def invalidate(tr): + tr.set_versionstamped_value(METADATA_KEY, b'\x00' * 14) From 13747dd1587d8e6e7dfb8dabb9596e6d47be95fd Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 27 Sep 2019 15:31:29 -0700 Subject: [PATCH 158/221] Store composite index metadata in FDB This allows index read and write operations to be consistent with the index definition state. The CompositeIndexManager keeps track of what indexes are defined for a project and which ones can be queried. An elected datastore server will watch for schema changes and backpopulate indexes when necessary. 
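For illustration, the definition-caching behavior this patch introduces reduces to refetching a project's index definitions only when the cluster-wide metadata version changes. The following is a minimal standalone sketch of that idea, not code from the patch; the fetch_definitions callable and the current_version argument are hypothetical stand-ins for the FDB reads that CompositeIndexManager.get_definitions performs in the diff below:

    from collections import namedtuple

    ProjectDefinitions = namedtuple('ProjectDefinitions',
                                    ['version', 'definitions'])

    class DefinitionCache(object):
      """ Caches index definitions per project, keyed by metadata version. """
      def __init__(self):
        self._cache = {}  # project_id -> ProjectDefinitions

      def get(self, project_id, current_version, fetch_definitions):
        # Refetch only if the global metadata version has changed since this
        # project's definitions were last read; otherwise serve the cache.
        cached = self._cache.get(project_id, ProjectDefinitions(None, ()))
        if current_version != cached.version:
          self._cache[project_id] = ProjectDefinitions(
            current_version, tuple(fetch_definitions(project_id)))

        return self._cache[project_id].definitions

Any writer that changes definitions only needs to bump the shared metadata version, which is what _mark_schema_change does via the versionstamped metadata key.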
--- .../datastore/datastore_distributed.py | 1 + .../datastore/fdb/composite_indexes.py | 268 ++++++++++++++++++ AppDB/appscale/datastore/fdb/fdb_datastore.py | 32 ++- AppDB/appscale/datastore/fdb/indexes.py | 110 ++----- AppDB/appscale/datastore/fdb/utils.py | 8 + AppDB/appscale/datastore/scripts/datastore.py | 9 +- common/appscale/common/datastore_index.py | 23 +- 7 files changed, 339 insertions(+), 112 deletions(-) create mode 100644 AppDB/appscale/datastore/fdb/composite_indexes.py diff --git a/AppDB/appscale/datastore/datastore_distributed.py b/AppDB/appscale/datastore/datastore_distributed.py index 6cdccd876e..5f5a01ade5 100644 --- a/AppDB/appscale/datastore/datastore_distributed.py +++ b/AppDB/appscale/datastore/datastore_distributed.py @@ -3272,6 +3272,7 @@ def get_indexes(self, project_id): return indexes + @gen.coroutine def add_indexes(self, project_id, indexes): """ Adds composite index definitions to a project. diff --git a/AppDB/appscale/datastore/fdb/composite_indexes.py b/AppDB/appscale/datastore/fdb/composite_indexes.py new file mode 100644 index 0000000000..fb22ede09a --- /dev/null +++ b/AppDB/appscale/datastore/fdb/composite_indexes.py @@ -0,0 +1,268 @@ +import logging +import random +import sys +import uuid +from collections import namedtuple + +import monotonic +from tornado import gen +from tornado.ioloop import IOLoop + +from appscale.common.datastore_index import DatastoreIndex +from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.datastore.dbconstants import InternalError +from appscale.datastore.fdb.cache import ( + current_metadata_version, ensure_metadata_key) +from appscale.datastore.fdb.codecs import decode_str +from appscale.datastore.fdb.index_directories import CompositeIndex, KindIndex +from appscale.datastore.fdb.polling_lock import PollingLock +from appscale.datastore.fdb.utils import fdb, MAX_FDB_TX_DURATION, ResultIterator + +sys.path.append(APPSCALE_PYTHON_APPSERVER) +from google.appengine.datastore import entity_pb + +logger = logging.getLogger(__name__) + +# Though there is only one metadata version at any given time, it can be +# expensive to fetch definitions for all projects when a request only needs +# definitions for a single project. Therefore, the metadata version is cached +# alongside the index definitions for each project. 
+ProjectDefinitions = namedtuple('ProjectDefinitions', + ['version', 'definitions']) + + +class IndexMetadataDirectory(object): + DIR_NAME = u'index-definitions' + + __slots__ = ['directory'] + + def __init__(self, directory): + self.directory = directory + + @classmethod + def directory_path(cls, project_id): + return project_id, cls.DIR_NAME + + def encode_key(self, index_id): + if index_id is None: + raise InternalError(u'Index definition must have an assigned ID') + + return self.directory.pack((index_id,)) + + def encode(self, definition): + return self.encode_key(definition.id), definition.to_pb().Encode() + + def decode(self, kvs): + return tuple(DatastoreIndex.from_pb(entity_pb.CompositeIndex(kv.value)) + for kv in kvs) + + def get_slice(self): + return self.directory.range() + + +class CompositeIndexManager(object): + _LOCK_KEY = u'composite-index-manager-lock' + + _REBUILD_TRIGGER_KEY = u'rebuild-trigger' + + def __init__(self, db, tornado_fdb, data_manager, directory_cache): + self._db = db + self._tornado_fdb = tornado_fdb + self._data_manager = data_manager + self._directory_cache = directory_cache + self._trigger_key = None + + # By project ID + self._cache = {} + + lock_key = self._directory_cache.root_dir.pack((self._LOCK_KEY,)) + self._lock = PollingLock(self._db, self._tornado_fdb, lock_key) + + def start(self): + """ Starts the garbage collection work. """ + ensure_metadata_key(self._db) + self._trigger_key = self._directory_cache.root_dir.pack( + (self._REBUILD_TRIGGER_KEY,)) + self._lock.start() + IOLoop.current().spawn_callback(self._build_indexes) + + @gen.coroutine + def get_definitions(self, tr, project_id): + """ Fetches index definitions for a given project. """ + current_version = yield current_metadata_version(tr, self._tornado_fdb) + cached_definitions = self._cache.get( + project_id, ProjectDefinitions(None, ())) + if current_version != cached_definitions.version: + directory = yield self._get_directory(tr, project_id) + results = yield ResultIterator(tr, self._tornado_fdb, + directory.get_slice()).list() + self._cache[project_id] = ProjectDefinitions( + current_version, directory.decode(results)) + + raise gen.Return(self._cache[project_id].definitions) + + @gen.coroutine + def merge(self, tr, project_id, new_indexes): + """ + Adds new indexes to a project. Existing indexes that match are ignored. + """ + existing_indexes = yield self.get_definitions(tr, project_id) + + # Disregard index entries that already exist. + existing_index_defs = {index.encoded_def for index in existing_indexes} + new_indexes = [index for index in new_indexes + if index.encoded_def not in existing_index_defs] + + if not new_indexes: + return + + # Assign each new index an ID and store it. + directory = yield self._get_directory(tr, project_id) + for new_index in new_indexes: + if new_index.id is None: + # The ID must be a positive number that fits in a signed 64-bit int. 
+ new_index.id = uuid.uuid1().int >> 65 + + key, value = directory.encode(new_index) + tr[key] = value + + self._mark_schema_change(tr) + + @gen.coroutine + def update_composite_index(self, project_id, index_pb, cursor=(None, None)): + start_ns, start_key = cursor + project_id = decode_str(project_id) + kind = decode_str(index_pb.definition().entity_type()) + tr = self._db.create_transaction() + deadline = monotonic.monotonic() + MAX_FDB_TX_DURATION - 1 + kind_indexes = yield self._indexes_for_kind(tr, project_id, kind) + for kind_index in kind_indexes: + if start_ns is not None and kind_index.namespace < start_ns: + continue + + composite_path = CompositeIndex.directory_path( + project_id, index_pb.id(), kind_index.namespace) + composite_dir = yield self._directory_cache.get(tr, composite_path) + order_info = tuple( + (decode_str(prop.name()), prop.direction()) + for prop in index_pb.definition().property_list()) + composite_index = CompositeIndex( + composite_dir, kind, index_pb.definition().ancestor(), order_info) + + logger.info(u'Backfilling {}'.format(composite_index)) + remaining_range = kind_index.directory.range() + if start_key is not None: + remaining_range = slice( + fdb.KeySelector.first_greater_than(start_key), remaining_range.stop) + start_key = None + + result_iterator = ResultIterator(tr, self._tornado_fdb, remaining_range) + while True: + results, more_results = yield result_iterator.next_page() + index_entries = [kind_index.decode(result) for result in results] + version_entries = yield [self._data_manager.get_entry(tr, entry) + for entry in index_entries] + for index_entry, version_entry in zip(index_entries, version_entries): + new_keys = composite_index.encode_keys( + version_entry.decoded.property_list(), version_entry.path, + version_entry.commit_versionstamp) + for new_key in new_keys: + tr[new_key] = index_entry.deleted_versionstamp or b'' + + if not more_results: + logger.info(u'Finished backfilling {}'.format(composite_index)) + break + + if monotonic.monotonic() > deadline: + try: + yield self._tornado_fdb.commit(tr) + cursor = (kind_index.namespace, results[-1].key) + except fdb.FDBError as fdb_error: + logger.warning(u'Error while updating index: {}'.format(fdb_error)) + tr.on_error(fdb_error).wait() + + yield self.update_composite_index(project_id, index_pb, cursor) + return + + yield self._tornado_fdb.commit(tr) + + tr = self._db.create_transaction() + metadata_dir = yield self._get_directory(tr, project_id) + current_definition = yield self._tornado_fdb.get( + tr, metadata_dir.encode_key(index_pb.id())) + if not current_definition.present(): + return + + index = DatastoreIndex.from_pb( + entity_pb.CompositeIndex(current_definition.value)) + index.ready = True + key, value = metadata_dir.encode(index) + tr[key] = value + self._mark_schema_change(tr) + + yield self._tornado_fdb.commit(tr) + logger.info(u'{} is ready'.format(index)) + + @gen.coroutine + def _build_indexes(self): + while True: + try: + yield self._lock.acquire() + tr = self._db.create_transaction() + + # TODO: This can be made async. 
+ project_ids = self._directory_cache.root_dir.list(tr) + project_definitions = yield [self.get_definitions(tr, project_id) + for project_id in project_ids] + to_rebuild = None + for project_id, definitions in zip(project_ids, project_definitions): + to_rebuild = next((definition for definition in definitions + if not definition.ready), None) + if to_rebuild is not None: + break + + if to_rebuild is None: + watch_future = self._tornado_fdb.watch(tr, self._trigger_key) + yield self._tornado_fdb.commit(tr) + yield watch_future + continue + + yield self.update_composite_index(to_rebuild.project_id, + to_rebuild.to_pb()) + except Exception: + logger.exception(u'Unexpected error while rebuilding indexes') + yield gen.sleep(random.random() * 20) + + @gen.coroutine + def _indexes_for_kind(self, tr, project_id, kind): + section_path = KindIndex.section_path(project_id) + section_dir = yield self._directory_cache.get(tr, section_path) + # TODO: This can be made async. + try: + namespaces = section_dir.list(tr) + except ValueError: + # There are no kind indexes that this transaction can see. + raise gen.Return([]) + + indexes = [] + for namespace in namespaces: + ns_dir = section_dir.open(tr, (namespace,)) + try: + kind_dir = ns_dir.open(tr, (kind,)) + except ValueError: + continue + + indexes.append(KindIndex(kind_dir)) + + raise gen.Return(indexes) + + def _mark_schema_change(self, tr): + # Notify leader that at least one index needs to be rebuilt. + tr.set_versionstamped_value(self._trigger_key, b'\x00' * 14) + self._directory_cache.invalidate(tr) + + @gen.coroutine + def _get_directory(self, tr, project_id): + path = IndexMetadataDirectory.directory_path(project_id) + directory = yield self._directory_cache.get(tr, path) + raise gen.Return(IndexMetadataDirectory(directory)) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index 79a653147d..2ed7491f08 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -19,7 +19,6 @@ from tornado.ioloop import IOLoop from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.common.datastore_index import merge_indexes from appscale.datastore.dbconstants import ( BadRequest, ConcurrentModificationException, InternalError) from appscale.datastore.fdb.cache import DirectoryCache @@ -43,13 +42,13 @@ class FDBDatastore(object): """ A datastore implementation that uses FoundationDB. 
""" def __init__(self): - self.index_manager = None self._data_manager = None self._db = None self._scattered_allocator = ScatteredAllocator() self._tornado_fdb = None self._tx_manager = None self._gc = None + self._index_manager = None def start(self, fdb_clusterfile): self._db = fdb.open(fdb_clusterfile) @@ -59,12 +58,15 @@ def start(self, fdb_clusterfile): directory_cache.initialize() self._data_manager = DataManager(self._tornado_fdb, directory_cache) - self.index_manager = IndexManager( - self._db, self._tornado_fdb, self._data_manager, directory_cache) self._tx_manager = TransactionManager( self._db, self._tornado_fdb, directory_cache) + + self._index_manager = IndexManager( + self._db, self._tornado_fdb, self._data_manager, directory_cache) + self._index_manager.start() + self._gc = GarbageCollector( - self._db, self._tornado_fdb, self._data_manager, self.index_manager, + self._db, self._tornado_fdb, self._data_manager, self._index_manager, self._tx_manager, directory_cache) self._gc.start() @@ -231,10 +233,10 @@ def _dynamic_run_query(self, query, query_result): safe_versionstamp > read_versionstamp): raise BadRequest(u'The specified transaction has expired') - fetch_data = self.index_manager.include_data(query) - rpc_limit, check_more_results = self.index_manager.rpc_limit(query) + fetch_data = self._index_manager.include_data(query) + rpc_limit, check_more_results = self._index_manager.rpc_limit(query) - iterator = yield self.index_manager.get_iterator( + iterator = yield self._index_manager.get_iterator( tr, query, read_versionstamp) for prop_name in query.property_name_list(): prop_name = decode_str(prop_name) @@ -366,8 +368,9 @@ def rollback_transaction(self, project_id, txid): @gen.coroutine def update_composite_index(self, project_id, index): project_id = decode_str(project_id) - yield self.index_manager.update_composite_index(project_id, index) + yield self._index_manager.update_composite_index(project_id, index) + @gen.coroutine def add_indexes(self, project_id, indexes): """ Adds composite index definitions to a project. @@ -376,10 +379,9 @@ def add_indexes(self, project_id, indexes): project_id: A string specifying a project ID. indexes: An iterable containing index definitions. """ - # This is a temporary workaround to get a ZooKeeper client. This method - # will not use ZooKeeper in the future. 
- zk_client = self.index_manager.composite_index_manager._zk_client - merge_indexes(zk_client, project_id, indexes) + tr = self._db.create_transaction() + yield self._index_manager.merge(tr, project_id, indexes) + yield self._tornado_fdb.commit(tr) @gen.coroutine def _upsert(self, tr, entity, old_entry_future=None): @@ -409,7 +411,7 @@ def _upsert(self, tr, entity, old_entry_future=None): new_version = next_entity_version(old_entry.version) yield self._data_manager.put( tr, entity.key(), new_version, entity.Encode()) - yield self.index_manager.put_entries(tr, old_entry, entity) + yield self._index_manager.put_entries(tr, old_entry, entity) if old_entry.present: yield self._gc.index_deleted_version(tr, old_entry) @@ -429,7 +431,7 @@ def _delete(self, tr, key, old_entry_future=None): new_version = next_entity_version(old_entry.version) yield self._data_manager.put(tr, key, new_version, b'') - yield self.index_manager.put_entries(tr, old_entry, new_entity=None) + yield self._index_manager.put_entries(tr, old_entry, new_entity=None) if old_entry.present: yield self._gc.index_deleted_version(tr, old_entry) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 010c46e14d..7e535771a5 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -359,11 +359,15 @@ class IndexManager(object): _MAX_RESULTS = 300 def __init__(self, db, tornado_fdb, data_manager, directory_cache): - self.composite_index_manager = None self._db = db self._tornado_fdb = tornado_fdb self._data_manager = data_manager self._directory_cache = directory_cache + self._composite_index_manager = CompositeIndexManager( + self._db, self._tornado_fdb, self._data_manager, self._directory_cache) + + def start(self): + self._composite_index_manager.start() @gen.coroutine def put_entries(self, tr, old_version_entry, new_entity): @@ -517,6 +521,15 @@ def get_iterator(self, tr, query, read_versionstamp=None): raise gen.Return(iterator) + @gen.coroutine + def merge(self, tr, project_id, new_indexes): + yield self._composite_index_manager.merge(tr, project_id, new_indexes) + + @gen.coroutine + def update_composite_index(self, project_id, index): + yield self._composite_index_manager.update_composite_index( + project_id, index) + @gen.coroutine def _get_index_keys(self, tr, entity, commit_versionstamp=None): project_id = decode_str(entity.key().app()) @@ -600,12 +613,10 @@ def _get_perfect_index(self, tr, query): @gen.coroutine def _get_indexes(self, tr, project_id, namespace, kind): - try: - project_index_manager = self.composite_index_manager.projects[project_id] - except KeyError: - raise BadRequest(u'project_id: {} not found'.format(project_id)) + project_indexes = yield self._composite_index_manager.get_definitions( + tr, project_id) - relevant_indexes = [index for index in project_index_manager.indexes + relevant_indexes = [index for index in project_indexes if index.kind == kind] fdb_indexes = [] for index in relevant_indexes: @@ -657,11 +668,14 @@ def _single_prop_index(self, tr, project_id, namespace, kind, prop_name): def _composite_index(self, tr, project_id, index_id, namespace): path = CompositeIndex.directory_path(project_id, index_id, namespace) directory = yield self._directory_cache.get(tr, path) - kind, ancestor, order_info = self._index_details(project_id, index_id) + kind, ancestor, order_info = yield self._index_details( + tr, project_id, index_id) raise gen.Return(CompositeIndex(directory, kind, ancestor, order_info)) - def 
_index_details(self, project_id, index_id): - project_indexes = self.composite_index_manager.projects[project_id].indexes + @gen.coroutine + def _index_details(self, tr, project_id, index_id): + project_indexes = yield self._composite_index_manager.get_definitions( + tr, project_id) index_def = next((ds_index for ds_index in project_indexes if ds_index.id == index_id), None) if index_def is None: @@ -670,80 +684,4 @@ def _index_details(self, project_id, index_id): order_info = tuple( (decode_str(prop.name), prop.to_pb().direction()) for prop in index_def.properties) - return index_def.kind, index_def.ancestor, order_info - - @gen.coroutine - def _indexes_for_kind(self, tr, project_id, kind): - section_path = KindIndex.section_path(project_id) - section_dir = yield self._directory_cache.get(tr, section_path) - # TODO: This can be made async. - indexes = [] - namespaces = section_dir.list(tr) - for namespace in namespaces: - ns_dir = section_dir.open(tr, (namespace,)) - try: - kind_dir = ns_dir.open(tr, (kind,)) - except ValueError: - continue - - indexes.append(KindIndex(kind_dir)) - - raise gen.Return(indexes) - - @gen.coroutine - def update_composite_index(self, project_id, index_pb, cursor=(None, None)): - start_ns, start_key = cursor - project_id = decode_str(project_id) - kind = decode_str(index_pb.definition().entity_type()) - tr = self._db.create_transaction() - deadline = monotonic.monotonic() + MAX_FDB_TX_DURATION - 1 - kind_indexes = yield self._indexes_for_kind(tr, project_id, kind) - for kind_index in kind_indexes: - if start_ns is not None and kind_index.namespace < start_ns: - continue - - composite_path = CompositeIndex.directory_path( - project_id, index_pb.id(), kind_index.namespace) - composite_dir = yield self._directory_cache.get(tr, composite_path) - order_info = tuple( - (decode_str(prop.name()), prop.direction()) - for prop in index_pb.definition().property_list()) - composite_index = CompositeIndex( - composite_dir, kind, index_pb.definition().ancestor(), order_info) - - logger.info(u'Backfilling {}'.format(composite_index)) - remaining_range = kind_index.directory.range() - if start_key is not None: - remaining_range = slice( - fdb.KeySelector.first_greater_than(start_key), remaining_range.stop) - start_key = None - - result_iterator = ResultIterator(tr, self._tornado_fdb, remaining_range) - while True: - results, more_results = yield result_iterator.next_page() - index_entries = [kind_index.decode(result) for result in results] - version_entries = yield [self._data_manager.get_entry(tr, entry) - for entry in index_entries] - for index_entry, version_entry in zip(index_entries, version_entries): - new_keys = composite_index.encode_keys( - version_entry.decoded.property_list(), version_entry.path, - version_entry.commit_versionstamp) - for new_key in new_keys: - tr[new_key] = index_entry.deleted_versionstamp or b'' - - if not more_results: - logger.info(u'Finished backfilling {}'.format(composite_index)) - break - - if monotonic.monotonic() > deadline: - try: - yield self._tornado_fdb.commit(tr) - cursor = (kind_index.namespace, results[-1].key) - except fdb.FDBError as fdb_error: - logger.warning(u'Error while updating index: {}'.format(fdb_error)) - tr.on_error(fdb_error).wait() - - yield self.update_composite_index(project_id, index_pb, cursor) - return - - yield self._tornado_fdb.commit(tr) + raise gen.Return((index_def.kind, index_def.ancestor, order_info)) diff --git a/AppDB/appscale/datastore/fdb/utils.py b/AppDB/appscale/datastore/fdb/utils.py index 
dbd0dcc2d8..dc63511023 100644 --- a/AppDB/appscale/datastore/fdb/utils.py +++ b/AppDB/appscale/datastore/fdb/utils.py @@ -151,6 +151,14 @@ def get_read_version(self, tr): get_future.on_ready(callback) return tornado_future + def watch(self, tr, key): + tornado_future = TornadoFuture() + callback = lambda fdb_future: self._handle_fdb_result( + fdb_future, tornado_future) + watch_future = tr.watch(key) + watch_future.on_ready(callback) + return tornado_future + def _handle_fdb_result(self, fdb_future, tornado_future): try: result = fdb_future.wait() diff --git a/AppDB/appscale/datastore/scripts/datastore.py b/AppDB/appscale/datastore/scripts/datastore.py index 36e33f9a77..abca872ade 100644 --- a/AppDB/appscale/datastore/scripts/datastore.py +++ b/AppDB/appscale/datastore/scripts/datastore.py @@ -127,6 +127,7 @@ def post(self): class AddIndexesHandler(tornado.web.RequestHandler): + @gen.coroutine def post(self): """ At this time, there does not seem to be a public API method for creating @@ -140,7 +141,7 @@ def post(self): project_id = self.get_argument('project') indexes = [DatastoreIndex.from_dict(project_id, index) for index in json.loads(self.request.body)] - datastore_access.add_indexes(project_id, indexes) + yield datastore_access.add_indexes(project_id, indexes) class MainHandler(tornado.web.RequestHandler): @@ -959,12 +960,10 @@ def main(): zk_state_listener(zk_client.state) zk_client.ChildrenWatch(DATASTORE_SERVERS_NODE, update_servers_watch) - index_manager = IndexManager(zk_client, datastore_access, - perform_admin=True) if args.type == 'cassandra': + index_manager = IndexManager(zk_client, datastore_access, + perform_admin=True) datastore_access.index_manager = index_manager - else: - datastore_access.index_manager.composite_index_manager = index_manager server = tornado.httpserver.HTTPServer(pb_application) server.listen(args.port) diff --git a/common/appscale/common/datastore_index.py b/common/appscale/common/datastore_index.py index d5ca80c695..da0ef7636e 100644 --- a/common/appscale/common/datastore_index.py +++ b/common/appscale/common/datastore_index.py @@ -72,6 +72,9 @@ def __init__(self, name, direction): self.name = name self.direction = direction + def __repr__(self): + return u'IndexProperty({!r}, {!r})'.format(self.name, self.direction) + @property def encoded_def(self): """ Returns a string representation of the index property. """ @@ -138,7 +141,8 @@ class DatastoreIndex(object): # Separates fields of an encoded index. ENCODING_DELIMITER = '|' - def __init__(self, project_id, kind, ancestor, properties): + def __init__(self, project_id, kind, ancestor, properties, ready=False, + id_=None): """ Creates a new DatastoreIndex object. Args: @@ -147,17 +151,21 @@ def __init__(self, project_id, kind, ancestor, properties): ancestor: A boolean indicating whether or not the index is for satisfying ancestor queries. properties: A list of IndexProperty objects. + ready: A boolean indicating whether or not the index is ready to be + queried. + id_: An integer specifying the index ID. """ self.project_id = project_id self.kind = kind self.ancestor = ancestor self.properties = properties + self.ready = ready + self.id = id_ - # When creating a new index, assume that it's not ready to be queried yet. - self.ready = False - - # The index ID is assigned by UpdateIndexes. 
- self.id = None + def __repr__(self): + return u'DatastoreIndex({!r}, {!r}, {!r}, {!r}, {!r}, {!r})'.format( + self.project_id, self.kind, self.ancestor, self.properties, self.ready, + self.id) @property def encoded_def(self): @@ -237,6 +245,9 @@ def from_pb(cls, index_pb): for prop in index_pb.definition().property_list()] datastore_index = cls(project_id, kind, ancestor, properties) datastore_index.id = index_pb.id() + if index_pb.state() == entity_pb.CompositeIndex.READ_WRITE: + datastore_index.ready = True + return datastore_index def to_dict(self): From 4c3f0c178c5390d71c8d1da52666330634805d3a Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 27 Sep 2019 15:34:05 -0700 Subject: [PATCH 159/221] Only query indexes that have been fully populated --- AppDB/appscale/datastore/fdb/indexes.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 7e535771a5..a08797b4d0 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -605,7 +605,11 @@ def _get_perfect_index(self, tr, query): tr, project_id, namespace, decode_str(query.kind()), prop_name) raise gen.Return(single_prop_index) - index_pb = FindIndexToUse(query, self._get_indexes_pb(project_id)) + queryable = [ + index.to_pb() for index in ( + yield self._composite_index_manager.get_definitions(tr, project_id)) + if index.ready] + index_pb = FindIndexToUse(query, queryable) if index_pb is not None: composite_index = yield self._composite_index( tr, project_id, index_pb.id(), namespace) @@ -632,19 +636,6 @@ def _get_indexes(self, tr, project_id, namespace, kind): raise gen.Return(fdb_indexes) - def _get_indexes_pb(self, project_id): - try: - project_index_manager = self.composite_index_manager.projects[project_id] - except KeyError: - raise BadRequest(u'project_id: {} not found'.format(project_id)) - - try: - indexes = project_index_manager.indexes_pb - except IndexInaccessible: - raise InternalError(u'ZooKeeper is not accessible') - - return indexes - @gen.coroutine def _kindless_index(self, tr, project_id, namespace): path = KindlessIndex.directory_path(project_id, namespace) From c5aae23f23d26a41a55158d63a7e02df16c5b4a9 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Mon, 30 Sep 2019 15:29:54 +0300 Subject: [PATCH 160/221] Prepare postgres as root --- AppTaskQueue/test/suites/run-e2e-tests.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/AppTaskQueue/test/suites/run-e2e-tests.sh b/AppTaskQueue/test/suites/run-e2e-tests.sh index 8f1e2cc9a7..a2167d4b8a 100755 --- a/AppTaskQueue/test/suites/run-e2e-tests.sh +++ b/AppTaskQueue/test/suites/run-e2e-tests.sh @@ -131,7 +131,7 @@ scp -o StrictHostKeyChecking=no \ "${USER}@${VM_ADDR}:/tmp/common" # Save DSN string and projects config to variables -PG_DSN="dbname=appscale-test-project user=appscale password=appscale-pwd host=${VM_PRIVATE_IP}" +PG_DSN="dbname=pullqueue-db user=appscale password=appscale-pwd host=${VM_PRIVATE_IP}" TEST_PROJECT='test-project' log "" @@ -141,10 +141,10 @@ log "==========================================================================" ssh -o StrictHostKeyChecking=no -i ${KEY_LOCATION} ${USER}@${VM_ADDR} << COMMANDS set -e echo "=== Preparing Postgres server ===" -sudo /tmp/prepare-postgres.sh --host "${VM_PRIVATE_IP}" \ - --dbname "appscale-test-project" \ - --username "appscale" \ - --password "appscale-pwd" +sudo su -l root -c '/tmp/prepare-postgres.sh --host 
"${VM_PRIVATE_IP}" \ + --dbname "pullqueue-db" \ + --username "appscale" \ + --password "appscale-pwd"' echo "=== Starting Zookeeper server ===" sudo /tmp/prepare-zookeeper.sh From 7a98f7454da9e452d7cddc44e1304a502472b240 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 30 Sep 2019 11:24:54 -0700 Subject: [PATCH 161/221] Check if a kind has at least one entity This resolves an issue with metadata queries where a recently-deleted kind can be listed. --- AppDB/appscale/datastore/fdb/indexes.py | 34 ++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 8c5b585e85..c92e993549 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -336,8 +336,9 @@ def _usable(self, entry): class KindIterator(object): - def __init__(self, tr, project_dir, namespace): + def __init__(self, tr, tornado_fdb, project_dir, namespace): self._tr = tr + self._tornado_fdb = tornado_fdb self._project_dir = project_dir self._namespace = namespace self._done = False @@ -351,13 +352,39 @@ def next_page(self): ns_dir = self._project_dir.open( self._tr, (KindIndex.DIR_NAME, self._namespace)) kinds = ns_dir.list(self._tr) + populated_kinds = [ + kind for kind, populated in zip( + kinds, (yield [self._populated(ns_dir, kind) for kind in kinds])) + if populated] + results = [IndexEntry(self._project_dir.get_path()[-1], self._namespace, (u'__kind__', kind), None, None) - for kind in kinds] + for kind in populated_kinds] self._done = True raise gen.Return((results, False)) + @gen.coroutine + def _populated(self, ns_dir, kind): + """ Checks if at least one entity exists for a given kind. """ + kind_dir = ns_dir.open(self._tr, (kind,)) + kind_index = KindIndex(kind_dir) + # TODO: Check if the presence of stat entities should mark a kind as being + # populated. + index_slice = kind_index.get_slice(()) + # This query is reversed to increase the likelihood of getting an relevant + # (not marked for GC) entry. + iterator = IndexIterator(self._tr, self._tornado_fdb, kind_index, + index_slice, fetch_limit=1, reverse=True, + snapshot=True) + while True: + results, more_results = yield iterator.next_page() + if results: + raise gen.Return(True) + + if not more_results: + raise gen.Return(False) + class MergeJoinIterator(object): """ @@ -1115,7 +1142,8 @@ def get_iterator(self, tr, query, read_versionstamp=None): if query.has_kind() and query.kind() == u'__kind__': project_dir = yield self._directory_cache.get(tr, (project_id,)) - raise gen.Return(KindIterator(tr, project_dir, namespace)) + raise gen.Return(KindIterator(tr, self._tornado_fdb, project_dir, + namespace)) index = yield self._get_perfect_index(tr, query) reverse = get_scan_direction(query, index) == Query_Order.DESCENDING From e32f2225159fa84b4c4f7b73176032c285e2cb93 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 1 Oct 2019 17:39:38 +0300 Subject: [PATCH 162/221] Improve reliability of communication with Postgres - Set keepalives params and timeouts when establishing connection with Postgres. - Use zookeeper lock and lease to prevent concurrent initialization of Postgres tables and cleanup job. 
--- .../taskqueue/pg_connection_wrapper.py | 9 +- AppTaskQueue/appscale/taskqueue/queue.py | 208 +++++++++--------- .../appscale/taskqueue/queue_manager.py | 147 ++++++++++--- 3 files changed, 229 insertions(+), 135 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py b/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py index aa23b1a844..43540c59b9 100644 --- a/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py +++ b/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py @@ -38,7 +38,14 @@ def get_connection(self): """ if not self._connection or self._connection.closed: logger.info('Establishing new connection to Postgres server') - self._connection = psycopg2.connect(dsn=self._dsn) + self._connection = psycopg2.connect( + dsn=self._dsn, + connect_timeout=60, + options='-c statement_timeout=10000', + keepalives_idle=60, + keepalives_interval=15, + keepalives_count=4 + ) return self._connection def close(self): diff --git a/AppTaskQueue/appscale/taskqueue/queue.py b/AppTaskQueue/appscale/taskqueue/queue.py index 1caa4d45a9..1046b3f6d1 100644 --- a/AppTaskQueue/appscale/taskqueue/queue.py +++ b/AppTaskQueue/appscale/taskqueue/queue.py @@ -203,6 +203,11 @@ def is_connection_error(err): return isinstance(err, psycopg2.InterfaceError) +retry_pg_connection = retrying.retry( + retrying_timeout=10, retry_on_exception=is_connection_error +) + + class PostgresPullQueue(Queue): """ Before using Postgres implementation, make sure that @@ -210,10 +215,6 @@ class PostgresPullQueue(Queue): /etc/postgresql/9.5/main/pg_hba.conf """ - retry_pg_connection = retrying.retry( - retrying_timeout=10, retry_on_exception=is_connection_error - ) - TTL_INTERVAL_AFTER_DELETED = '7 days' # The maximum number of tasks that can be leased at a time. @@ -228,7 +229,7 @@ class PostgresPullQueue(Queue): # The number of seconds to keep the index cache. MAX_CACHE_DURATION = 30 - def __init__(self, queue_info, app): + def __init__(self, queue_info, app, queue_id): """ Create a PostgresPullQueue object. Args: @@ -236,104 +237,21 @@ def __init__(self, queue_info, app): app: A string containing the application ID. 
""" super(PostgresPullQueue, self).__init__(queue_info, app) - self.connection_key = self.app - self.ensure_project_schema_created() - self.queue_id = self.ensure_queue_registered() - self.ensure_tasks_table_created() - - # When multiple TQ servers are notified by ZK about new queue - # they sometimes get IntegrityError despite 'IF NOT EXISTS' - @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) - def ensure_project_schema_created(self): - pg_connection = pg_wrapper.get_connection() - with pg_connection: - with pg_connection.cursor() as pg_cursor: - logger.info('Ensuring "{schema_name}" schema is created' - .format(schema_name=self.schema_name)) - pg_cursor.execute( - 'CREATE SCHEMA IF NOT EXISTS "{schema_name}";' - .format(schema_name=self.schema_name) - ) - - # When multiple TQ servers are notified by ZK about new queue - # they sometimes get IntegrityError despite 'IF NOT EXISTS' - @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) - def ensure_queue_registered(self): - pg_connection = pg_wrapper.get_connection() - with pg_connection: - with pg_connection.cursor() as pg_cursor: - logger.info('Ensuring "{}" table is created' - .format(self.queues_table_name)) - pg_cursor.execute( - 'CREATE TABLE IF NOT EXISTS "{queues_table}" (' - ' id SERIAL,' - ' queue_name varchar(100) NOT NULL UNIQUE' - ');' - .format(queues_table=self.queues_table_name) - ) - pg_cursor.execute( - 'SELECT id FROM "{queues_table}" WHERE queue_name = %(queue_name)s;' - .format(queues_table=self.queues_table_name), - vars={'queue_name': self.name} - ) - row = pg_cursor.fetchone() - if row: - return row[0] - - logger.info('Registering queue "{}" in "{}" table' - .format(self.name, self.queues_table_name)) - pg_cursor.execute( - 'INSERT INTO "{queues_table}" (queue_name) ' - 'VALUES (%(queue_name)s) ON CONFLICT DO NOTHING;' - 'SELECT id FROM "{queues_table}" WHERE queue_name = %(queue_name)s;' - .format(queues_table=self.queues_table_name), - vars={'queue_name': self.name} - ) - row = pg_cursor.fetchone() - logger.info('Queue "{}" was registered with ID "{}"' - .format(self.name, row[0])) - return row[0] - - # When multiple TQ servers are notified by ZK about new queue - # they sometimes get IntegrityError despite 'IF NOT EXISTS' - @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) - def ensure_tasks_table_created(self): - pg_connection = pg_wrapper.get_connection() - with pg_connection: - with pg_connection.cursor() as pg_cursor: - logger.info('Ensuring "{}" table is created' - .format(self.tasks_table_name)) - pg_cursor.execute( - 'CREATE TABLE IF NOT EXISTS "{table_name}" (' - ' task_name varchar(500) NOT NULL,' - ' time_deleted timestamp DEFAULT NULL,' - ' time_enqueued timestamp NOT NULL,' - ' lease_count integer NOT NULL,' - ' lease_expires timestamp NOT NULL,' - ' payload bytea,' - ' tag varchar(500),' - ' PRIMARY KEY (task_name)' - ');' - 'CREATE INDEX IF NOT EXISTS "{table_name}_eta_retry_tag_index" ' - ' ON "{table_name}" USING BTREE (lease_expires, lease_count, tag) ' - ' WHERE time_deleted IS NULL;' - 'CREATE INDEX IF NOT EXISTS "{table_name}_retry_eta_tag_index" ' - ' ON "{table_name}" (lease_count, lease_expires, tag) ' - ' WHERE time_deleted IS NULL;' - .format(table_name=self.tasks_table_name) - ) + self.queue_id = queue_id + self.schema_name = self.get_schema_name(app) + self.tasks_table_name = self.get_tasks_table_name(app, queue_id) - @property - def schema_name(self): - return 'appscale_{}'.format(self.app) + @staticmethod + def 
get_schema_name(project_id): + return 'appscale_{}'.format(project_id) - @property - def queues_table_name(self): - return '{}.queues'.format(self.schema_name) + @classmethod + def get_queues_table_name(cls, project_id): + return '{}.queues'.format(cls.get_schema_name(project_id)) - @property - def tasks_table_name(self): - return '{}.tasks_{}'.format(self.schema_name, self.queue_id) + @classmethod + def get_tasks_table_name(cls, project_id, queue_id): + return '{}.tasks_{}'.format(cls.get_schema_name(project_id), queue_id) @retry_pg_connection def add_task(self, task): @@ -839,3 +757,93 @@ def __repr__(self): """ return ''.format( self.name, self.app, self.task_retry_limit) + + +@retry_pg_connection +def ensure_project_schema_created(project_id): + pg_connection = pg_wrapper.get_connection() + schema_name = PostgresPullQueue.get_schema_name(project_id) + with pg_connection: + with pg_connection.cursor() as pg_cursor: + logger.info('Ensuring "{schema_name}" schema is created' + .format(schema_name=schema_name)) + pg_cursor.execute( + 'CREATE SCHEMA IF NOT EXISTS "{schema_name}";' + .format(schema_name=schema_name) + ) + + +@retry_pg_connection +def ensure_queues_table_created(project_id): + pg_connection = pg_wrapper.get_connection() + queues_table_name = PostgresPullQueue.get_queues_table_name(project_id) + with pg_connection: + with pg_connection.cursor() as pg_cursor: + logger.info('Ensuring "{}" table is created'.format(queues_table_name)) + pg_cursor.execute( + 'CREATE TABLE IF NOT EXISTS "{queues_table}" (' + ' id SERIAL,' + ' queue_name varchar(100) NOT NULL UNIQUE' + ');' + .format(queues_table=queues_table_name) + ) + + +@retry_pg_connection +def ensure_queue_registered(project_id, queue_name): + pg_connection = pg_wrapper.get_connection() + queues_table_name = PostgresPullQueue.get_queues_table_name(project_id) + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT id FROM "{queues_table}" WHERE queue_name = %(queue_name)s;' + .format(queues_table=queues_table_name), + vars={'queue_name': queue_name} + ) + row = pg_cursor.fetchone() + if row: + return row[0] + + logger.info('Registering queue "{}" in "{}" table' + .format(queue_name, queues_table_name)) + pg_cursor.execute( + 'INSERT INTO "{queues_table}" (queue_name) ' + 'VALUES (%(queue_name)s) ON CONFLICT DO NOTHING;' + 'SELECT id FROM "{queues_table}" WHERE queue_name = %(queue_name)s;' + .format(queues_table=queues_table_name), + vars={'queue_name': queue_name} + ) + row = pg_cursor.fetchone() + logger.info('Queue "{}" was registered with ID "{}"' + .format(queue_name, row[0])) + return row[0] + + +@retry_pg_connection +def ensure_tasks_table_created(project_id, queue_id): + pg_connection = pg_wrapper.get_connection() + tasks_table_name = PostgresPullQueue.get_tasks_table_name( + project_id, queue_id + ) + with pg_connection: + with pg_connection.cursor() as pg_cursor: + logger.info('Ensuring "{}" table is created'.format(tasks_table_name)) + pg_cursor.execute( + 'CREATE TABLE IF NOT EXISTS "{table_name}" (' + ' task_name varchar(500) NOT NULL,' + ' time_deleted timestamp DEFAULT NULL,' + ' time_enqueued timestamp NOT NULL,' + ' lease_count integer NOT NULL,' + ' lease_expires timestamp NOT NULL,' + ' payload bytea,' + ' tag varchar(500),' + ' PRIMARY KEY (task_name)' + ');' + 'CREATE INDEX IF NOT EXISTS "{table_name}_eta_retry_tag_index" ' + ' ON "{table_name}" USING BTREE (lease_expires, lease_count, tag) ' + ' WHERE time_deleted IS NULL;' + 'CREATE INDEX IF NOT EXISTS 
"{table_name}_retry_eta_tag_index" ' + ' ON "{table_name}" (lease_count, lease_expires, tag) ' + ' WHERE time_deleted IS NULL;' + .format(table_name=tasks_table_name) + ) diff --git a/AppTaskQueue/appscale/taskqueue/queue_manager.py b/AppTaskQueue/appscale/taskqueue/queue_manager.py index acf56aaa8a..17a1ed0d57 100644 --- a/AppTaskQueue/appscale/taskqueue/queue_manager.py +++ b/AppTaskQueue/appscale/taskqueue/queue_manager.py @@ -1,20 +1,22 @@ """ Keeps track of queue configuration details for producer connections. """ import json -import random from kazoo.exceptions import ZookeeperError -from tornado import gen from tornado.ioloop import IOLoop, PeriodicCallback -from .queue import PushQueue, PostgresPullQueue +from .queue import ( + PushQueue, PostgresPullQueue, ensure_queue_registered, + ensure_queues_table_created, ensure_project_schema_created, + ensure_tasks_table_created +) from .utils import logger, create_celery_for_app class ProjectQueueManager(dict): """ Keeps track of queue configuration details for a single project. """ - FLUSH_DELETED_INTERVAL = 3 * 60 * 60 # 3h + FLUSH_DELETED_INTERVAL = 1 * 60 * 60 # 1h def __init__(self, zk_client, project_id): """ Creates a new ProjectQueueManager. @@ -28,18 +30,29 @@ def __init__(self, zk_client, project_id): self.project_id = project_id self._configure_periodical_flush() - self.queues_node = '/appscale/projects/{}/queues'.format(project_id) + project_node = '/appscale/projects/{}/'.format(project_id) + self.queues_node = project_node + 'queues' + self.pullqueues_initialization_lock = zk_client.Lock( + project_node + 'pullqueues_initialization_lock' + ) + self.pullqueues_initialized_version_node = ( + project_node + 'pullqueues_initialized_version' + ) + self.pullqueues_cleanup_lease_node = ( + project_node + 'pullqueues_cleanup_lease' + ) self.watch = zk_client.DataWatch(self.queues_node, self._update_queues_watch) self.celery = None self.rates = None self._stopped = False - def update_queues(self, queue_config): + def update_queues(self, queue_config, znode_stats): """ Caches new configuration details and cleans up old state. Args: queue_config: A JSON string specifying queue configuration. + znode_stats: An instance of ZnodeStats. """ logger.info('Updating queues for {}'.format(self.project_id)) if not queue_config: @@ -52,20 +65,29 @@ def update_queues(self, queue_config): for queue_name in to_stop: del self[queue_name] - # Add new queues. - for queue_name in new_queue_config: - if queue_name in self: - continue + self._update_push_queues( + ((queue_name, queue) for queue_name, queue in new_queue_config.items() + if queue.get('mode', 'push') == 'push') + ) - queue_info = new_queue_config[queue_name] - queue_info['name'] = queue_name - if 'mode' not in queue_info or queue_info['mode'] == 'push': - self[queue_name] = PushQueue(queue_info, self.project_id) - else: - self[queue_name] = PostgresPullQueue(queue_info, self.project_id) + self._update_pull_queues( + ((queue_name, queue) for queue_name, queue in new_queue_config.items() + if queue.get('mode', 'push') != 'push'), + znode_stats + ) - # Establish a new Celery connection based on the new queues, and close the - # old one. + def _update_push_queues(self, new_push_queue_configs): + """ Caches new push queue configuration details. + + Args: + new_push_queue_configs: A sequence of (queue_name, queue_info) tuples. 
+ """ + for queue_name, queue in new_push_queue_configs: + queue['name'] = queue_name + self[queue_name] = PushQueue(queue, self.project_id) + + # Establish a new Celery connection based on the new queues, + # and close the old one. push_queues = [queue for queue in self.values() if isinstance(queue, PushQueue)] old_rates = self.rates @@ -79,6 +101,64 @@ def update_queues(self, queue_config): for queue in push_queues: queue.celery = self.celery + def _update_pull_queues(self, new_pull_queue_configs, znode_stats): + """ Caches new pull queue configuration details. + + Args: + new_pull_queue_configs: A sequence of (queue_name, queue_info) tuples. + znode_stats: An instance of ZnodeStats. + """ + new_version = znode_stats.last_modified + if self._get_pullqueue_initialized_version() < new_version: + # Only one TaskQueue server proceeds with Postgres tables initialization. + with self.pullqueues_initialization_lock: + # Double check after acquiring lock. + if self._get_pullqueue_initialized_version() < new_version: + # Ensure project schema and queues registry table are created. + ensure_project_schema_created(self.project_id) + ensure_queues_table_created(self.project_id) + # Ensure all queues are registered and tasks tables are created. + for queue_name, queue in new_pull_queue_configs: + queue['name'] = queue_name + queue_id = ensure_queue_registered(self.project_id, queue_name) + ensure_tasks_table_created(self.project_id, queue_id) + # Instantiate PostgresPullQueue with registration queue ID. + self[queue_name] = PostgresPullQueue(queue, self.project_id, queue_id) + + # Report new initialized version of Postgres tables. + self._set_pullqueue_initialized_version(new_version) + return + + # Postgres tables are already created, just instantiate PostgresPullQueue. + for queue_name, queue in new_pull_queue_configs: + queue['name'] = queue_name + queue_id = ensure_queue_registered(self.project_id, queue_name) + self[queue_name] = PostgresPullQueue(queue, self.project_id, queue_id) + + def _get_pullqueue_initialized_version(self): + """ Retrieves zookeeper node holding version of PullQueues configs + which is currently provisioned in Postgres. + """ + initialized_version = -1 + version_node = self.pullqueues_initialized_version_node + if self.zk_client.exists(version_node): + initialized_version = self.zk_client.get(version_node) + return initialized_version + + def _set_pullqueue_initialized_version(self, version): + """ Sets zookeeper node holding version of PullQueues configs + which is currently provisioned in Postgres. + + Args: + version: A number representing last modification time + of queue configs node in zookeeper. + """ + version_node = self.pullqueues_initialized_version_node + if self.zk_client.exists(version_node): + self.zk_client.set(version_node, version) + else: + self.zk_client.create(version_node, version) + def ensure_watch(self): """ Restart the watch if it has been cancelled. """ if self._stopped: @@ -91,7 +171,7 @@ def stop(self): if self.celery is not None: self.celery.close() - def _update_queues_watch(self, queue_config, _): + def _update_queues_watch(self, queue_config, znode_stats): """ Handles updates to a queue configuration node. Since this runs in a separate thread, it doesn't change any state directly. @@ -99,6 +179,7 @@ def _update_queues_watch(self, queue_config, _): Args: queue_config: A JSON string specifying queue configuration. + znode_stats: An instance of ZnodeStats. 
""" main_io_loop = IOLoop.instance() @@ -115,27 +196,25 @@ def _update_queues_watch(self, queue_config, _): self._stopped = True return False - main_io_loop.add_callback(self.update_queues, queue_config) + main_io_loop.add_callback(self.update_queues, queue_config, znode_stats) def _configure_periodical_flush(self): """ Creates and starts periodical callback to clear old deleted tasks. """ - @gen.coroutine def flush_deleted(): - """ Calls flush_deleted method for all PostgresPullQueues - with asynchronous delay to avoid concentration of flush queries - to SQL server during short period of time. + """ Attempts to lease a right to cleanup old deleted tasks. + If it could lease the right it removes old deleted tasks for project + pull queues. """ - yield gen.sleep(random.random() * self.FLUSH_DELETED_INTERVAL / 2) - postgres_pull_queues = (q for q in self.values() - if isinstance(q, PostgresPullQueue)) - for q in postgres_pull_queues: - yield gen.sleep(3) - q.flush_deleted() - - # def schedule_flush_deleted(): - # main_io_loop = IOLoop.instance() - # main_io_loop.add_callback(flush_deleted) + # Avoid too frequent cleanup by using zookeeper lease recipe. + lease = self.zk_client.NonBlockingLease( + self.pullqueues_cleanup_lease_node, self.FLUSH_DELETED_INTERVAL * 0.8 + ) + if lease: + postgres_pull_queues = (q for q in self.values() + if isinstance(q, PostgresPullQueue)) + for queue in postgres_pull_queues: + queue.flush_deleted() PeriodicCallback(flush_deleted, self.FLUSH_DELETED_INTERVAL * 1000).start() From 60b42fc814e8a4089006460383fda3b7731fbf64 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Tue, 1 Oct 2019 18:23:12 +0300 Subject: [PATCH 163/221] Add postgres option to UA Server --- AppDB/appscale/datastore/scripts/ua_server.py | 472 +++++++++++++++++- .../datastore/scripts/ua_server_backup.py | 134 ++++- .../datastore/scripts/ua_server_restore.py | 146 +++++- AppDB/setup.py | 1 + 4 files changed, 739 insertions(+), 14 deletions(-) diff --git a/AppDB/appscale/datastore/scripts/ua_server.py b/AppDB/appscale/datastore/scripts/ua_server.py index df23d0e0b6..f750c48200 100644 --- a/AppDB/appscale/datastore/scripts/ua_server.py +++ b/AppDB/appscale/datastore/scripts/ua_server.py @@ -15,9 +15,10 @@ import time from kazoo.client import KazooClient, KazooState, NodeExistsError +import psycopg2 from tornado import gen -from appscale.common import appscale_info +from appscale.common import appscale_info, retrying from appscale.common.constants import LOG_FORMAT, ZK_PERSISTENT_RECONNECTS from appscale.datastore import appscale_datastore from appscale.datastore.dbconstants import ( @@ -66,6 +67,115 @@ logger = logging.getLogger(__name__) +zk_client = None + +table_name = "ua_server" + + +def is_connection_error(err): + """ This function is used as retry criteria. + It also makes possible lazy load of psycopg2 package. + + Args: + err: an instance of Exception. + Returns: + True if error is related to connection, False otherwise. + """ + return isinstance(err, psycopg2.InterfaceError) + + +retry_pg_connection = retrying.retry( + retrying_timeout=10, retry_on_exception=is_connection_error +) + + +class PostgresConnectionWrapper(object): + """ Implements automatic reconnection to Postgresql server. """ + + def __init__(self, dsn=None): + self._dsn = dsn + self._connection = None + + def set_dsn(self, dsn): + """ Resets PostgresConnectionWrapper to use new DSN string. + Args: + dsn: a str representing Postgres DSN string. 
+ """ + if self._connection and not self._connection.closed: + self.close() + self._connection = None + self._dsn = dsn + + def get_connection(self): + """ Provides postgres connection. It can either return existing + working connection or establish new one. + Returns: + An instance of psycopg2 connection. + """ + if not self._connection or self._connection.closed: + logger.info('Establishing new connection to Postgres server') + self._connection = psycopg2.connect(dsn=self._dsn) + return self._connection + + def close(self): + """ Closes psycopg2 connection. + """ + return self._connection.close() + + +def init_table(pg_connection_wrapper): + from psycopg2 import IntegrityError # Import psycopg2 lazily + # When multiple TQ servers are notified by ZK about new queue + # they sometimes get IntegrityError despite 'IF NOT EXISTS' + @retrying.retry(max_retries=5, retry_on_exception=IntegrityError) + def ensure_tables_created(): + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'CREATE TABLE IF NOT EXISTS "{table}" (' + ' email varchar(500) NOT NULL,' + ' password varchar(500) NOT NULL,' + ' date_creation timestamp NOT NULL,' + ' date_change timestamp NOT NULL,' + ' date_last_login timestamp NOT NULL,' + ' applications varchar(30)[],' + ' appdrop_rem_token varchar(500),' + ' appdrop_rem_token_exp bigint,' + ' visit_cnt integer,' + ' cookie varchar(4096),' + ' cookie_ip varchar(15),' + ' cookie_exp integer,' + ' cksum integer,' + ' enabled boolean,' + ' type varchar(9),' + ' is_cloud_admin boolean,' + ' capabilities varchar(255),' + ' PRIMARY KEY (email)' + ');' + .format(table=table_name) + ) + + ensure_tables_created() + + +pg_connection_wrapper = None + + +def connect_to_postgres(zk_client): + global pg_connection_wrapper + global_dsn_node = '/appscale/ua_server/postgres_dsn' + if zk_client.exists(global_dsn_node): + pg_dsn = zk_client.get(global_dsn_node) + logger.info('Using PostgreSQL as a backend for UA Server') + else: + pg_dsn = None + logger.info('Using Cassandra as a backend for UA Server') + if pg_dsn: + pg_connection_wrapper = ( + PostgresConnectionWrapper(dsn=pg_dsn[0]) + ) + class Users: attributes_ = USERS_SCHEMA @@ -126,19 +236,39 @@ def arrayit(self): return array + def paramit(self): + params = {} + + for ii in Users.attributes_: + if ii in ["date_creation", "date_change", "date_last_login"]: + t = float(getattr(self, ii + "_")) + timestamp = datetime.datetime.fromtimestamp(t) + params[ii] = timestamp + elif ii in ["appdrop_rem_token_exp", "visit_cnt", "cookie_exp", "cksum"]: + params[ii] = int(getattr(self, ii + "_")) + elif ii in ["enabled", "is_cloud_admin"]: + value = True if getattr(self, ii + "_") == "true" else False + params[ii] = value + else: + params[ii] = getattr(self, ii + "_") + + return params + def unpackit(self, array): for ii in range(0,len(array)): setattr(self, Users.attributes_[ii] + "_", array[ii]) # convert from string to list - if self.applications_: + # TODO: delete it after deleting of Cassandra + if self.applications_ and isinstance(self.applications_, str): self.applications_ = self.applications_.split(':') - else: + elif not self.applications_: self.applications_ = [] return "true" +@retry_pg_connection @tornado_synchronous @gen.coroutine def does_user_exist(username, secret): @@ -146,6 +276,25 @@ def does_user_exist(username, secret): global super_secret if secret != super_secret: raise gen.Return("Error: bad secret") + + if pg_connection_wrapper: + 
pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT 1 FROM "{table}" ' + 'WHERE email = %(username)s' + .format(table=table_name), + vars={ + 'username': username, + } + ) + row = pg_cursor.fetchone() + + if not row: + raise gen.Return("false") + raise gen.Return("true") + try: result = yield db.get_entity(USER_TABLE, username, ["email"]) except AppScaleDBConnectionError as db_error: @@ -156,6 +305,7 @@ def does_user_exist(username, secret): raise gen.Return("false") +@retry_pg_connection @tornado_synchronous @gen.coroutine def get_user_data(username, secret): @@ -164,6 +314,40 @@ def get_user_data(username, secret): global user_schema if secret != super_secret: raise gen.Return("Error: bad secret") + + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT * FROM "{table}" ' + 'WHERE email = %(username)s' + .format(table=table_name), + vars={ + 'username': username, + } + ) + result = pg_cursor.fetchone() + + # todo: delete it adter removal of Cassandra + result = list(result) + result[2] = time.mktime(result[2].timetuple()) + result[3] = time.mktime(result[3].timetuple()) + result[4] = time.mktime(result[4].timetuple()) + + if not result: + raise gen.Return('Error: User {} does not exist'.format(username)) + if len(user_schema) != len(result): + raise gen.Return( + "Error: Bad length of user schema vs user result " + "user schema: " + str(user_schema) + " result: " + str(result) + ) + + user = Users("a", "b", "c") + user.unpackit(result) + raise gen.Return(user.stringit()) + + try: result = yield db.get_entity(USER_TABLE, username, user_schema) except AppScaleDBConnectionError as db_error: @@ -184,6 +368,7 @@ def get_user_data(username, secret): raise gen.Return(user.stringit()) +@retry_pg_connection @tornado_synchronous @gen.coroutine def commit_new_user(user, passwd, utype, secret): @@ -204,6 +389,30 @@ def commit_new_user(user, passwd, utype, secret): if ret == "true": raise gen.Return(error) + if pg_connection_wrapper: + n_user = Users(user, passwd, utype) + params = n_user.paramit() + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'INSERT INTO "{table}"' + 'VALUES ( ' + ' %(email)s, %(pw)s, %(date_creation)s, %(date_change)s, ' + ' %(date_last_login)s, %(applications)s, %(appdrop_rem_token)s, ' + ' %(appdrop_rem_token_exp)s, %(visit_cnt)s, %(cookie)s, ' + ' %(cookie_ip)s, %(cookie_exp)s, %(cksum)s, %(enabled)s, %(type)s, ' + ' %(is_cloud_admin)s, %(capabilities)s ' + ') ' + 'RETURNING date_last_login' + .format(table=table_name), + vars=params + ) + row = pg_cursor.fetchone() + if row: + raise gen.Return("true") + raise gen.Return("false") + n_user = Users(user, passwd, utype) array = n_user.arrayit() result = yield db.put_entity(USER_TABLE, user, user_schema, array) @@ -212,6 +421,7 @@ def commit_new_user(user, passwd, utype, secret): raise gen.Return("true") +@retry_pg_connection @tornado_synchronous @gen.coroutine def add_admin_for_app(user, app, secret): @@ -230,6 +440,25 @@ def add_admin_for_app(user, app, secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE 
"{table}" ' + 'SET applications = applications || %(app)s, ' + ' date_change = current_timestamp ' + 'WHERE email = %(user)s ' + 'RETURNING date_change' + .format(table=table_name), + vars={'app': '{' + app + '}', 'user': user} + ) + user_result = pg_cursor.fetchone() + + if user_result: + raise gen.Return("true") + raise gen.Return('Error: User {} does not exist'.format(user)) + try: user_result = yield db.get_entity(USER_TABLE, user, user_schema) except AppScaleDBConnectionError as db_error: @@ -253,6 +482,7 @@ def add_admin_for_app(user, app, secret): raise gen.Return("Error: Unable to update the user.") +@retry_pg_connection @tornado_synchronous @gen.coroutine def get_all_users(secret): @@ -263,6 +493,26 @@ def get_all_users(secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT email FROM "{table}" ' + .format(table=table_name) + ) + emails = pg_cursor.fetchall() + + if not emails: + raise gen.Return("Error: no users in database") + + # this is a placeholder, soap exception happens if returning empty string + userstring = "____" + for email in emails: + userstring += ":" + email[0] + raise gen.Return(userstring) + + result = yield db.get_table(USER_TABLE, user_schema) if result[0] not in ERROR_CODES: raise gen.Return("Error:" + result[0]) @@ -283,6 +533,7 @@ def get_all_users(secret): raise gen.Return(userstring) +@retry_pg_connection @tornado_synchronous @gen.coroutine def commit_new_token(user, token, token_exp, secret): @@ -293,6 +544,31 @@ def commit_new_token(user, token, token_exp, secret): raise gen.Return("Error: bad secret") columns = ['appdrop_rem_token', 'appdrop_rem_token_exp'] + if pg_connection_wrapper: + params = {'token': token, + 'token_exp': token_exp, + 'user': user} + + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE "{table}" ' + 'SET appdrop_rem_token = %(token)s, ' + ' appdrop_rem_token_exp = %(token_exp)s, ' + ' date_change = current_timestamp ' + 'WHERE email = %(user)s ' + 'RETURNING email' + .format(table=table_name), + vars=params + ) + + result = pg_cursor.fetchone() + + if result: + raise gen.Return("true") + raise gen.Return('Error: User {} does not exist'.format(user)) + try: result = yield db.get_entity(USER_TABLE, user, columns) except AppScaleDBConnectionError as db_error: @@ -316,6 +592,7 @@ def commit_new_token(user, token, token_exp, secret): raise gen.Return("true") +@retry_pg_connection @tornado_synchronous @gen.coroutine def change_password(user, password, secret): @@ -329,6 +606,29 @@ def change_password(user, password, secret): if not password: raise gen.Return("Error: Null password") + if pg_connection_wrapper: + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + + pg_cursor.execute( + 'UPDATE "{table}" ' + 'SET password = %(password)s ' + 'WHERE email = %(user)s AND enabled = TRUE ' + 'RETURNING enabled' + .format(table=table_name), + vars={'password': password, 'user': user} + ) + row = pg_cursor.fetchone() + + if not row: + raise gen.Return("Error: User must be enabled to change 
password") + raise gen.Return("true") + try: result = yield db.get_entity(USER_TABLE, user, ['enabled']) except AppScaleDBConnectionError as db_error: @@ -346,6 +646,7 @@ def change_password(user, password, secret): raise gen.Return("true") +@retry_pg_connection @tornado_synchronous @gen.coroutine def enable_user(user, secret): @@ -355,6 +656,28 @@ def enable_user(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE "{table}" ' + 'SET enabled = TRUE ' + 'WHERE email = %(user)s AND enabled = FALSE ' + 'RETURNING enabled' + .format(table=table_name), + vars={'user': user} + ) + row = pg_cursor.fetchone() + + if not row: + raise gen.Return("Error: Trying to enable a user twice") + raise gen.Return("true") + try: result = yield db.get_entity(USER_TABLE, user, ['enabled']) except AppScaleDBConnectionError as db_error: @@ -370,6 +693,7 @@ def enable_user(user, secret): raise gen.Return("true") +@retry_pg_connection @tornado_synchronous @gen.coroutine def disable_user(user, secret): @@ -379,6 +703,28 @@ def disable_user(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE "{table}" ' + 'SET enabled = FALSE ' + 'WHERE email = %(user)s AND enabled = TRUE ' + 'RETURNING enabled' + .format(table=table_name), + vars={'user': user} + ) + row = pg_cursor.fetchone() + + if not row: + raise gen.Return("Error: Trying to disable a user twice") + raise gen.Return("true") + try: result = yield db.get_entity(USER_TABLE, user, ['enabled']) except AppScaleDBConnectionError as db_error: @@ -395,6 +741,7 @@ def disable_user(user, secret): raise gen.Return("true") +@retry_pg_connection @tornado_synchronous @gen.coroutine def delete_user(user, secret): @@ -404,6 +751,27 @@ def delete_user(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'DELETE FROM "{table}" ' + 'WHERE email = %(user)s AND enabled = FALSE ' + 'RETURNING enabled' + .format(table=table_name), + vars={'user': user} + ) + row = pg_cursor.fetchone() + + if not row: + raise gen.Return("Error: unable to delete active user. 
Disable user first") + raise gen.Return("true") + try: result = yield db.get_entity(USER_TABLE, user, ['enabled']) except AppScaleDBConnectionError as db_error: @@ -421,6 +789,7 @@ def delete_user(user, secret): raise gen.Return("true") +@retry_pg_connection @tornado_synchronous @gen.coroutine def is_user_enabled(user, secret): @@ -430,6 +799,22 @@ def is_user_enabled(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT enabled FROM "{table}" ' + 'WHERE email = %(user)s' + .format(table=table_name), + vars={'user': user} + ) + result = pg_cursor.fetchone() + + if not result: + raise gen.Return("false") + raise gen.Return(str(result[0]).lower()) + try: result = yield db.get_entity(USER_TABLE, user, ['enabled']) except AppScaleDBConnectionError as db_error: @@ -440,6 +825,7 @@ def is_user_enabled(user, secret): raise gen.Return(result[1]) +@retry_pg_connection @tornado_synchronous @gen.coroutine def is_user_cloud_admin(username, secret): @@ -448,6 +834,22 @@ def is_user_cloud_admin(username, secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT is_cloud_admin FROM "{table}" ' + 'WHERE email = %(user)s ' + .format(table=table_name), + vars={'user': username} + ) + result = pg_cursor.fetchone() + + if not result: + raise gen.Return("false") + raise gen.Return(str(result[0]).lower()) + try: result = yield db.get_entity(USER_TABLE, username, ["is_cloud_admin"]) except AppScaleDBConnectionError as db_error: @@ -459,6 +861,7 @@ def is_user_cloud_admin(username, secret): raise gen.Return("false") +@retry_pg_connection @tornado_synchronous @gen.coroutine def set_cloud_admin_status(username, is_cloud_admin, secret): @@ -466,12 +869,32 @@ def set_cloud_admin_status(username, is_cloud_admin, secret): global super_secret if secret != super_secret: raise gen.Return("Error: bad secret") + + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE "{table}" ' + 'SET is_cloud_admin = %(is_cloud_admin)s ' + 'WHERE email = %(user)s ' + 'RETURNING date_change' + .format(table=table_name), + vars={'is_cloud_admin': is_cloud_admin, 'user': username} + ) + user_result = pg_cursor.fetchone() + + if user_result: + raise gen.Return("true") + raise gen.Return('Error: User {} does not exist'.format(username)) + result = yield db.put_entity(USER_TABLE, username, ['is_cloud_admin'], [is_cloud_admin]) if result[0] not in ERROR_CODES: raise gen.Return("false:" + result[0]) raise gen.Return("true") +@retry_pg_connection @tornado_synchronous @gen.coroutine def get_capabilities(username, secret): @@ -480,6 +903,23 @@ def get_capabilities(username, secret): if secret != super_secret: raise gen.Return("Error: bad secret") + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT capabilities FROM "{table}" ' + 'WHERE email = %(user)s ' + .format(table=table_name), + vars={'user': username} + ) + user_result = pg_cursor.fetchone() + + if user_result: + raise 
gen.Return(user_result[0]) + raise gen.Return('Error: User {} does not exist'.format(username)) + + try: result = yield db.get_entity(USER_TABLE, username, ["capabilities"]) except AppScaleDBConnectionError as db_error: @@ -491,6 +931,7 @@ def get_capabilities(username, secret): raise gen.Return([result[0]]) +@retry_pg_connection @tornado_synchronous @gen.coroutine def set_capabilities(username, capabilities, secret): @@ -498,6 +939,25 @@ def set_capabilities(username, capabilities, secret): global super_secret if secret != super_secret: raise gen.Return("Error: bad secret") + + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE "{table}" ' + 'SET capabilities = %(capabilities)s ' + 'WHERE email = %(user)s ' + 'RETURNING date_change' + .format(table=table_name), + vars={'capabilities': capabilities, 'user': username} + ) + user_result = pg_cursor.fetchone() + + if user_result: + raise gen.Return("true") + raise gen.Return('Error: User {} does not exist'.format(username)) + result = yield db.put_entity(USER_TABLE, username, ['capabilities'], [capabilities]) if result[0] not in ERROR_CODES: raise gen.Return("false:" + result[0]) @@ -514,6 +974,7 @@ def usage(): def register_location(host, port): """ Register service location with ZooKeeper. """ + global zk_client zk_client = KazooClient(hosts=appscale_info.get_zk_locations_string(), connection_retry=ZK_PERSISTENT_RECONNECTS) zk_client.start() @@ -572,6 +1033,11 @@ def main(): register_location(appscale_info.get_private_ip(), bindport) + + connect_to_postgres(zk_client) + if pg_connection_wrapper: + init_table(pg_connection_wrapper) + db = appscale_datastore.DatastoreFactory.getDatastore(datastore_type) ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() valid_datastores = appscale_datastore.DatastoreFactory.valid_datastores() diff --git a/AppDB/appscale/datastore/scripts/ua_server_backup.py b/AppDB/appscale/datastore/scripts/ua_server_backup.py index 7824b61e7d..4159f1b176 100644 --- a/AppDB/appscale/datastore/scripts/ua_server_backup.py +++ b/AppDB/appscale/datastore/scripts/ua_server_backup.py @@ -2,14 +2,19 @@ import argparse import csv +import datetime import errno import logging import os import time +from kazoo.client import KazooClient +import psycopg2 from tornado import gen -from appscale.common.constants import LOG_FORMAT + +from appscale.common import appscale_info, retrying +from appscale.common.constants import LOG_FORMAT, ZK_PERSISTENT_RECONNECTS from appscale.datastore import appscale_datastore from appscale.datastore.dbconstants import ( AppScaleDBConnectionError, @@ -23,7 +28,82 @@ logger = logging.getLogger(__name__) +zk_client = None + +table_name = "ua_server" + + +def is_connection_error(err): + """ This function is used as retry criteria. + It also makes possible lazy load of psycopg2 package. + + Args: + err: an instance of Exception. + Returns: + True if error is related to connection, False otherwise. + """ + from psycopg2 import InterfaceError + return isinstance(err, InterfaceError) + + +retry_pg_connection = retrying.retry( + retrying_timeout=10, retry_on_exception=is_connection_error +) + +class PostgresConnectionWrapper(object): + """ Implements automatic reconnection to Postgresql server. """ + + def __init__(self, dsn=None): + self._dsn = dsn + self._connection = None + + def set_dsn(self, dsn): + """ Resets PostgresConnectionWrapper to use new DSN string. 
+ Args: + dsn: a str representing Postgres DSN string. + """ + if self._connection and not self._connection.closed: + self.close() + self._connection = None + self._dsn = dsn + + def get_connection(self): + """ Provides postgres connection. It can either return existing + working connection or establish new one. + Returns: + An instance of psycopg2 connection. + """ + if not self._connection or self._connection.closed: + logger.info('Establishing new connection to Postgres server') + self._connection = psycopg2.connect(dsn=self._dsn) + return self._connection + + def close(self): + """ Closes psycopg2 connection. + """ + return self._connection.close() + + +pg_connection_wrapper = None + + +def connect_to_postgres(zk_client): + global pg_connection_wrapper + global_dsn_node = '/appscale/ua_server/postgres_dsn' + if zk_client.exists(global_dsn_node): + pg_dsn = zk_client.get(global_dsn_node) + logger.info('Using PostgreSQL as a backend for UA Server') + else: + pg_dsn = None + logger.info('Using Cassandra as a backend for UA Server') + if pg_dsn: + pg_connection_wrapper = ( + PostgresConnectionWrapper(dsn=pg_dsn[0]) + ) + + +@retry_pg_connection @tornado_synchronous @gen.coroutine def get_table_sync(datastore, table_name, schema): @@ -34,6 +114,17 @@ def get_table_sync(datastore, table_name, schema): table_name: Table name in datastore. schema: Table schema. """ + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT * FROM "{table}" ' + .format(table=table_name) + ) + result = pg_cursor.fetchall() + raise gen.Return(result) + result = yield datastore.get_table(table_name, schema) raise gen.Return(result) @@ -63,6 +154,23 @@ def create_backup_dir(backup_dir): logger.info("Backup dir created: {0}".format(backup_dir)) +def prepare_for_backup(rows): + """ Converts date fields to timestamp and application list to str. + + Args: + rows: A tuple of all rows in postgres database. 
+ """ + # todo: delete it after removal of Cassandra + for row in rows: + # 2 - 4 indexes of dates + row[2] = datetime.datetime.fromtimestamp(row[2]) + row[3] = datetime.datetime.fromtimestamp(row[3]) + row[4] = datetime.datetime.fromtimestamp(row[4]) + # 5 index of applications list + if row[5]: + row[5] = row[5].split(':') + + def main(): logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) @@ -74,6 +182,13 @@ def main(): if args.verbose: logging.getLogger('appscale').setLevel(logging.DEBUG) + # Configure zookeeper and db access + zk_client = KazooClient( + hosts=','.join(appscale_info.get_zk_node_ips()), + connection_retry=ZK_PERSISTENT_RECONNECTS) + zk_client.start() + connect_to_postgres(zk_client) + datastore_type = 'cassandra' ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() @@ -104,17 +219,26 @@ def main(): raise AppScaleDBConnectionError('No response from cassandra.') schema_cols_num = len(USERS_SCHEMA) - table = get_table_sync(db, USERS_TABLE, user_schema)[1:] - reshaped_table = reshape(table, schema_cols_num) + + if pg_connection_wrapper: + table = get_table_sync(db, table_name, user_schema) + else: + table = get_table_sync(db, USERS_TABLE, user_schema)[1:] + reshaped_table = reshape(table, schema_cols_num) create_backup_dir(BACKUP_FILE_LOCATION) backup_timestamp = time.strftime("%Y-%m-%d_%H-%M-%S") - output_file = '{0}ua_server_{1}.csv'.format(BACKUP_FILE_LOCATION, backup_timestamp) + output_file = '{0}ua_server_{1}.csv'.\ + format(BACKUP_FILE_LOCATION, backup_timestamp) # v1 output format with open(output_file, 'w') as fout: writer = csv.DictWriter(fout, delimiter=',', fieldnames=USERS_SCHEMA) writer.writeheader() - rows = [dict(zip(USERS_SCHEMA, row)) for row in reshaped_table] + if pg_connection_wrapper: + rows = [dict(zip(USERS_SCHEMA, row)) for row in table] + else: + prepare_for_backup(reshaped_table) + rows = [dict(zip(USERS_SCHEMA, row)) for row in reshaped_table] writer.writerows(rows) diff --git a/AppDB/appscale/datastore/scripts/ua_server_restore.py b/AppDB/appscale/datastore/scripts/ua_server_restore.py index 294647d0b2..21af758a2f 100644 --- a/AppDB/appscale/datastore/scripts/ua_server_restore.py +++ b/AppDB/appscale/datastore/scripts/ua_server_restore.py @@ -2,12 +2,16 @@ import argparse import csv +import datetime import logging import time +from kazoo.client import KazooClient +import psycopg2 from tornado import gen -from appscale.common.constants import LOG_FORMAT +from appscale.common import appscale_info, retrying +from appscale.common.constants import LOG_FORMAT, ZK_PERSISTENT_RECONNECTS from appscale.datastore import appscale_datastore from appscale.datastore.dbconstants import ( AppScaleDBConnectionError, @@ -17,9 +21,87 @@ from appscale.datastore.utils import tornado_synchronous +logger = logging.getLogger(__name__) + +zk_client = None + +table_name = "ua_server" + + +def is_connection_error(err): + """ This function is used as retry criteria. + It also makes possible lazy load of psycopg2 package. + + Args: + err: an instance of Exception. + Returns: + True if error is related to connection, False otherwise. + """ + from psycopg2 import InterfaceError + return isinstance(err, InterfaceError) + + +retry_pg_connection = retrying.retry( + retrying_timeout=10, retry_on_exception=is_connection_error +) + + +class PostgresConnectionWrapper(object): + """ Implements automatic reconnection to Postgresql server. 
""" + + def __init__(self, dsn=None): + self._dsn = dsn + self._connection = None + + def set_dsn(self, dsn): + """ Resets PostgresConnectionWrapper to use new DSN string. + Args: + dsn: a str representing Postgres DSN string. + """ + if self._connection and not self._connection.closed: + self.close() + self._connection = None + self._dsn = dsn + + def get_connection(self): + """ Provides postgres connection. It can either return existing + working connection or establish new one. + Returns: + An instance of psycopg2 connection. + """ + if not self._connection or self._connection.closed: + logger.info('Establishing new connection to Postgres server') + self._connection = psycopg2.connect(dsn=self._dsn) + return self._connection + + def close(self): + """ Closes psycopg2 connection. + """ + return self._connection.close() + + +pg_connection_wrapper = None + + +def connect_to_postgres(zk_client): + global pg_connection_wrapper + global_dsn_node = '/appscale/ua_server/postgres_dsn' + if zk_client.exists(global_dsn_node): + pg_dsn = zk_client.get(global_dsn_node) + logger.info('Using PostgreSQL as a backend for UA Server') + else: + pg_dsn = None + logger.info('Using Cassandra as a backend for UA Server') + if pg_dsn: + pg_connection_wrapper = ( + PostgresConnectionWrapper(dsn=pg_dsn[0]) + ) + + +@retry_pg_connection @tornado_synchronous @gen.coroutine -def put_entity_sync(datastore, table_name, user, schema, array): +def put_entity_sync(datastore, table_name, user, schema, user_data): """ Puts data of specified user from backup to datastore. Args: @@ -27,9 +109,28 @@ def put_entity_sync(datastore, table_name, user, schema, array): table_name: Table name in datastore. user: User name. schema: Table schema. - array: List of all user's fields. + user_data: List or dict (if postgres role is enabled) of all user's fields. 
""" - result = yield datastore.put_entity(table_name, user, schema, array) + if pg_connection_wrapper: + pg_connection = pg_connection_wrapper.get_connection() + with pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'INSERT INTO "{table}"' + 'VALUES ( ' + ' %(email)s, %(pw)s, %(date_creation)s, %(date_change)s, ' + ' %(date_last_login)s, %(applications)s, %(appdrop_rem_token)s, ' + ' %(appdrop_rem_token_exp)s, %(visit_cnt)s, %(cookie)s, ' + ' %(cookie_ip)s, %(cookie_exp)s, %(cksum)s, %(enabled)s, %(type)s, ' + ' %(is_cloud_admin)s, %(capabilities)s ' + ') ' + 'RETURNING date_last_login' + .format(table=table_name), + vars=user_data + ) + result = pg_cursor.fetchone() + raise gen.Return(result) + result = yield datastore.put_entity(table_name, user, schema, user_data) raise gen.Return(result) def main(): @@ -45,6 +146,13 @@ def main(): if args.verbose: logging.getLogger('appscale').setLevel(logging.DEBUG) + # Configure zookeeper and db access + zk_client = KazooClient( + hosts=','.join(appscale_info.get_zk_node_ips()), + connection_retry=ZK_PERSISTENT_RECONNECTS) + zk_client.start() + connect_to_postgres(zk_client) + datastore_type = 'cassandra' ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() @@ -80,5 +188,31 @@ def main(): reader = csv.DictReader(fin, delimiter=',') # Iterate through all users in file for row in reader: - array = [row[key] for key in USERS_SCHEMA] - put_entity_sync(db, USERS_TABLE, array[0], user_schema, array) + if pg_connection_wrapper: + if not row['applications']: + row['applications'] = None + else: + # delete square brackets added by csv module + apps = row['applications'][1:-1] + # csv module adds extra quotes each time + apps = apps.replace("'", "") + row['applications'] = '{' + apps + '}' + put_entity_sync(db, table_name, row['email'], user_schema, row) + else: + # Convert dates to timestamp + t = str(time.mktime(datetime.datetime.strptime( + row['date_creation'], '%Y-%m-%d %H:%M:%S').timetuple())) + row['date_creation'] = t + t = str(time.mktime(datetime.datetime.strptime( + row['date_change'], '%Y-%m-%d %H:%M:%S').timetuple())) + row['date_change'] = t + t = str(time.mktime(datetime.datetime.strptime( + row['date_last_login'], '%Y-%m-%d %H:%M:%S').timetuple())) + row['date_last_login'] = t + + apps = row['applications'][1:-1] + apps = apps.replace("'", "").replace(', ', ':') + row['applications'] = apps + + array = [row[key] for key in USERS_SCHEMA] + put_entity_sync(db, USERS_TABLE, array[0], user_schema, array) diff --git a/AppDB/setup.py b/AppDB/setup.py index 633b19cfa8..6aeb9f6b35 100644 --- a/AppDB/setup.py +++ b/AppDB/setup.py @@ -15,6 +15,7 @@ 'kazoo', 'monotonic', 'mmh3', + 'psycopg2-binary', 'SOAPpy', 'tornado', 'foundationdb~=6.1.8' From 39ddea65539bbbc2a7d775881418580f137b5998 Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 1 Oct 2019 18:24:07 +0300 Subject: [PATCH 164/221] Ignore TaskQueue test outcomes and venvs --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index bf4a9ce5c6..03badd2120 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ /AppTaskQueue/build /AppTaskQueue/dist /AppTaskQueue/appscale/taskqueue/protocols/*_pb2.py +/AppTaskQueue/test/e2e/venv +/AppTaskQueue/test/suites/logs /AppDB/build /AppDB/dist From 0b46b1128bd5bc1f2f5f54dd0bde69416c0f392d Mon Sep 17 00:00:00 2001 From: Anton Leonov Date: Tue, 1 Oct 2019 20:38:29 +0300 Subject: [PATCH 165/221] Address zookeeper lock and lease issues --- .../taskqueue/pg_connection_wrapper.py | 6 
++-- .../appscale/taskqueue/queue_manager.py | 36 +++++++++++-------- .../test/helpers/restart-taskqueue.sh | 6 ++-- 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py b/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py index 43540c59b9..5958f13450 100644 --- a/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py +++ b/AppTaskQueue/appscale/taskqueue/pg_connection_wrapper.py @@ -4,6 +4,7 @@ import psycopg2 from tornado.ioloop import IOLoop +from appscale.common import retrying from appscale.taskqueue.utils import logger @@ -29,6 +30,7 @@ def set_dsn(self, dsn): self._connection = None self._dsn = dsn + @retrying.retry(retrying_timeout=60, backoff_multiplier=1) def get_connection(self): """ Provides postgres connection. It can either return existing working connection or establish new one. @@ -40,8 +42,8 @@ def get_connection(self): logger.info('Establishing new connection to Postgres server') self._connection = psycopg2.connect( dsn=self._dsn, - connect_timeout=60, - options='-c statement_timeout=10000', + connect_timeout=10, + options='-c statement_timeout=60000', keepalives_idle=60, keepalives_interval=15, keepalives_count=4 diff --git a/AppTaskQueue/appscale/taskqueue/queue_manager.py b/AppTaskQueue/appscale/taskqueue/queue_manager.py index 17a1ed0d57..f5fad01ab2 100644 --- a/AppTaskQueue/appscale/taskqueue/queue_manager.py +++ b/AppTaskQueue/appscale/taskqueue/queue_manager.py @@ -1,6 +1,8 @@ """ Keeps track of queue configuration details for producer connections. """ import json +import uuid +from datetime import timedelta from kazoo.exceptions import ZookeeperError from tornado.ioloop import IOLoop, PeriodicCallback @@ -30,22 +32,22 @@ def __init__(self, zk_client, project_id): self.project_id = project_id self._configure_periodical_flush() - project_node = '/appscale/projects/{}/'.format(project_id) - self.queues_node = project_node + 'queues' + self.queues_node = '/appscale/projects/{}/queues'.format(project_id) self.pullqueues_initialization_lock = zk_client.Lock( - project_node + 'pullqueues_initialization_lock' + self.queues_node + '/pullqueues_initialization_lock' ) self.pullqueues_initialized_version_node = ( - project_node + 'pullqueues_initialized_version' + self.queues_node + '/pullqueues_initialized_version' ) self.pullqueues_cleanup_lease_node = ( - project_node + 'pullqueues_cleanup_lease' + self.queues_node + '/pullqueues_cleanup_lease' ) self.watch = zk_client.DataWatch(self.queues_node, self._update_queues_watch) self.celery = None self.rates = None self._stopped = False + self._holder_id = str(uuid.uuid4()) def update_queues(self, queue_config, znode_stats): """ Caches new configuration details and cleans up old state. @@ -57,8 +59,10 @@ def update_queues(self, queue_config, znode_stats): logger.info('Updating queues for {}'.format(self.project_id)) if not queue_config: new_queue_config = {'default': {'rate': '5/s'}} + config_last_modified = 0 else: new_queue_config = json.loads(queue_config.decode('utf-8'))['queue'] + config_last_modified = znode_stats.last_modified # Clean up obsolete queues. 
to_stop = [queue for queue in self if queue not in new_queue_config] @@ -73,7 +77,7 @@ def update_queues(self, queue_config, znode_stats): self._update_pull_queues( ((queue_name, queue) for queue_name, queue in new_queue_config.items() if queue.get('mode', 'push') != 'push'), - znode_stats + config_last_modified ) def _update_push_queues(self, new_push_queue_configs): @@ -101,14 +105,14 @@ def _update_push_queues(self, new_push_queue_configs): for queue in push_queues: queue.celery = self.celery - def _update_pull_queues(self, new_pull_queue_configs, znode_stats): + def _update_pull_queues(self, new_pull_queue_configs, config_last_modified): """ Caches new pull queue configuration details. Args: new_pull_queue_configs: A sequence of (queue_name, queue_info) tuples. - znode_stats: An instance of ZnodeStats. + config_last_modified: A number representing configs version. """ - new_version = znode_stats.last_modified + new_version = config_last_modified if self._get_pullqueue_initialized_version() < new_version: # Only one TaskQueue server proceeds with Postgres tables initialization. with self.pullqueues_initialization_lock: @@ -139,11 +143,11 @@ def _get_pullqueue_initialized_version(self): """ Retrieves zookeeper node holding version of PullQueues configs which is currently provisioned in Postgres. """ - initialized_version = -1 + initialized_version = b'-1' version_node = self.pullqueues_initialized_version_node if self.zk_client.exists(version_node): - initialized_version = self.zk_client.get(version_node) - return initialized_version + initialized_version = self.zk_client.get(version_node)[0] + return float(initialized_version) def _set_pullqueue_initialized_version(self, version): """ Sets zookeeper node holding version of PullQueues configs @@ -155,9 +159,9 @@ def _set_pullqueue_initialized_version(self, version): """ version_node = self.pullqueues_initialized_version_node if self.zk_client.exists(version_node): - self.zk_client.set(version_node, version) + self.zk_client.set(version_node, str(version).encode()) else: - self.zk_client.create(version_node, version) + self.zk_client.create(version_node, str(version).encode()) def ensure_watch(self): """ Restart the watch if it has been cancelled. """ @@ -207,8 +211,10 @@ def flush_deleted(): pull queues. """ # Avoid too frequent cleanup by using zookeeper lease recipe. 
+ duration = timedelta(seconds=self.FLUSH_DELETED_INTERVAL * 0.8) + holder_id = self._holder_id lease = self.zk_client.NonBlockingLease( - self.pullqueues_cleanup_lease_node, self.FLUSH_DELETED_INTERVAL * 0.8 + self.pullqueues_cleanup_lease_node, duration, holder_id ) if lease: postgres_pull_queues = (q for q in self.values() diff --git a/AppTaskQueue/test/helpers/restart-taskqueue.sh b/AppTaskQueue/test/helpers/restart-taskqueue.sh index b955b9676e..fec271f471 100755 --- a/AppTaskQueue/test/helpers/restart-taskqueue.sh +++ b/AppTaskQueue/test/helpers/restart-taskqueue.sh @@ -115,8 +115,10 @@ PORTS="${PORTS//,/ }" log "Starting taskqueue servers on ports: ${PORTS}" for port in ${PORTS} do - nohup /opt/appscale_venvs/appscale_taskqueue/bin/appscale-taskqueue -p \ - "${port}" --verbose > "/var/log/appscale/taskqueue-${port}.log" 2>&1 & + tq_executable="/opt/appscale_venvs/appscale_taskqueue/bin/appscale-taskqueue" + tq_log="/var/log/appscale/taskqueue-${port}.log" + command="${tq_executable} -p '${port}' --verbose 2>&1 | tee -a ${tq_log}" + nohup sh -c "${command} &" /dev/null 2>/dev/null done log "Ensuring servers are running" From 6e141a59bc5197a3a0a7d3f420ea90dce73069e1 Mon Sep 17 00:00:00 2001 From: Honcharov12 Date: Thu, 3 Oct 2019 18:18:42 +0300 Subject: [PATCH 166/221] Code refactoring --- AppDB/appscale/datastore/scripts/ua_server.py | 75 ++++++++----------- .../datastore/scripts/ua_server_backup.py | 15 ++-- .../datastore/scripts/ua_server_restore.py | 15 ++-- 3 files changed, 43 insertions(+), 62 deletions(-) diff --git a/AppDB/appscale/datastore/scripts/ua_server.py b/AppDB/appscale/datastore/scripts/ua_server.py index 24e79d52a3..d858de12f6 100644 --- a/AppDB/appscale/datastore/scripts/ua_server.py +++ b/AppDB/appscale/datastore/scripts/ua_server.py @@ -67,12 +67,11 @@ zk_client = None -table_name = "ua_server" +table_name = "ua_users" def is_connection_error(err): """ This function is used as retry criteria. - It also makes possible lazy load of psycopg2 package. Args: err: an instance of Exception. 
@@ -122,18 +121,16 @@ def close(self): def init_table(pg_connection_wrapper): - from psycopg2 import IntegrityError # Import psycopg2 lazily # When multiple TQ servers are notified by ZK about new queue # they sometimes get IntegrityError despite 'IF NOT EXISTS' - @retrying.retry(max_retries=5, retry_on_exception=IntegrityError) + @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) def ensure_tables_created(): - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'CREATE TABLE IF NOT EXISTS "{table}" (' ' email varchar(500) NOT NULL,' - ' password varchar(500) NOT NULL,' + ' pw varchar(500) NOT NULL,' ' date_creation timestamp NOT NULL,' ' date_change timestamp NOT NULL,' ' date_last_login timestamp NOT NULL,' @@ -151,7 +148,8 @@ def ensure_tables_created(): ' capabilities varchar(255),' ' PRIMARY KEY (email)' ');' - .format(table=table_name) + 'CREATE SCHEMA IF NOT EXISTS "appscale_ua";' + .format(table=table_name) ) ensure_tables_created() @@ -276,8 +274,7 @@ def does_user_exist(username, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'SELECT 1 FROM "{table}" ' @@ -314,14 +311,13 @@ def get_user_data(username, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT * FROM "{table}" ' + 'SELECT {columns} FROM "{table}" ' 'WHERE email = %(username)s' - .format(table=table_name), - vars={ + .format(table=table_name, columns=', '.join(USERS_SCHEMA)), + vars={ 'username': username, } ) @@ -390,11 +386,10 @@ def commit_new_user(user, passwd, utype, secret): if pg_connection_wrapper: n_user = Users(user, passwd, utype) params = n_user.paramit() - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'INSERT INTO "{table}"' + 'INSERT INTO "{table}" ({columns}) ' 'VALUES ( ' ' %(email)s, %(pw)s, %(date_creation)s, %(date_change)s, ' ' %(date_last_login)s, %(applications)s, %(appdrop_rem_token)s, ' @@ -403,7 +398,7 @@ def commit_new_user(user, passwd, utype, secret): ' %(is_cloud_admin)s, %(capabilities)s ' ') ' 'RETURNING date_last_login' - .format(table=table_name), + .format(table=table_name, columns=', '.join(USERS_SCHEMA)), vars=params ) row = pg_cursor.fetchone() @@ -439,8 +434,7 @@ def add_admin_for_app(user, app, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'UPDATE "{table}" ' @@ -492,8 +486,7 @@ def get_all_users(secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'SELECT 
email FROM "{table}" ' @@ -547,8 +540,7 @@ def commit_new_token(user, token, token_exp, secret): 'token_exp': token_exp, 'user': user} - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'UPDATE "{table}" ' @@ -609,13 +601,13 @@ def change_password(user, password, secret): exist = does_user_exist(user, secret) if exist != "true": raise gen.Return('Error: User {} does not exist'.format(user)) - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'UPDATE "{table}" ' - 'SET password = %(password)s ' + 'SET pw = %(password)s ' 'WHERE email = %(user)s AND enabled = TRUE ' 'RETURNING enabled' .format(table=table_name), @@ -659,8 +651,8 @@ def enable_user(user, secret): exist = does_user_exist(user, secret) if exist != "true": raise gen.Return('Error: User {} does not exist'.format(user)) - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'UPDATE "{table}" ' @@ -706,8 +698,8 @@ def disable_user(user, secret): exist = does_user_exist(user, secret) if exist != "true": raise gen.Return('Error: User {} does not exist'.format(user)) - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'UPDATE "{table}" ' @@ -754,8 +746,8 @@ def delete_user(user, secret): exist = does_user_exist(user, secret) if exist != "true": raise gen.Return('Error: User {} does not exist'.format(user)) - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'DELETE FROM "{table}" ' @@ -798,8 +790,7 @@ def is_user_enabled(user, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'SELECT enabled FROM "{table}" ' @@ -833,8 +824,7 @@ def is_user_cloud_admin(username, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'SELECT is_cloud_admin FROM "{table}" ' @@ -869,8 +859,7 @@ def set_cloud_admin_status(username, is_cloud_admin, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'UPDATE "{table}" ' @@ -902,8 +891,7 @@ def get_capabilities(username, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 
'SELECT capabilities FROM "{table}" ' @@ -939,8 +927,7 @@ def set_capabilities(username, capabilities, secret): raise gen.Return("Error: bad secret") if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( 'UPDATE "{table}" ' diff --git a/AppDB/appscale/datastore/scripts/ua_server_backup.py b/AppDB/appscale/datastore/scripts/ua_server_backup.py index 4159f1b176..9162d3c86c 100644 --- a/AppDB/appscale/datastore/scripts/ua_server_backup.py +++ b/AppDB/appscale/datastore/scripts/ua_server_backup.py @@ -30,20 +30,18 @@ zk_client = None -table_name = "ua_server" +table_name = "ua_users" def is_connection_error(err): """ This function is used as retry criteria. - It also makes possible lazy load of psycopg2 package. Args: err: an instance of Exception. Returns: True if error is related to connection, False otherwise. """ - from psycopg2 import InterfaceError - return isinstance(err, InterfaceError) + return isinstance(err, psycopg2.InterfaceError) retry_pg_connection = retrying.retry( @@ -115,12 +113,11 @@ def get_table_sync(datastore, table_name, schema): schema: Table schema. """ if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT * FROM "{table}" ' - .format(table=table_name) + 'SELECT {columns} FROM "{table}" ' + .format(table=table_name, columns=', '.join(schema)) ) result = pg_cursor.fetchall() raise gen.Return(result) @@ -221,7 +218,7 @@ def main(): schema_cols_num = len(USERS_SCHEMA) if pg_connection_wrapper: - table = get_table_sync(db, table_name, user_schema) + table = get_table_sync(db, table_name, USERS_SCHEMA) else: table = get_table_sync(db, USERS_TABLE, user_schema)[1:] reshaped_table = reshape(table, schema_cols_num) diff --git a/AppDB/appscale/datastore/scripts/ua_server_restore.py b/AppDB/appscale/datastore/scripts/ua_server_restore.py index 21af758a2f..78fc266f89 100644 --- a/AppDB/appscale/datastore/scripts/ua_server_restore.py +++ b/AppDB/appscale/datastore/scripts/ua_server_restore.py @@ -25,20 +25,18 @@ zk_client = None -table_name = "ua_server" +table_name = "ua_users" def is_connection_error(err): """ This function is used as retry criteria. - It also makes possible lazy load of psycopg2 package. Args: err: an instance of Exception. Returns: True if error is related to connection, False otherwise. """ - from psycopg2 import InterfaceError - return isinstance(err, InterfaceError) + return isinstance(err, psycopg2.InterfaceError) retry_pg_connection = retrying.retry( @@ -112,11 +110,10 @@ def put_entity_sync(datastore, table_name, user, schema, user_data): user_data: List or dict (if postgres role is enabled) of all user's fields. 
""" if pg_connection_wrapper: - pg_connection = pg_connection_wrapper.get_connection() - with pg_connection: + with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'INSERT INTO "{table}"' + 'INSERT INTO "{table}" ({columns}) ' 'VALUES ( ' ' %(email)s, %(pw)s, %(date_creation)s, %(date_change)s, ' ' %(date_last_login)s, %(applications)s, %(appdrop_rem_token)s, ' @@ -125,7 +122,7 @@ def put_entity_sync(datastore, table_name, user, schema, user_data): ' %(is_cloud_admin)s, %(capabilities)s ' ') ' 'RETURNING date_last_login' - .format(table=table_name), + .format(table=table_name, columns=', '.join(schema)), vars=user_data ) result = pg_cursor.fetchone() @@ -197,7 +194,7 @@ def main(): # csv module adds extra quotes each time apps = apps.replace("'", "") row['applications'] = '{' + apps + '}' - put_entity_sync(db, table_name, row['email'], user_schema, row) + put_entity_sync(db, table_name, row['email'], USERS_SCHEMA, row) else: # Convert dates to timestamp t = str(time.mktime(datetime.datetime.strptime( From d1771be7677204917d83dace0a2ac62bb89885e4 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 3 Oct 2019 17:48:18 -0700 Subject: [PATCH 167/221] Don't run groomers for FDB datastore backend This prevents the groomer and the transaction groomer from running when fdb_clusterfile_content is defined since they serve no purpose for the FDB backend. --- AppController/djinn.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index d47099023c..e022b7eaec 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -1170,6 +1170,7 @@ def get_database_information(secret) def run_groomer(secret) return BAD_SECRET_MSG unless valid_secret?(secret) return NOT_READY if @nodes.empty? + return INVALID_REQUEST if @options.key?('fdb_clusterfile_content') Thread.new { run_groomer_command = `which appscale-groomer`.chomp @@ -3260,10 +3261,13 @@ def start_stop_api_services @done_initializing = true Djinn.log_info("UserAppServer is ready.") + groomer_required = !@options.key?('fdb_clusterfile_content') + # The services below depends directly or indirectly on the UAServer to # be operational. So we start them after we test the UAServer. threads = [] - if my_node.is_db_master? or my_node.is_db_slave? or my_node.is_zookeeper? + if groomer_required && (my_node.is_db_master? || my_node.is_db_slave? || + my_node.is_zookeeper?) threads << Thread.new { if my_node.is_db_master? or my_node.is_db_slave? start_groomer_service From cd8c4bed5b9f67add36f9403489e2e8175302f28 Mon Sep 17 00:00:00 2001 From: whoarethebritons Date: Mon, 7 Oct 2019 10:14:57 -0700 Subject: [PATCH 168/221] tasks should use http to avoid scheme redirects since they use HAProxy --- AppTaskQueue/appscale/taskqueue/push_worker.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/push_worker.py b/AppTaskQueue/appscale/taskqueue/push_worker.py index 3ffef2d159..6aa814cfa6 100644 --- a/AppTaskQueue/appscale/taskqueue/push_worker.py +++ b/AppTaskQueue/appscale/taskqueue/push_worker.py @@ -154,16 +154,8 @@ def execute_task(task, headers, args): update_task(task_name, TASK_STATES.FAILED) return - # Targets do not get X-Forwarded-Proto from nginx, they use haproxy port. 
- headers['X-Forwarded-Proto'] = url.scheme - if url.scheme == 'http': - connection = httplib.HTTPConnection(remote_host, url.port) - elif url.scheme == 'https': - connection = httplib.HTTPSConnection(remote_host, url.port) - else: - logger.error("Task %s tried to use url scheme %s, " - "which is not supported." % ( - args['task_name'], url.scheme)) + # Tasks should use HTTP to bypass scheme redirects since they use HAProxy. + connection = httplib.HTTPConnection(remote_host, url.port) skip_host = False if b'host' in headers or b'Host' in headers: From 41b55611cca4e3b829b643a263ba1e157fb0dd78 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 8 Oct 2019 10:37:32 -0700 Subject: [PATCH 169/221] Forbid uppercase letters in channel ID Authentication does not work with newer versions of ejabberd when the username contains an uppercase character. 18.01 (the version in Bionic) does not invoke the external auth script. In 18.06, the auth script is invoked, but the username is lowercased, so it won't match what the UAServer has stored. --- AppServer/google/appengine/api/xmpp/xmpp_service_real.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/AppServer/google/appengine/api/xmpp/xmpp_service_real.py b/AppServer/google/appengine/api/xmpp/xmpp_service_real.py index c29a4f4a77..35164ca2fd 100755 --- a/AppServer/google/appengine/api/xmpp/xmpp_service_real.py +++ b/AppServer/google/appengine/api/xmpp/xmpp_service_real.py @@ -201,7 +201,13 @@ def _Dynamic_CreateChannel(self, request, response): if '@' in application_key: raise apiproxy_errors.ApplicationError( channel_service_pb.ChannelServiceError.INVALID_CHANNEL_KEY) - + + # Ejabberd 18.01 does not invoke the external auth script if username has + # any uppercase characters. + if application_key != application_key.lower(): + raise apiproxy_errors.ApplicationError( + channel_service_pb.ChannelServiceError.INVALID_CHANNEL_KEY) + appname = os.environ['APPNAME'] unique_app_id = hashlib.sha1(appname + application_key).hexdigest() client_id = 'channel~%s~%s@%s' % (unique_app_id, From ff694afdfec08e2e501a73e1c1f108e63572d887 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 8 Oct 2019 14:59:37 -0700 Subject: [PATCH 170/221] Fix comment typo --- AppDB/appscale/datastore/fdb/indexes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index c92e993549..aa090616a8 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -372,7 +372,7 @@ def _populated(self, ns_dir, kind): # TODO: Check if the presence of stat entities should mark a kind as being # populated. index_slice = kind_index.get_slice(()) - # This query is reversed to increase the likelihood of getting an relevant + # This query is reversed to increase the likelihood of getting a relevant # (not marked for GC) entry. 
iterator = IndexIterator(self._tr, self._tornado_fdb, kind_index, index_slice, fetch_limit=1, reverse=True, From 3b8d68fba348c6c92e086fd72d4f6ec2d8451f86 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 11 Oct 2019 13:38:38 -0700 Subject: [PATCH 171/221] Fix bootstrap check for dev tag --- bootstrap.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bootstrap.sh b/bootstrap.sh index f32ed0fead..880838057e 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -110,7 +110,7 @@ while [ $# -gt 0 ]; do if [ "${1}" = "--tag" ]; then shift; if [ -z "${1}" ]; then usage; fi GIT_TAG="${1}"; - if [${GIT_TAG} != "dev" ]; then TAG_PARAM_SPECIFIED="Y"; fi + if [ "${GIT_TAG}" != "dev" ]; then TAG_PARAM_SPECIFIED="Y"; fi shift; continue fi if [ "${1}" = "-t" ]; then From 59c94fa1327cc609d8e7fe5366984050c6248ef4 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 11 Oct 2019 13:39:29 -0700 Subject: [PATCH 172/221] Fix install on docker where systemctl commands may fail --- debian/appscale_install_functions.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 1c0dcb83d3..fc0c77771a 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -396,11 +396,11 @@ installservice() echo "Linking appscale common systemd drop-in" for APPSCALE_SYSTEMD_SERVICE in ${DESTDIR}/lib/systemd/system/appscale-*.service; do [ -d "${APPSCALE_SYSTEMD_SERVICE}.d" ] || mkdir "${APPSCALE_SYSTEMD_SERVICE}.d" - ln -t "${APPSCALE_SYSTEMD_SERVICE}.d" ${DESTDIR}/lib/systemd/system/appscale-.d/10-appscale-common.conf + ln -ft "${APPSCALE_SYSTEMD_SERVICE}.d" ${DESTDIR}/lib/systemd/system/appscale-.d/10-appscale-common.conf done fi - systemctl daemon-reload + systemctl daemon-reload || true # Enable AppController on system reboots. systemctl enable appscale-controller || true From a3c6b0511f7dec656d0bd6284339176b3eb509d1 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 11 Oct 2019 16:34:10 -0700 Subject: [PATCH 173/221] Filter out empty namespaces This returns only populated namespaces during a metadata query. --- AppDB/appscale/datastore/fdb/indexes.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index d3fb27e489..1f69336c24 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -336,8 +336,10 @@ def _usable(self, entry): class NamespaceIterator(object): - def __init__(self, tr, project_dir): + """ Iterates over a list of namespaces in a project. """ + def __init__(self, tr, tornado_fdb, project_dir): self._tr = tr + self._tornado_fdb = tornado_fdb self._project_dir = project_dir self._done = False @@ -350,6 +352,14 @@ def next_page(self): ns_dir = self._project_dir.open(self._tr, (KindIndex.DIR_NAME,)) namespaces = ns_dir.list(self._tr) + # Filter out namespaces that don't have at least one kind. + kinds_by_ns = yield [KindIterator(self._tr, self._tornado_fdb, + self._project_dir, namespace).next_page() + for namespace in namespaces] + namespaces = [ + namespace for namespace, (kinds, _) in zip(namespaces, kinds_by_ns) + if kinds] + # The API uses an ID of 1 to label the default namespace. 
results = [IndexEntry(self._project_dir.get_path()[-1], u'', (u'__namespace__', namespace or 1), None, None) @@ -360,6 +370,7 @@ def next_page(self): class KindIterator(object): + """ Iterates over a list of kinds in a namespace. """ def __init__(self, tr, tornado_fdb, project_dir, namespace): self._tr = tr self._tornado_fdb = tornado_fdb @@ -1166,7 +1177,7 @@ def get_iterator(self, tr, query, read_versionstamp=None): if query.has_kind() and query.kind() == u'__namespace__': project_dir = yield self._directory_cache.get(tr, (project_id,)) - raise gen.Return(NamespaceIterator(tr, project_dir)) + raise gen.Return(NamespaceIterator(tr, self._tornado_fdb, project_dir)) elif query.has_kind() and query.kind() == u'__kind__': project_dir = yield self._directory_cache.get(tr, (project_id,)) raise gen.Return(KindIterator(tr, self._tornado_fdb, project_dir, From 89163620967df34e5de2b81e398a0c0ea030ce29 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 14 Oct 2019 15:36:12 -0700 Subject: [PATCH 174/221] Use HAProxy location for the host header Since the host header defines which service the push task goes to in App Engine, this roughly preserves the same behavior (since the target service has already been selected at this point). There are some edge cases (especially during service relocation) where this behaves differently, but the routing pieces need more work to support those differences. --- AppTaskQueue/appscale/taskqueue/push_worker.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/AppTaskQueue/appscale/taskqueue/push_worker.py b/AppTaskQueue/appscale/taskqueue/push_worker.py index 6aa814cfa6..26d14133f6 100644 --- a/AppTaskQueue/appscale/taskqueue/push_worker.py +++ b/AppTaskQueue/appscale/taskqueue/push_worker.py @@ -157,17 +157,12 @@ def execute_task(task, headers, args): # Tasks should use HTTP to bypass scheme redirects since they use HAProxy. connection = httplib.HTTPConnection(remote_host, url.port) - skip_host = False - if b'host' in headers or b'Host' in headers: - skip_host = True - skip_accept_encoding = False if 'accept-encoding' in headers or 'Accept-Encoding' in headers: skip_accept_encoding = True connection.putrequest(method, urlpath, - skip_host=skip_host, skip_accept_encoding=skip_accept_encoding) # Update the task headers @@ -175,6 +170,14 @@ def execute_task(task, headers, args): headers['X-AppEngine-TaskExecutionCount'] = str(task.request.retries) for header in headers: + # Avoid changing the host header from the HAProxy location. Though GAE + # supports host-based routing, we need to make some additional changes + # before we can behave in a similar manner. Using the HAProxy location + # for the host header allows the dispatcher to try extracting a port, + # which it uses to set environment variables for the request. + if header == b'Host': + continue + connection.putheader(header, headers[header]) if 'content-type' not in headers or 'Content-Type' not in headers: From 3483bab2e4cfbad6933523183d10f79da27232fe Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Mon, 14 Oct 2019 15:39:15 -0700 Subject: [PATCH 175/221] Guess the intended scheme as a fallback This should only apply to the rare case when a client makes a request without going through Nginx and they've overridden the host header with a port-less value. 
--- AppServer/google/appengine/tools/devappserver2/module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppServer/google/appengine/tools/devappserver2/module.py b/AppServer/google/appengine/tools/devappserver2/module.py index 77753bd57e..98ae1f5465 100644 --- a/AppServer/google/appengine/tools/devappserver2/module.py +++ b/AppServer/google/appengine/tools/devappserver2/module.py @@ -581,7 +581,7 @@ def _handle_request_impl(self, environ, start_response, inst=None, try: environ['SERVER_PORT'] = environ['HTTP_HOST'].split(':')[1] except IndexError: - scheme = environ['HTTP_X_FORWARDED_PROTO'] + scheme = environ.get('HTTP_X_FORWARDED_PROTO', 'http') if scheme == 'http': environ['SERVER_PORT'] = 80 else: From 455b31965cf53e2f382623bbd176f2bf89430a16 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 15 Oct 2019 14:30:36 -0700 Subject: [PATCH 176/221] Set multiple field when appropriate --- AppDB/appscale/datastore/fdb/indexes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index d3fb27e489..062d623926 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -256,12 +256,12 @@ def prop_result(self): entity = entity_pb.EntityProto() entity.mutable_key().MergeFrom(self.key) entity.mutable_entity_group().MergeFrom(self.group) + prop_names = list(zip(*self.properties))[0] for prop_name, value in self.properties: prop = entity.add_property() prop.set_name(prop_name) prop.set_meaning(entity_pb.Property.INDEX_VALUE) - # TODO: Check if this is sometimes True. - prop.set_multiple(False) + prop.set_multiple(prop_names.count(prop_name) > 1) prop.mutable_value().MergeFrom(value) return entity From 1cce41180f3cf7c2fbae686f5585a03363e4e1b9 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 15 Oct 2019 14:31:12 -0700 Subject: [PATCH 177/221] Add support for property metadata queries This allows clients to request a list of property names and types for all kinds in a given namespace. --- AppDB/appscale/datastore/fdb/codecs.py | 4 + AppDB/appscale/datastore/fdb/data.py | 8 ++ AppDB/appscale/datastore/fdb/indexes.py | 107 ++++++++++++++++++++++++ 3 files changed, 119 insertions(+) diff --git a/AppDB/appscale/datastore/fdb/codecs.py b/AppDB/appscale/datastore/fdb/codecs.py index 564b9ddf5b..daf3b76cf1 100644 --- a/AppDB/appscale/datastore/fdb/codecs.py +++ b/AppDB/appscale/datastore/fdb/codecs.py @@ -33,6 +33,10 @@ USER_CODE = 0x24 REFERENCE_CODE = 0x27 +# These are defined for clarity when selecting property type limits. +MIN_INT64_CODE = INT64_ZERO_CODE - 8 +MAX_INT64_CODE = INT64_ZERO_CODE + 8 + # Ensures the shorter of two variable-length values (with identical prefixes) # is placed before the longer one. Otherwise, the following byte(s) could # determine the sort order. 
It also allows a decoder to find the end of the diff --git a/AppDB/appscale/datastore/fdb/data.py b/AppDB/appscale/datastore/fdb/data.py index 976e898707..29263a3b7b 100644 --- a/AppDB/appscale/datastore/fdb/data.py +++ b/AppDB/appscale/datastore/fdb/data.py @@ -438,6 +438,14 @@ def get_entry(self, tr, index_entry, snapshot=False): index_entry.project_id, index_entry.namespace, index_entry.path, encoded_entity=entity.Encode()) raise gen.Return(version_entry) + elif index_entry.kind == u'__property__': + entity = index_entry.prop_result() + entity.clear_entity_group() + entity.mutable_entity_group() + version_entry = VersionEntry( + index_entry.project_id, index_entry.namespace, index_entry.path, + encoded_entity=entity.Encode()) + raise gen.Return(version_entry) version_entry = yield self.get_version_from_path( tr, index_entry.project_id, index_entry.namespace, index_entry.path, diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index 062d623926..f7f0274d04 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -14,6 +14,7 @@ from tornado import gen from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.datastore.fdb import codecs from appscale.datastore.fdb.codecs import ( decode_str, decode_value, encode_value, encode_versionstamp_index, Path) from appscale.datastore.fdb.sdk import FindIndexToUse, ListCursor @@ -410,6 +411,76 @@ def _populated(self, ns_dir, kind): raise gen.Return(False) +class PropertyIterator(object): + """ Iterates over a list of indexed property names for a kind. """ + PROPERTY_TYPES = (u'NULL', u'INT64', u'BOOLEAN', u'STRING', u'DOUBLE', + u'POINT', u'USER', u'REFERENCE') + + def __init__(self, tr, tornado_fdb, project_dir, namespace): + self._tr = tr + self._tornado_fdb = tornado_fdb + self._project_dir = project_dir + self._namespace = namespace + self._done = False + + @gen.coroutine + def next_page(self): + if self._done: + raise gen.Return(([], False)) + + # TODO: This can be made async. + ns_dir = self._project_dir.open( + self._tr, (SinglePropIndex.DIR_NAME, self._namespace)) + kinds = ns_dir.list(self._tr) + # TODO: Check if stat entities belong in kinds. + kind_dirs = [ns_dir.open(self._tr, (kind,)) for kind in kinds] + results = [] + for kind, kind_dir in zip(kinds, kind_dirs): + # TODO: This can be made async. + prop_names = kind_dir.list(self._tr) + for prop_name in prop_names: + prop_dir = kind_dir.open(self._tr, (prop_name,)) + index = SinglePropIndex(prop_dir) + populated_map = yield [self._populated(index, type_name) + for type_name in self.PROPERTY_TYPES] + populated_types = tuple( + type_ for type_, populated in zip(self.PROPERTY_TYPES, populated_map) + if populated) + if not populated_types: + continue + + project_id = self._project_dir.get_path()[-1] + path = (u'__kind__', kind, u'__property__', prop_name) + properties = [] + for prop_type in populated_types: + prop_value = entity_pb.PropertyValue() + prop_value.set_stringvalue(prop_type) + properties.append((u'property_representation', prop_value)) + + results.append(CompositeEntry(project_id, self._namespace, path, + properties, None, None)) + + self._done = True + raise gen.Return((results, False)) + + @gen.coroutine + def _populated(self, prop_index, type_name): + """ Checks if at least one entity exists for a given type name. 
""" + index_slice = prop_index.type_range(type_name) + # This query is reversed to increase the likelihood of getting a relevant + # (not marked for GC) entry. + iterator = IndexIterator(self._tr, self._tornado_fdb, prop_index, + index_slice, fetch_limit=1, reverse=True, + snapshot=True) + while True: + results, more_results = yield iterator.next_page() + if results: + raise gen.Return(True) + + if not more_results: + raise gen.Return(False) + + class MergeJoinIterator(object): """ Returns pages of index entry results from multiple ranges. It ignores @@ -949,6 +1020,38 @@ def decode(self, kv): return PropertyEntry(self.project_id, self.namespace, path, self.prop_name, value, commit_versionstamp, deleted_versionstamp) + def type_range(self, type_name): + """ Returns a slice that encompasses all values for a property type. """ + if type_name == u'NULL': + start = six.int2byte(codecs.NULL_CODE) + stop = six.int2byte(codecs.NULL_CODE + 1) + elif type_name == u'INT64': + start = six.int2byte(codecs.MIN_INT64_CODE) + stop = six.int2byte(codecs.MAX_INT64_CODE + 1) + elif type_name == u'BOOLEAN': + start = six.int2byte(codecs.FALSE_CODE) + stop = six.int2byte(codecs.TRUE_CODE + 1) + elif type_name == u'STRING': + start = six.int2byte(codecs.BYTES_CODE) + stop = six.int2byte(codecs.BYTES_CODE + 1) + elif type_name == u'DOUBLE': + start = six.int2byte(codecs.DOUBLE_CODE) + stop = six.int2byte(codecs.DOUBLE_CODE + 1) + elif type_name == u'POINT': + start = six.int2byte(codecs.POINT_CODE) + stop = six.int2byte(codecs.POINT_CODE + 1) + elif type_name == u'USER': + start = six.int2byte(codecs.USER_CODE) + stop = six.int2byte(codecs.USER_CODE + 1) + elif type_name == u'REFERENCE': + start = six.int2byte(codecs.REFERENCE_CODE) + stop = six.int2byte(codecs.REFERENCE_CODE + 1) + else: + raise InternalError(u'Unknown type name') + + return slice(self.directory.rawPrefix + start, + self.directory.rawPrefix + stop) + class CompositeIndex(Index): """ @@ -1171,6 +1274,10 @@ def get_iterator(self, tr, query, read_versionstamp=None): project_dir = yield self._directory_cache.get(tr, (project_id,)) raise gen.Return(KindIterator(tr, self._tornado_fdb, project_dir, namespace)) + elif query.has_kind() and query.kind() == u'__property__': + project_dir = yield self._directory_cache.get(tr, (project_id,)) + raise gen.Return(PropertyIterator(tr, self._tornado_fdb, project_dir, + namespace)) index = yield self._get_perfect_index(tr, query) reverse = get_scan_direction(query, index) == Query_Order.DESCENDING From ff2c3822b94d5394a8e2a742c62c1008c248cc25 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 16 Oct 2019 10:06:15 -0700 Subject: [PATCH 178/221] Catch unexpected errors when checking process line The PosixPsutil raises a `NoMethodError` when the process name no longer exists. This is meant to catch that and similar exceptions that the library has not dealt with. --- AppController/lib/ejabberd.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/AppController/lib/ejabberd.rb b/AppController/lib/ejabberd.rb index 6656716964..42e11da5b1 100644 --- a/AppController/lib/ejabberd.rb +++ b/AppController/lib/ejabberd.rb @@ -63,7 +63,10 @@ def self.ensure_correct_epmd() begin next unless process.name == 'epmd' process.terminate if process.cmdline.include?('-daemon') - rescue PosixPsutil::NoSuchProcess + # The PosixPsutil library does not always raise NoSuchProcess when + # the given process no longer has a name. StandardError is meant + # as a fallback when it doesn't behave properly. 
+ rescue PosixPsutil::NoSuchProcess, StandardError next end } From e32f58854461181ba471f7b8ea27d0355dcf2d37 Mon Sep 17 00:00:00 2001 From: Graziano Obertelli Date: Wed, 16 Oct 2019 15:14:04 -0700 Subject: [PATCH 179/221] Allow ovewrite of deployed files This is needed to have the up --update option to work correctly. --- system/units.d/appscale-.d/10-appscale-common.conf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/system/units.d/appscale-.d/10-appscale-common.conf b/system/units.d/appscale-.d/10-appscale-common.conf index f8f2c40509..0a4070ec9d 100644 --- a/system/units.d/appscale-.d/10-appscale-common.conf +++ b/system/units.d/appscale-.d/10-appscale-common.conf @@ -16,4 +16,6 @@ RestartSec=10 LimitNOFILE=200000 # Security PrivateTmp=true -ProtectSystem=true +# Enabling the following will prevent the up --update option to work +# correctly. +#ProtectSystem=true From edbaee4267a82b73447e9b570f8fdf060e39b57c Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 17 Oct 2019 16:01:27 -0700 Subject: [PATCH 180/221] Ensure property_representation values are repeated The client should parse the values as a list of repeated props even when there is only one property value. --- AppDB/appscale/datastore/fdb/indexes.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index f7f0274d04..fbcc96b593 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -257,14 +257,21 @@ def prop_result(self): entity = entity_pb.EntityProto() entity.mutable_key().MergeFrom(self.key) entity.mutable_entity_group().MergeFrom(self.group) - prop_names = list(zip(*self.properties))[0] - for prop_name, value in self.properties: + + def add_prop(prop_name, multiple, value): prop = entity.add_property() prop.set_name(prop_name) prop.set_meaning(entity_pb.Property.INDEX_VALUE) - prop.set_multiple(prop_names.count(prop_name) > 1) + prop.set_multiple(multiple) prop.mutable_value().MergeFrom(value) + for prop_name, value in self.properties: + if isinstance(value, list): + for multiple_val in value: + add_prop(prop_name, True, multiple_val) + else: + add_prop(prop_name, False, value) + return entity def cursor_result(self, ordered_props): @@ -451,14 +458,17 @@ def next_page(self): project_id = self._project_dir.get_path()[-1] path = (u'__kind__', kind, u'__property__', prop_name) - properties = [] + prop_values = [] for prop_type in populated_types: prop_value = entity_pb.PropertyValue() prop_value.set_stringvalue(prop_type) - properties.append((u'property_representation', prop_value)) + prop_values.append(prop_value) - results.append(CompositeEntry(project_id, self._namespace, path, - properties, None, None)) + # TODO: Consider giving metadata results their own entry class. + entry = CompositeEntry( + project_id, self._namespace, path, + [(u'property_representation', prop_values)], None, None) + results.append(entry) self._done = True raise gen.Return((results, False)) From adbefa257d8331e6624b44375b0cad4a82df81b6 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Sun, 20 Oct 2019 17:07:45 -0700 Subject: [PATCH 181/221] Add support for allocating sequential IDs This allows the FDB backend to fulfill AllocateIdsRequests. 
--- .../datastore/datastore_distributed.py | 16 +++- AppDB/appscale/datastore/fdb/codecs.py | 21 +++-- AppDB/appscale/datastore/fdb/fdb_datastore.py | 78 +++++++++++++++++-- .../appscale/datastore/fdb/sequential_ids.py | 65 ++++++++++++++++ AppDB/appscale/datastore/scripts/datastore.py | 14 +++- 5 files changed, 177 insertions(+), 17 deletions(-) create mode 100644 AppDB/appscale/datastore/fdb/sequential_ids.py diff --git a/AppDB/appscale/datastore/datastore_distributed.py b/AppDB/appscale/datastore/datastore_distributed.py index 6cdccd876e..5d8f005646 100644 --- a/AppDB/appscale/datastore/datastore_distributed.py +++ b/AppDB/appscale/datastore/datastore_distributed.py @@ -490,15 +490,21 @@ def update_composite_index(self, app_id, index): self.logger.info('Updated {} index entries.'.format(entries_updated)) @gen.coroutine - def allocate_size(self, project, size): + def allocate_size(self, project, namespace, path_prefix, size): """ Allocates a block of IDs for a project. Args: project: A string specifying the project ID. + namespace: A string specifying a namespace. + path_prefix: A tuple specifying the model key's path (omitting the final + ID). size: An integer specifying the number of IDs to reserve. Returns: A tuple of integers specifying the start and end ID. """ + # The Cassandra backend does not implement path-specific allocators. + del namespace, path_prefix + if project not in self.sequential_allocators: self.sequential_allocators[project] = EntityIDAllocator( self.datastore_batch.session, project) @@ -508,15 +514,21 @@ def allocate_size(self, project, size): raise gen.Return((start_id, end_id)) @gen.coroutine - def allocate_max(self, project, max_id): + def allocate_max(self, project, namespace, path_prefix, max_id): """ Reserves all IDs up to the one given. Args: project: A string specifying the project ID. + namespace: A string specifying the namespace. + path_prefix: A tuple specifying the model key's path (omitting the final + ID). max_id: An integer specifying the maximum ID to allocated. Returns: A tuple of integers specifying the start and end ID. """ + # The Cassandra backend does not implement path-specific allocators. 
+ del namespace, path_prefix + if project not in self.sequential_allocators: self.sequential_allocators[project] = EntityIDAllocator( self.datastore_batch.session, project) diff --git a/AppDB/appscale/datastore/fdb/codecs.py b/AppDB/appscale/datastore/fdb/codecs.py index 564b9ddf5b..63fcbc4185 100644 --- a/AppDB/appscale/datastore/fdb/codecs.py +++ b/AppDB/appscale/datastore/fdb/codecs.py @@ -320,7 +320,8 @@ class Path(object): NAME_MARKER = 0x1D @classmethod - def pack(cls, path, prefix=b'', omit_terminator=False, reverse=False): + def pack(cls, path, prefix=b'', omit_terminator=False, reverse=False, + allow_partial=False): if not isinstance(path, tuple): path = cls.flatten(path) @@ -328,8 +329,16 @@ def pack(cls, path, prefix=b'', omit_terminator=False, reverse=False): kind_marker = encode_marker(cls.KIND_MARKER, reverse) for index in range(0, len(path), 2): kind = path[index] - id_or_name = path[index + 1] encoded_items.append(Text.encode(kind, kind_marker, reverse)) + + try: + id_or_name = path[index + 1] + except IndexError: + if allow_partial: + continue + + raise + encoded_items.append(cls.encode_id_or_name(id_or_name, reverse)) terminator = b'' if omit_terminator else encode_marker(TERMINATOR, reverse) @@ -375,7 +384,7 @@ def unpack(cls, blob, pos, reverse=False): return tuple(items), pos @staticmethod - def flatten(path): + def flatten(path, allow_partial=False): """ Converts a key path protobuf object to a tuple. """ if isinstance(path, entity_pb.PropertyValue): element_list = path.referencevalue().pathelement_list() @@ -385,7 +394,7 @@ def flatten(path): element_list = path.element_list() return tuple(item for element in element_list - for item in Path.encode_element(element)) + for item in Path.encode_element(element, allow_partial)) @staticmethod def decode(flat_path, reference_value=False): @@ -411,12 +420,14 @@ def decode(flat_path, reference_value=False): return path @staticmethod - def encode_element(element): + def encode_element(element, allow_partial=False): """ Converts a path element protobuf object to a tuple. 
""" if element.has_id(): id_or_name = int(element.id()) elif element.has_name(): id_or_name = decode_str(element.name()) + elif allow_partial: + id_or_name = None else: raise BadRequest(u'All path elements must either have a name or ID') diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index 79a653147d..3381183abd 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -28,10 +28,12 @@ from appscale.datastore.fdb.gc import GarbageCollector from appscale.datastore.fdb.indexes import ( get_order_info, IndexManager, KEY_PROP) +from appscale.datastore.fdb.sequential_ids import ( + old_max_id, sequential_id_key, SequentialIDsNamespace) from appscale.datastore.fdb.transactions import TransactionManager from appscale.datastore.fdb.utils import ( - ABSENT_VERSION, fdb, FDBErrorCodes, next_entity_version, DS_ROOT, - ScatteredAllocator, TornadoFDB) + _MAX_SEQUENTIAL_ID, ABSENT_VERSION, DS_ROOT, fdb, FDBErrorCodes, + next_entity_version, ScatteredAllocator, TornadoFDB) sys.path.append(APPSCALE_PYTHON_APPSERVER) from google.appengine.datastore import entity_pb @@ -55,17 +57,17 @@ def start(self, fdb_clusterfile): self._db = fdb.open(fdb_clusterfile) self._tornado_fdb = TornadoFDB(IOLoop.current()) ds_dir = fdb.directory.create_or_open(self._db, DS_ROOT) - directory_cache = DirectoryCache(self._db, self._tornado_fdb, ds_dir) - directory_cache.initialize() + self._directory_cache = DirectoryCache(self._db, self._tornado_fdb, ds_dir) + self._directory_cache.initialize() - self._data_manager = DataManager(self._tornado_fdb, directory_cache) + self._data_manager = DataManager(self._tornado_fdb, self._directory_cache) self.index_manager = IndexManager( - self._db, self._tornado_fdb, self._data_manager, directory_cache) + self._db, self._tornado_fdb, self._data_manager, self._directory_cache) self._tx_manager = TransactionManager( - self._db, self._tornado_fdb, directory_cache) + self._db, self._tornado_fdb, self._directory_cache) self._gc = GarbageCollector( self._db, self._tornado_fdb, self._data_manager, self.index_manager, - self._tx_manager, directory_cache) + self._tx_manager, self._directory_cache) self._gc.start() @gen.coroutine @@ -381,6 +383,66 @@ def add_indexes(self, project_id, indexes): zk_client = self.index_manager.composite_index_manager._zk_client merge_indexes(zk_client, project_id, indexes) + @gen.coroutine + def allocate_size(self, project_id, namespace, path_prefix, size, retries=5): + tr = self._db.create_transaction() + + key = yield sequential_id_key(tr, project_id, namespace, path_prefix, + self._directory_cache) + old_max = yield old_max_id(tr, key, self._tornado_fdb) + + new_max = old_max + size + # TODO: Check behavior on reaching max sequential ID. 
+ if new_max > _MAX_SEQUENTIAL_ID: + raise BadRequest( + u'There are not enough remaining IDs to satisfy request') + + tr[key] = SequentialIDsNamespace.encode_value(new_max) + + try: + yield self._tornado_fdb.commit(tr) + except fdb.FDBError as fdb_error: + if fdb_error.code != FDBErrorCodes.NOT_COMMITTED: + raise InternalError(fdb_error.description) + + retries -= 1 + if retries < 0: + raise InternalError(fdb_error.description) + + range_start, range_end = yield self.allocate_size( + project_id, namespace, path_prefix, size, retries) + raise gen.Return((range_start, range_end)) + + raise gen.Return((old_max + 1, new_max)) + + @gen.coroutine + def allocate_max(self, project_id, namespace, path_prefix, new_max, + retries=5): + tr = self._db.create_transaction() + + key = yield sequential_id_key(tr, project_id, namespace, path_prefix, + self._directory_cache) + old_max = yield old_max_id(tr, key, self._tornado_fdb) + + if new_max > old_max: + tr[key] = SequentialIDsNamespace.encode_value(new_max) + + try: + yield self._tornado_fdb.commit(tr) + except fdb.FDBError as fdb_error: + if fdb_error.code != FDBErrorCodes.NOT_COMMITTED: + raise InternalError(fdb_error.description) + + retries -= 1 + if retries < 0: + raise InternalError(fdb_error.description) + + range_start, range_end = yield self.allocate_max( + project_id, namespace, path_prefix, new_max, retries) + raise gen.Return((range_start, range_end)) + + raise gen.Return((old_max + 1, max(new_max, old_max))) + @gen.coroutine def _upsert(self, tr, entity, old_entry_future=None): last_element = entity.key().path().element(-1) diff --git a/AppDB/appscale/datastore/fdb/sequential_ids.py b/AppDB/appscale/datastore/fdb/sequential_ids.py new file mode 100644 index 0000000000..83ad8edce5 --- /dev/null +++ b/AppDB/appscale/datastore/fdb/sequential_ids.py @@ -0,0 +1,65 @@ +""" Handles storage details for sequential ID allocation. """ +from tornado import gen + +from appscale.datastore.fdb.codecs import Int64, Path +from appscale.datastore.fdb.utils import hash_tuple + + +class SequentialIDsNamespace(object): + """ + A SequentialIDsNamespace handles the encoding and decoding details for + allocate operations requested by the client. These can be explicit calls to + reserve a range of IDs or part of a "put" operation that specifies the + sequential allocator. + + The directory path looks like (, 'sequential-ids', ). + + Within this directory, keys are encoded as + + (missing the ID from the final + element). + + The value is the largest ID that has been allocated. + """ + DIR_NAME = u'sequential-ids' + + def __init__(self, directory): + self.directory = directory + + @classmethod + def directory_path(cls, project_id, namespace): + return project_id, cls.DIR_NAME, namespace + + def encode_key(self, path_prefix): + scatter_byte = hash_tuple(path_prefix) + encoded_path = Path.pack(path_prefix, omit_terminator=True, + allow_partial=True) + return self.directory.rawPrefix + scatter_byte + encoded_path + + @staticmethod + def encode_value(largest_allocated): + return Int64.encode(largest_allocated) + + @staticmethod + def decode_value(value): + marker = value[0] + pos = 1 + return Int64.decode(marker, value, pos)[0] + + +@gen.coroutine +def sequential_id_key(tr, project_id, namespace, path_prefix, directory_cache): + """ Looks up the FDB key for the max sequential ID. 
""" + dir_path = SequentialIDsNamespace.directory_path(project_id, namespace) + directory = yield directory_cache.get(tr, dir_path) + sequential_ids_ns = SequentialIDsNamespace(directory) + raise gen.Return(sequential_ids_ns.encode_key(path_prefix)) + + +@gen.coroutine +def old_max_id(tr, key, tornado_fdb): + """ Retrieves the max allocated sequential ID for a path. """ + old_max = yield tornado_fdb.get(tr, key) + if not old_max.present(): + raise gen.Return(0) + else: + raise gen.Return(SequentialIDsNamespace.decode_value(old_max)) diff --git a/AppDB/appscale/datastore/scripts/datastore.py b/AppDB/appscale/datastore/scripts/datastore.py index 36e33f9a77..d1ad7586b3 100644 --- a/AppDB/appscale/datastore/scripts/datastore.py +++ b/AppDB/appscale/datastore/scripts/datastore.py @@ -13,6 +13,7 @@ import sys import kazoo +import six import tornado.httpserver import tornado.web from kazoo.retry import KazooRetry @@ -24,6 +25,7 @@ DATASTORE_SERVERS_NODE, ZK_PERSISTENT_RECONNECTS) from appscale.common.datastore_index import DatastoreIndex from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER +from appscale.datastore.fdb.codecs import Path from kazoo.client import KazooState from kazoo.exceptions import NodeExistsError, NoNodeError from tornado import gen @@ -596,12 +598,20 @@ def allocate_ids_request(self, app_id, http_request_data): ('', datastore_pb.Error.BAD_REQUEST, 'Either size or max must be set.')) + if not request.has_model_key(): + raise gen.Return(('', datastore_pb.Error.BAD_REQUEST, + 'Model key must be set')) + + namespace = six.text_type(request.model_key().name_space()) + path_prefix = Path.flatten(request.model_key().path(), + allow_partial=True)[:-1] + if request.has_size(): coroutine = datastore_access.allocate_size - args = (app_id, request.size()) + args = (app_id, namespace, path_prefix, request.size()) else: coroutine = datastore_access.allocate_max - args = (app_id, request.max()) + args = (app_id, namespace, path_prefix, request.max()) try: start, end = yield coroutine(*args) From a3d0ee8736f8abe44faa0c52a7c2ccda9d96536f Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Tue, 15 Oct 2019 12:53:45 -0700 Subject: [PATCH 182/221] Update changelog and release notes --- RELEASE | 7 +++++++ VERSION | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/RELEASE b/RELEASE index 6623b5fd0f..0b2fd384f4 100644 --- a/RELEASE +++ b/RELEASE @@ -5,6 +5,13 @@ /_/ |_/ .___/ .___//____/\___/\__,_/_/ \___/ /_/ /_/ +AppScale version 3.8.1, released October 2019 +Highlights of features and defects fixed in this release: + - Fixes a regression in the 3.8 release where push tasks could not be executed + if the task URL had "secure: always" set. 
+ +Known Issues: + AppScale version 3.8.0, released September 2019 Highlights of features and defects fixed in this release: - Login continue scheme diff --git a/VERSION b/VERSION index ae30a4da65..0c1ca211cc 100644 --- a/VERSION +++ b/VERSION @@ -4,4 +4,4 @@ / ___ |/ /_/ / /_/ /___/ / /__/ /_/ / // __/ /_/ |_/ .___/ .___//____/\___/\__,_/_/ \___/ /_/ /_/ -AppScale version 3.8.0 +AppScale version 3.8.1 From 3fb32dc55347dd7139f390f942a50eaff93ecba9 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 23 Oct 2019 13:33:47 -0700 Subject: [PATCH 183/221] Docker special handling for cron/sshd removed --- debian/appscale_build.sh | 3 --- scripts/fast-start.sh | 6 +----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/debian/appscale_build.sh b/debian/appscale_build.sh index dbad204304..a4f25121cc 100755 --- a/debian/appscale_build.sh +++ b/debian/appscale_build.sh @@ -59,9 +59,6 @@ if grep docker /proc/1/cgroup > /dev/null ; then # Make sure we have default locale. ${PKG_CMD} install --assume-yes locales locale-gen en_US en_US.UTF-8 - # Docker images miss the following. - mkdir -p /var/run/sshd - chmod 755 /var/run/sshd fi export APPSCALE_HOME_RUNTIME=`pwd` diff --git a/scripts/fast-start.sh b/scripts/fast-start.sh index f4a76b6373..b5f46d17d5 100755 --- a/scripts/fast-start.sh +++ b/scripts/fast-start.sh @@ -111,10 +111,6 @@ PRIVATE_IP="" if [ -z "${PROVIDER}" ]; then if grep docker /proc/1/cgroup > /dev/null ; then - # We need to start sshd by hand. - /usr/sbin/sshd - # Force Start cron - /usr/sbin/cron PROVIDER="Docker" elif lspci | grep VirtualBox > /dev/null ; then PROVIDER="VirtualBox" @@ -212,7 +208,7 @@ if [ ! -e AppScalefile ]; then [ -z "$PRIVATE_IP" ] && { echo "Cannot get private IP of instance!" ; exit 1 ; } # Tell the user what we detected. - echo "Detected enviroment: ${PROVIDER}" + echo "Detected environment: ${PROVIDER}" echo "Private IP found: ${PRIVATE_IP}" echo "Public IP found: ${LOGIN}" From 94b90f849a10dea32921fde12e0b1375cb97c4f1 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 24 Oct 2019 14:02:26 -0700 Subject: [PATCH 184/221] Return an empty list of kinds if ns doesn't exist This allows a client to make a metadata query before any entities have been written. --- AppDB/appscale/datastore/fdb/indexes.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index d3fb27e489..9a9db07edc 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -373,8 +373,13 @@ def next_page(self): raise gen.Return(([], False)) # TODO: This can be made async. - ns_dir = self._project_dir.open( - self._tr, (KindIndex.DIR_NAME, self._namespace)) + try: + ns_dir = self._project_dir.open( + self._tr, (KindIndex.DIR_NAME, self._namespace)) + except ValueError: + # If the namespace does not exist, there are no kinds there. + raise gen.Return(([], False)) + kinds = ns_dir.list(self._tr) populated_kinds = [ kind for kind, populated in zip( From 1f969b48d408f0fe6445b33199e77d09adaff1e5 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 25 Oct 2019 15:11:06 -0700 Subject: [PATCH 185/221] Get kind name from key rather than entity group In GAE, metadata queries do not populate the entity group field. 
--- AppDashboard/lib/datastore_viewer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppDashboard/lib/datastore_viewer.py b/AppDashboard/lib/datastore_viewer.py index 910775d10b..30a23bf6bf 100644 --- a/AppDashboard/lib/datastore_viewer.py +++ b/AppDashboard/lib/datastore_viewer.py @@ -378,7 +378,7 @@ def _get_kinds(cls, ds_access, namespace): query.set_kind('__kind__') result = datastore_pb.QueryResult() ds_access._Dynamic_RunQuery(query, result) - kinds = [entity.entity_group().element(0).name() + kinds = [entity.key().path().element(0).name() for entity in result.result_list()] return sorted(kinds) From 45727a794bf860d043cc75925503ffa84fa4ec2b Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 23 Oct 2019 14:08:42 -0700 Subject: [PATCH 186/221] AppController set postgres dsn in zookeeper for ua server --- AppController/lib/zkinterface.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/AppController/lib/zkinterface.rb b/AppController/lib/zkinterface.rb index d5e96d1a18..f710d005db 100644 --- a/AppController/lib/zkinterface.rb +++ b/AppController/lib/zkinterface.rb @@ -319,6 +319,10 @@ def self.set_postgres_dsn(postgres_dsn) dsn_node = '/appscale/tasks/postgres_dsn' ensure_path(dsn_node) set(dsn_node, postgres_dsn, NOT_EPHEMERAL) + + ua_dsn_node = '/appscale/ua_server/postgres_dsn' + ensure_path(ua_dsn_node) + set(ua_dsn_node, postgres_dsn, NOT_EPHEMERAL) end def self.run_zookeeper_operation(&block) From d74d93ed496f6cc8d17fc24f9ae21dc67e091474 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 28 Oct 2019 12:13:29 -0700 Subject: [PATCH 187/221] AppDB remove test that is not valid with foundationdb --- AppDB/test/e2e/test_queries.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/AppDB/test/e2e/test_queries.py b/AppDB/test/e2e/test_queries.py index 474c1923d1..9506846ccc 100644 --- a/AppDB/test/e2e/test_queries.py +++ b/AppDB/test/e2e/test_queries.py @@ -70,19 +70,6 @@ def test_separator_in_name(self): self.assertEqual(entity['color'], 'red') self.assertEqual(entity['create_time'], create_time) - @gen_test - def test_separator_in_kind(self): - # The Cloud Datastore API allows these key names, but AppScale forbids them - # because ':' is used to separate kind names and key names when encoding a - # path. - entity = Entity('Invalid:Kind', _app=PROJECT_ID) - try: - yield self.datastore.put(entity) - except BadRequest: - pass - else: - raise Exception('Expected BadRequest. 
No error was thrown.') - class TestQueryLimit(AsyncTestCase): CASSANDRA_PAGE_SIZE = 5000 From a5fc003da7adca5597296e56dda07065711049b5 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 28 Oct 2019 11:15:09 -0700 Subject: [PATCH 188/221] FoundationDB datastore should return allocated key id on put --- AppDB/appscale/datastore/fdb/fdb_datastore.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index 4c024f4f95..9f8c2a62ad 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -123,7 +123,7 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): for entity in put_request.entity_list(): write_entry = writes[entity.key().Encode()][1] - put_response.add_key().CopyFrom(entity.key()) + put_response.add_key().CopyFrom(write_entry.key) if write_entry.version != ABSENT_VERSION: put_response.add_version(write_entry.version) From 01e19510c17811c89a681c62e328105bbd903243 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 29 Oct 2019 01:22:53 -0700 Subject: [PATCH 189/221] FoundationDB do not collapse puts for entities pending id allocation --- AppDB/appscale/datastore/fdb/fdb_datastore.py | 49 +++++++++++-------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index 9f8c2a62ad..b44e73d52b 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -85,12 +85,13 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): if put_request.has_transaction(): yield self._tx_manager.log_puts(tr, project_id, put_request) - writes = {entity.key().Encode(): (VersionEntry.from_key(entity.key()), - VersionEntry.from_key(entity.key())) + writes = {self._collapsible_id(entity): + (VersionEntry.from_key(entity.key()), + VersionEntry.from_key(entity.key())) for entity in put_request.entity_list()} else: # Eliminate multiple puts to the same key. - puts_by_key = {entity.key().Encode(): entity + puts_by_key = {self._collapsible_id(entity): entity for entity in put_request.entity_list()} writes = yield {key: self._upsert(tr, entity) for key, entity in six.iteritems(puts_by_key)} @@ -122,7 +123,7 @@ def dynamic_put(self, project_id, put_request, put_response, retries=5): self._gc.clear_later(old_entries, versionstamp_future.wait().value) for entity in put_request.entity_list(): - write_entry = writes[entity.key().Encode()][1] + write_entry = writes[self._collapsible_id(entity)][1] put_response.add_key().CopyFrom(write_entry.key) if write_entry.version != ABSENT_VERSION: put_response.add_version(write_entry.version) @@ -408,11 +409,7 @@ def add_indexes(self, project_id, indexes): @gen.coroutine def _upsert(self, tr, entity, old_entry_future=None): - last_element = entity.key().path().element(-1) - auto_id = False - if not last_element.has_name(): - auto_id = not (last_element.has_id() and last_element.id() != 0) - + auto_id = self._auto_id(entity) if auto_id: # Avoid mutating the object given. new_entity = entity_pb.EntityProto() @@ -486,14 +483,6 @@ def _apply_mutations(self, tr, project_id, queried_groups, mutations, futures[encoded_key] = self._data_manager.get_latest( tr, key, include_data=encoded_key in require_data) - # Fetch remaining entities that were mutated. 
- for mutation in mutations: - key = (mutation if isinstance(mutation, entity_pb.Reference) - else mutation.key()) - encoded_key = key.Encode() - if encoded_key not in futures: - futures[encoded_key] = self._data_manager.get_latest(tr, key) - group_updates = yield group_update_futures group_updates = [vs for vs in group_updates if vs is not None] if any(commit_vs > read_versionstamp for commit_vs in group_updates): @@ -513,10 +502,10 @@ def _apply_mutations(self, tr, project_id, queried_groups, mutations, mutation_futures = [] for mutation in self._collapse_mutations(mutations): if isinstance(mutation, entity_pb.Reference): - old_entry_future = futures[mutation.Encode()] + old_entry_future = futures.get(mutation.Encode()) mutation_futures.append(self._delete(tr, mutation, old_entry_future)) else: - old_entry_future = futures[mutation.key().Encode()] + old_entry_future = futures.get(mutation.key().Encode()) mutation_futures.append(self._upsert(tr, mutation, old_entry_future)) responses = yield mutation_futures @@ -532,11 +521,20 @@ def _collapse_mutations(mutations): if isinstance(mutation, entity_pb.Reference): key = mutation.Encode() else: - key = mutation.key().Encode() + key = FDBDatastore._collapsible_id(mutation) mutations_by_key[key] = mutation - return tuple(mutation for key, mutation in six.iteritems(mutations_by_key)) + return tuple(mutation for mutation in six.itervalues(mutations_by_key)) + + @staticmethod + def _collapsible_id(entity): + """ The "collapsible" identity is the encoded key or the entity if + an identity will be allocated later. """ + if FDBDatastore._auto_id(entity): + return id(entity) + else: + return entity.key().Encode() @staticmethod def _enforce_max_groups(mutations): @@ -554,3 +552,12 @@ def _enforce_max_groups(mutations): if len(mutated_groups) > 25: raise BadRequest(u'Too many entity groups modified in transaction') + + @staticmethod + def _auto_id(entity): + """ Should perform auto identity allocation for entity. 
""" + last_element = entity.key().path().element(-1) + auto_id = False + if not last_element.has_name(): + auto_id = not (last_element.has_id() and last_element.id() != 0) + return auto_id \ No newline at end of file From 74f620c0582f45d285a77e25b0ca1451bed916bf Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 29 Oct 2019 10:34:20 -0700 Subject: [PATCH 190/221] AppDB test client fetch all query results --- AppDB/test/e2e/client.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/AppDB/test/e2e/client.py b/AppDB/test/e2e/client.py index 1383ee8699..3587ca0476 100644 --- a/AppDB/test/e2e/client.py +++ b/AppDB/test/e2e/client.py @@ -68,11 +68,17 @@ def get(self, key, txid=None): @gen.coroutine def run_query(self, query): + entities = [] query_pb = query._ToPb() encoded_response = yield self._make_request('RunQuery', query_pb.Encode()) results_pb = datastore_pb.QueryResult(encoded_response) - raise gen.Return( - [Entity.FromPb(entity) for entity in results_pb.result_list()]) + entities.extend([Entity.FromPb(entity) for entity in results_pb.result_list()]) + while results_pb.has_more_results() and len(results_pb.result_list()) >= 100: + query_pb.set_offset(query_pb.offset() + len(results_pb.result_list())) + encoded_response = yield self._make_request('RunQuery', query_pb.Encode()) + results_pb = datastore_pb.QueryResult(encoded_response) + entities.extend([Entity.FromPb(entity) for entity in results_pb.result_list()]) + raise gen.Return(entities) @gen.coroutine def put(self, entity, txid=None): From 8988829aa8e12fdc0f699106b1cf2de51ebc7d8a Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Wed, 30 Oct 2019 15:43:30 -0700 Subject: [PATCH 191/221] Reconnect to XMPP server when connection is closed This simplifies connection handling a bit, and it fixes a problem on bionic that was preventing reconnections. 
--- XMPPReceiver/test/test_xmpp_receiver.py | 22 ++++--------- XMPPReceiver/xmpp_receiver.py | 44 +++++++++---------------- 2 files changed, 22 insertions(+), 44 deletions(-) diff --git a/XMPPReceiver/test/test_xmpp_receiver.py b/XMPPReceiver/test/test_xmpp_receiver.py index 94477b786a..bd1cdc20b0 100644 --- a/XMPPReceiver/test/test_xmpp_receiver.py +++ b/XMPPReceiver/test/test_xmpp_receiver.py @@ -5,10 +5,7 @@ import httplib import logging import os -import re -import select import sys -import types import unittest @@ -39,6 +36,7 @@ def setUp(self): flexmock(logging) logging.should_receive('basicConfig').and_return() logging.should_receive('info').with_args(str).and_return() + logging.should_receive('error').with_args(str).and_return() # and mock out all calls to try to make stderr write to the logger fake_open = flexmock(sys.modules['__builtin__']) @@ -65,7 +63,7 @@ def test_connect_to_xmpp_but_it_is_down(self): receiver = XMPPReceiver(self.appid, self.login_ip, self.load_balancer_ip, self.password) - self.assertRaises(SystemExit, receiver.listen_for_messages, messages_to_listen_for=1) + self.assertRaises(SystemExit, receiver.listen_for_messages) def test_connect_to_xmpp_but_cannot_auth(self): @@ -81,8 +79,7 @@ def test_connect_to_xmpp_but_cannot_auth(self): receiver = XMPPReceiver(self.appid, self.login_ip, self.load_balancer_ip, self.password) - self.assertRaises(SystemExit, receiver.listen_for_messages, - messages_to_listen_for=1) + self.assertRaises(SystemExit, receiver.listen_for_messages) def test_receive_one_message(self): @@ -101,23 +98,16 @@ def test_receive_one_message(self): fake_client.should_receive('sendInitPresence').and_return() # and make sure that we only process one message - fake_client.should_receive('Process').with_args(1).once() + fake_client.should_receive('Process').and_return(len('the message')).\ + and_return(0) flexmock(xmpp) xmpp.should_receive('Client').with_args(self.login_ip, debug=[]) \ .and_return(fake_client) - # finally, mock out 'select', and have it put in a message - flexmock(select) - message = {"the socket" : "xmpp"} - select.should_receive('select').with_args(['the socket'], [], [], 1) \ - .and_return(message, None, None) - receiver = XMPPReceiver(self.appid, self.login_ip, self.load_balancer_ip, self.password) - actual_messages_sent = receiver.listen_for_messages( - messages_to_listen_for=1) - self.assertEquals(1, actual_messages_sent) + receiver.listen_for_messages() def test_message_results_in_post(self): diff --git a/XMPPReceiver/xmpp_receiver.py b/XMPPReceiver/xmpp_receiver.py index 595dae4c29..425ba2ac94 100644 --- a/XMPPReceiver/xmpp_receiver.py +++ b/XMPPReceiver/xmpp_receiver.py @@ -12,7 +12,6 @@ import httplib import logging import os -import select import sys import urllib @@ -137,16 +136,8 @@ def xmpp_presence(self, conn, event): conn.send(xmpp.Presence(to=who, typ='subscribe')) - def listen_for_messages(self, messages_to_listen_for=-1): - """Polls the XMPP server for messages, responding to any that are seen. - - Args: - messages_to_listen_for: An int that represents how many messages we - should listen for. If set to the default value (-1), then we listen - for an infinite number of messages. - Returns: - An int that indicates how many messages were processed. - """ + def listen_for_messages(self): + """ Creates a connection to the XMPP server and listens for messages. 
""" jid = xmpp.protocol.JID(self.my_jid) client = xmpp.Client(jid.getDomain(), debug=[]) @@ -167,23 +158,20 @@ def listen_for_messages(self, messages_to_listen_for=-1): client.sendInitPresence(requestRoster=0) - socketlist = {client.Connection._sock:'xmpp'} - - logging.info("About to begin processing requests") - - messages_processed = 0 - while messages_processed != messages_to_listen_for: - (input_data, _, __) = select.select(socketlist.keys(), [], [], 1) - for _ in input_data: - try: - client.Process(1) - messages_processed += 1 - except xmpp.protocol.Conflict: - logging.info("Lost connection after processing {0} messages" \ - .format(messages_processed)) - return messages_processed - - return messages_processed + logging.info('Listening for incoming messages') + while True: + try: + response = client.Process(timeout=1) + except xmpp.protocol.Conflict: + lost_connection = True + else: + # A closed connection is supposed to result in a response of 0, but + # it seems `None` also indicates that. + lost_connection = response is None or response == 0 + + if lost_connection: + logging.error('Lost connection') + return if __name__ == "__main__": From 5e3a7c56f6092d9b8b3059eded03f02a735d51b8 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 30 Oct 2019 21:28:28 -0700 Subject: [PATCH 192/221] AppController reload systemd configuration on terminate --- AppController/terminate.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/AppController/terminate.rb b/AppController/terminate.rb index 58b599be1c..c59df4d468 100644 --- a/AppController/terminate.rb +++ b/AppController/terminate.rb @@ -71,6 +71,8 @@ def self.erase_appscale_state # TODO: Use the constant in djinn.rb (ZK_LOCATIONS_JSON_FILE) `rm -f #{APPSCALE_CONFIG_DIR}/zookeeper_locations.json` `rm -f #{APPSCALE_CONFIG_DIR}/zookeeper_locations` + + `systemctl daemon-reload` print "OK" end From db9d6cdd8ddf62167d3c739b29788e37d8f1c81c Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 25 Oct 2019 07:57:21 -0700 Subject: [PATCH 193/221] UA server, don't require cassandra when using postgres --- AppDB/appscale/datastore/scripts/ua_server.py | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/AppDB/appscale/datastore/scripts/ua_server.py b/AppDB/appscale/datastore/scripts/ua_server.py index d858de12f6..76ef5eeeb1 100644 --- a/AppDB/appscale/datastore/scripts/ua_server.py +++ b/AppDB/appscale/datastore/scripts/ua_server.py @@ -1018,34 +1018,35 @@ def main(): register_location(appscale_info.get_private_ip(), bindport) - - connect_to_postgres(zk_client) - if pg_connection_wrapper: - init_table(pg_connection_wrapper) - - db = appscale_datastore.DatastoreFactory.getDatastore(datastore_type) ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() valid_datastores = appscale_datastore.DatastoreFactory.valid_datastores() if datastore_type not in valid_datastores: - raise Exception('{} not in valid datastores ({})'. - format(datastore_type, valid_datastores)) + raise Exception('{} not in valid datastores ({})'. + format(datastore_type, valid_datastores)) - # Keep trying until it gets the schema. 
- timeout = 5 - while 1: - try: - user_schema = db.get_schema_sync(USER_TABLE) - except AppScaleDBConnectionError: - time.sleep(timeout) - continue - - if user_schema[0] in ERROR_CODES: - user_schema = user_schema[1:] - Users.attributes_ = user_schema - else: - time.sleep(timeout) - continue - break + connect_to_postgres(zk_client) + if pg_connection_wrapper: + init_table(pg_connection_wrapper) + db = None + else: + db = appscale_datastore.DatastoreFactory.getDatastore(datastore_type) + + # Keep trying until it gets the schema. + timeout = 5 + while 1: + try: + user_schema = db.get_schema_sync(USER_TABLE) + except AppScaleDBConnectionError: + time.sleep(timeout) + continue + + if user_schema[0] in ERROR_CODES: + user_schema = user_schema[1:] + Users.attributes_ = user_schema + else: + time.sleep(timeout) + continue + break ip = "0.0.0.0" server = SOAPpy.SOAPServer((ip, bindport)) From 14866775cd500d6e44a852150c923018425373b8 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 31 Oct 2019 12:12:32 -0700 Subject: [PATCH 194/221] UA server, create users table in ua schema --- AppDB/appscale/datastore/scripts/ua_server.py | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/AppDB/appscale/datastore/scripts/ua_server.py b/AppDB/appscale/datastore/scripts/ua_server.py index 76ef5eeeb1..48dd5c5354 100644 --- a/AppDB/appscale/datastore/scripts/ua_server.py +++ b/AppDB/appscale/datastore/scripts/ua_server.py @@ -57,6 +57,11 @@ # The schema we use to store user information. user_schema = [] +# Postgres names +schema_name = "appscale_ua" +table_name = "ua_users" +full_table_name = "{}.{}".format(schema_name, table_name) + # Different types of valid users created. VALID_USER_TYPES = ["user", "xmpp_user", "app", "channel"] @@ -67,9 +72,6 @@ zk_client = None -table_name = "ua_users" - - def is_connection_error(err): """ This function is used as retry criteria. 
@@ -121,14 +123,13 @@ def close(self): def init_table(pg_connection_wrapper): - # When multiple TQ servers are notified by ZK about new queue - # they sometimes get IntegrityError despite 'IF NOT EXISTS' @retrying.retry(max_retries=5, retry_on_exception=psycopg2.IntegrityError) def ensure_tables_created(): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'CREATE TABLE IF NOT EXISTS "{table}" (' + 'CREATE SCHEMA IF NOT EXISTS {schema};' + 'CREATE TABLE IF NOT EXISTS {schema}.{table} (' ' email varchar(500) NOT NULL,' ' pw varchar(500) NOT NULL,' ' date_creation timestamp NOT NULL,' @@ -148,8 +149,7 @@ def ensure_tables_created(): ' capabilities varchar(255),' ' PRIMARY KEY (email)' ');' - 'CREATE SCHEMA IF NOT EXISTS "appscale_ua";' - .format(table=table_name) + .format(schema=schema_name, table=table_name) ) ensure_tables_created() @@ -277,9 +277,9 @@ def does_user_exist(username, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT 1 FROM "{table}" ' + 'SELECT 1 FROM {table} ' 'WHERE email = %(username)s' - .format(table=table_name), + .format(table=full_table_name), vars={ 'username': username, } @@ -314,9 +314,9 @@ def get_user_data(username, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT {columns} FROM "{table}" ' + 'SELECT {columns} FROM {table} ' 'WHERE email = %(username)s' - .format(table=table_name, columns=', '.join(USERS_SCHEMA)), + .format(table=full_table_name, columns=', '.join(USERS_SCHEMA)), vars={ 'username': username, } @@ -389,7 +389,7 @@ def commit_new_user(user, passwd, utype, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'INSERT INTO "{table}" ({columns}) ' + 'INSERT INTO {table} ({columns}) ' 'VALUES ( ' ' %(email)s, %(pw)s, %(date_creation)s, %(date_change)s, ' ' %(date_last_login)s, %(applications)s, %(appdrop_rem_token)s, ' @@ -398,7 +398,7 @@ def commit_new_user(user, passwd, utype, secret): ' %(is_cloud_admin)s, %(capabilities)s ' ') ' 'RETURNING date_last_login' - .format(table=table_name, columns=', '.join(USERS_SCHEMA)), + .format(table=full_table_name, columns=', '.join(USERS_SCHEMA)), vars=params ) row = pg_cursor.fetchone() @@ -437,12 +437,12 @@ def add_admin_for_app(user, app, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'UPDATE "{table}" ' + 'UPDATE {table} ' 'SET applications = applications || %(app)s, ' ' date_change = current_timestamp ' 'WHERE email = %(user)s ' 'RETURNING date_change' - .format(table=table_name), + .format(table=full_table_name), vars={'app': '{' + app + '}', 'user': user} ) user_result = pg_cursor.fetchone() @@ -489,8 +489,8 @@ def get_all_users(secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT email FROM "{table}" ' - .format(table=table_name) + 'SELECT email FROM {table}' + .format(table=full_table_name) ) emails = pg_cursor.fetchall() @@ -543,13 +543,13 @@ def commit_new_token(user, token, token_exp, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'UPDATE "{table}" ' + 'UPDATE {table} ' 'SET appdrop_rem_token = %(token)s, ' ' 
appdrop_rem_token_exp = %(token_exp)s, ' ' date_change = current_timestamp ' 'WHERE email = %(user)s ' 'RETURNING email' - .format(table=table_name), + .format(table=full_table_name), vars=params ) @@ -606,11 +606,11 @@ def change_password(user, password, secret): with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'UPDATE "{table}" ' + 'UPDATE {table} ' 'SET pw = %(password)s ' 'WHERE email = %(user)s AND enabled = TRUE ' 'RETURNING enabled' - .format(table=table_name), + .format(table=full_table_name), vars={'password': password, 'user': user} ) row = pg_cursor.fetchone() @@ -655,11 +655,11 @@ def enable_user(user, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'UPDATE "{table}" ' + 'UPDATE {table} ' 'SET enabled = TRUE ' 'WHERE email = %(user)s AND enabled = FALSE ' 'RETURNING enabled' - .format(table=table_name), + .format(table=full_table_name), vars={'user': user} ) row = pg_cursor.fetchone() @@ -702,11 +702,11 @@ def disable_user(user, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'UPDATE "{table}" ' + 'UPDATE {table} ' 'SET enabled = FALSE ' 'WHERE email = %(user)s AND enabled = TRUE ' 'RETURNING enabled' - .format(table=table_name), + .format(table=full_table_name), vars={'user': user} ) row = pg_cursor.fetchone() @@ -750,10 +750,10 @@ def delete_user(user, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'DELETE FROM "{table}" ' + 'DELETE FROM {table} ' 'WHERE email = %(user)s AND enabled = FALSE ' 'RETURNING enabled' - .format(table=table_name), + .format(table=full_table_name), vars={'user': user} ) row = pg_cursor.fetchone() @@ -793,9 +793,9 @@ def is_user_enabled(user, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT enabled FROM "{table}" ' + 'SELECT enabled FROM {table} ' 'WHERE email = %(user)s' - .format(table=table_name), + .format(table=full_table_name), vars={'user': user} ) result = pg_cursor.fetchone() @@ -827,9 +827,9 @@ def is_user_cloud_admin(username, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT is_cloud_admin FROM "{table}" ' + 'SELECT is_cloud_admin FROM {table} ' 'WHERE email = %(user)s ' - .format(table=table_name), + .format(table=full_table_name), vars={'user': username} ) result = pg_cursor.fetchone() @@ -862,11 +862,11 @@ def set_cloud_admin_status(username, is_cloud_admin, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'UPDATE "{table}" ' + 'UPDATE {table} ' 'SET is_cloud_admin = %(is_cloud_admin)s ' 'WHERE email = %(user)s ' 'RETURNING date_change' - .format(table=table_name), + .format(table=full_table_name), vars={'is_cloud_admin': is_cloud_admin, 'user': username} ) user_result = pg_cursor.fetchone() @@ -894,9 +894,9 @@ def get_capabilities(username, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'SELECT capabilities FROM "{table}" ' + 'SELECT capabilities FROM {table} ' 'WHERE email = %(user)s ' - .format(table=table_name), + .format(table=full_table_name), vars={'user': username} ) user_result = pg_cursor.fetchone() @@ 
-930,11 +930,11 @@ def set_capabilities(username, capabilities, secret): with pg_connection_wrapper.get_connection() as pg_connection: with pg_connection.cursor() as pg_cursor: pg_cursor.execute( - 'UPDATE "{table}" ' + 'UPDATE {table} ' 'SET capabilities = %(capabilities)s ' 'WHERE email = %(user)s ' 'RETURNING date_change' - .format(table=table_name), + .format(table=full_table_name), vars={'capabilities': capabilities, 'user': username} ) user_result = pg_cursor.fetchone() From 4b3bfa1424547f4770b3b437899130314fe09cf8 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 31 Oct 2019 16:28:04 -0700 Subject: [PATCH 195/221] Remove debug print statement --- AppDB/test/e2e/test_queries.py | 1 - 1 file changed, 1 deletion(-) diff --git a/AppDB/test/e2e/test_queries.py b/AppDB/test/e2e/test_queries.py index 473f9a9f44..06e2f19531 100644 --- a/AppDB/test/e2e/test_queries.py +++ b/AppDB/test/e2e/test_queries.py @@ -117,7 +117,6 @@ def test_batch_put_index_entries(self): # Ensure the last specified mutation is the one that matters. query = Query('Greeting', projection=['content'], _app=PROJECT_ID) response = yield self.datastore.run_query(query) - print('response: {}'.format(response)) self.assertEqual(len(response), 1) entity = response[0] From 7eb28a0ee5c017bee000f26aaeb3d994f6edb467 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Thu, 31 Oct 2019 16:59:52 -0700 Subject: [PATCH 196/221] Ensure coroutine is called correctly --- AppDB/appscale/datastore/datastore_distributed.py | 3 ++- AppDB/appscale/datastore/scripts/datastore.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/datastore_distributed.py b/AppDB/appscale/datastore/datastore_distributed.py index 5f5a01ade5..0bea39f3fa 100644 --- a/AppDB/appscale/datastore/datastore_distributed.py +++ b/AppDB/appscale/datastore/datastore_distributed.py @@ -3249,6 +3249,7 @@ def rollback_transaction(self, app_id, txid): except zktransaction.ZKTransactionException as error: raise InternalError(str(error)) + @gen.coroutine def get_indexes(self, project_id): """ Retrieves list of indexes for a project. 
@@ -3270,7 +3271,7 @@ def get_indexes(self, project_id): except IndexInaccessible: raise InternalError('ZooKeeper is not accessible') - return indexes + raise gen.Return(indexes) @gen.coroutine def add_indexes(self, project_id, indexes): diff --git a/AppDB/appscale/datastore/scripts/datastore.py b/AppDB/appscale/datastore/scripts/datastore.py index abca872ade..18a907a238 100644 --- a/AppDB/appscale/datastore/scripts/datastore.py +++ b/AppDB/appscale/datastore/scripts/datastore.py @@ -558,7 +558,7 @@ def get_indices_request(self, app_id): global datastore_access response = datastore_pb.CompositeIndices() try: - indices = datastore_access.get_indexes(app_id) + indices = yield datastore_access.get_indexes(app_id) except (dbconstants.AppScaleDBConnectionError, dbconstants.InternalError) as error: logger.exception( From 8fbc55c5d14ae63263033b1d329d15602825faed Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 1 Nov 2019 11:43:46 -0700 Subject: [PATCH 197/221] Add postgresql initialization step to faststart --- scripts/fast-start.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scripts/fast-start.sh b/scripts/fast-start.sh index 78ed6db799..a2cd1c45b9 100755 --- a/scripts/fast-start.sh +++ b/scripts/fast-start.sh @@ -208,6 +208,15 @@ echo "Configuring local foundationdb" --fdbcli-command 'configure new single ssd' FDB_CLUSTERFILE_CONTENT=$(cat /etc/foundationdb/fdb.cluster) +echo "Configuring local postgresql" +POSTGRES_PASSWORD=$(openssl rand -base64 18) +/root/appscale-thirdparties/postgres/configure-and-start-postgres.sh \ + --host 127.0.0.1 \ + --dbname appscale \ + --username appscale \ + --password "${POSTGRES_PASSWORD}" +POSTGRES_DSN="dbname=appscale user=appscale password=${POSTGRES_PASSWORD} host=127.0.0.1" + # Let's make sure we don't overwrite and existing AppScalefile. if [ ! -e AppScalefile ]; then # Let's make sure we detected the IPs. @@ -238,6 +247,7 @@ if [ ! -e AppScalefile ]; then echo "admin_pass : $ADMIN_PASSWD" >> AppScalefile fi echo "fdb_clusterfile_content : ${FDB_CLUSTERFILE_CONTENT}" >> AppScalefile + echo "postgres_dsn : ${POSTGRES_DSN}" >> AppScalefile echo "group : faststart-${PROVIDER}" >> AppScalefile echo "done." From 9ae34427f61fdc6bf34766c3c272825f552516e7 Mon Sep 17 00:00:00 2001 From: Chris Donati Date: Fri, 1 Nov 2019 15:18:00 -0700 Subject: [PATCH 198/221] Handle deleted entitiy entries correctly This fixes a bug in the stats buffer update handling. The "present" attribute indicates whether or not there is an FDB key value present for the datastore entity key. Fetching a deleted entity will return a VersionEntry with "present"=True and "has_entity"=False. 
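As a quick reference, a sketch of the distinction (illustrative only; the flag
combinations follow from the description above, and the helper is hypothetical):

    # never written entity : present=False, has_entity=False
    # live entity          : present=True,  has_entity=True
    # deleted entity       : present=True,  has_entity=False  (record, no entity data)

    def counts_toward_stats(entry):
      """ Only versions that actually carried entity data affect entity stats. """
      return entry.has_entity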
--- AppDB/appscale/datastore/fdb/stats/containers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/stats/containers.py b/AppDB/appscale/datastore/fdb/stats/containers.py index cb2a96d19c..7cdaafe06d 100644 --- a/AppDB/appscale/datastore/fdb/stats/containers.py +++ b/AppDB/appscale/datastore/fdb/stats/containers.py @@ -285,7 +285,7 @@ def update(self, old_entry, new_entry, index_stats): delta.count += 1 delta.bytes += len(new_entry.encoded) - if old_entry.present: + if old_entry.has_entity: delta.count -= 1 delta.bytes -= len(old_entry.encoded) @@ -355,7 +355,7 @@ def update(self, old_entry, new_entry, index_stats): namespace = old_entry.namespace kind = old_entry.kind apply_props = create_apply_props(self.entity_stats, namespace, kind) - if old_entry.present: + if old_entry.has_entity: apply_props(old_entry.decoded.property_list(), subtract=True) apply_props(old_entry.decoded.raw_property_list(), subtract=True) From ae62b7e74c2882093ca4ac24e5463053234db988 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 1 Nov 2019 17:04:50 -0700 Subject: [PATCH 199/221] Remove unused environment/home configuration --- AppController/djinn.rb | 8 -------- AppController/djinnServer.rb | 4 ---- debian/appscale_install_functions.sh | 12 ------------ 3 files changed, 24 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index a4131a133c..e2a573ea33 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -890,7 +890,6 @@ def enforce_options end # Ensure we have the correct EC2 credentials available. - ENV['EC2_URL'] = @options['ec2_url'] if @options['ec2_access_key'].nil? @options['ec2_access_key'] = @options['EC2_ACCESS_KEY'] @options['ec2_secret_key'] = @options['EC2_SECRET_KEY'] @@ -3043,13 +3042,6 @@ def parse_options FileUtils.mkdir_p(my_key_dir) Djinn.log_run("chmod 600 #{APPSCALE_CONFIG_DIR}/ssh.key") Djinn.log_run("cp -p #{APPSCALE_CONFIG_DIR}/ssh.key #{my_key_loc}") - - # AWS and Euca need some evironmental variables. - if ["ec2", "euca"].include?(@options['infrastructure']) - ENV['EC2_ACCESS_KEY'] = @options['ec2_access_key'] - ENV['EC2_SECRET_KEY'] = @options['ec2_secret_key'] - ENV['EC2_URL'] = @options['ec2_url'] - end end def got_all_data diff --git a/AppController/djinnServer.rb b/AppController/djinnServer.rb index 8cbe2f1309..31e552c0bd 100755 --- a/AppController/djinnServer.rb +++ b/AppController/djinnServer.rb @@ -20,10 +20,6 @@ def initialize(*args) end end - -environment = YAML.load_file('/etc/appscale/environment.yaml') -environment.each { |k,v| ENV[k] = v } - APPSCALE_HOME = ENV['APPSCALE_HOME'] # Import for AppController diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index fc0c77771a..62c2c10d34 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -155,18 +155,6 @@ EOF # This create link to appscale settings. mkdir -pv ${DESTDIR}${CONFIG_DIR} - cat < Date: Mon, 4 Nov 2019 18:06:44 -0800 Subject: [PATCH 200/221] Install wheel for hermes and search2 for bdist_wheel --- SearchService2/build-scripts/ensure_searchservice2.sh | 1 + debian/appscale_install_functions.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/SearchService2/build-scripts/ensure_searchservice2.sh b/SearchService2/build-scripts/ensure_searchservice2.sh index cff291c375..0376759d42 100755 --- a/SearchService2/build-scripts/ensure_searchservice2.sh +++ b/SearchService2/build-scripts/ensure_searchservice2.sh @@ -25,6 +25,7 @@ if ! 
"${PIP}" --version | grep 'python 3\.' ; then fi echo "Upgrading appscale-common.." +"${PIP}" install wheel "${PIP}" install --upgrade --no-deps "${COMMON_DIR}" echo "Installing appscale-common dependencies if any missing.." "${PIP}" install "${COMMON_DIR}" diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index fc0c77771a..2813fa43b6 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -611,6 +611,7 @@ installhermes() python3 -m venv /opt/appscale_venvs/hermes/ # Install Hermes and its dependencies in it HERMES_PIP=/opt/appscale_venvs/hermes/bin/pip + ${HERMES_PIP} install wheel ${HERMES_PIP} install --upgrade --no-deps ${APPSCALE_HOME}/common ${HERMES_PIP} install ${APPSCALE_HOME}/common ${HERMES_PIP} install --upgrade --no-deps ${APPSCALE_HOME}/AdminServer From 575e2efc4b529f62eb03bb0beabde269ae9c8ee0 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 4 Nov 2019 23:58:15 -0800 Subject: [PATCH 201/221] Bootstrap allow options for git clone --- bootstrap.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bootstrap.sh b/bootstrap.sh index 880838057e..a5ae25ce28 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -15,6 +15,7 @@ APPSCALE_TOOLS_BRANCH="master" AGENTS_BRANCH="master" THIRDPARTIES_BRANCH="master" GIT_TAG="last" +GIT_CLONE_OPTS="--no-checkout" UNIT_TEST="N" BRANCH_PARAM_SPECIFIED="N" @@ -113,6 +114,11 @@ while [ $# -gt 0 ]; do if [ "${GIT_TAG}" != "dev" ]; then TAG_PARAM_SPECIFIED="Y"; fi shift; continue fi + if [ "${1}" = "--clone-opts" ]; then + shift; if [ -z "${1}" ]; then usage; fi + GIT_CLONE_OPTS="${1}" + shift; continue + fi if [ "${1}" = "-t" ]; then UNIT_TEST="Y" shift; continue @@ -228,19 +234,19 @@ fi echo "Cloning appscale repositories" # We split the commands, to ensure it fails if branch doesn't # exists (Precise git will not fail otherwise). -git clone ${APPSCALE_REPO} appscale +git clone ${GIT_CLONE_OPTS} ${APPSCALE_REPO} appscale (cd appscale; git checkout ${APPSCALE_TARGET}) VERSION=$(cat /root/appscale/VERSION | grep -oE "[0-9]+\.[0-9]+\.[0-9]+") -git clone ${APPSCALE_TOOLS_REPO} appscale-tools +git clone ${GIT_CLONE_OPTS} ${APPSCALE_TOOLS_REPO} appscale-tools (cd appscale-tools; git checkout "${TOOLS_TARGET}") if [ "${RELY_ON_TAG}" = "N" ] || version_ge "${VERSION}" 3.8.0; then - git clone ${AGENTS_REPO} appscale-agents + git clone ${GIT_CLONE_OPTS} ${AGENTS_REPO} appscale-agents (cd appscale-agents; git checkout "${AGENTS_TARGET}") fi if [ "${RELY_ON_TAG}" = "N" ] || version_ge "${VERSION}" 4.0.0; then - git clone ${THIRDPARTIES_REPO} appscale-thirdparties + git clone ${GIT_CLONE_OPTS} ${THIRDPARTIES_REPO} appscale-thirdparties (cd appscale-thirdparties; git checkout "${THIRDPARTIES_TARGET}") fi From 0ac1acc7921be7e725826426456802c30f6c03f4 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 4 Nov 2019 12:43:19 -0800 Subject: [PATCH 202/221] AppController host ip files move to /var/lib/appscale/hosts link in /etc --- AppController/djinn.rb | 32 ++++++++++++++++------------ AppController/lib/search.rb | 2 +- AppController/terminate.rb | 12 +---------- debian/appscale_install_functions.sh | 12 +++++++++++ 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index a4131a133c..e9d33c8483 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -93,7 +93,7 @@ # The location on the local file system where we store information about # where ZooKeeper servers are located. 
-ZK_LOCATIONS_FILE = '/etc/appscale/zookeeper_locations'.freeze +ZK_LOCATIONS_FILE = '/var/lib/appscale/hosts/zookeeper_locations'.freeze # The location of the logrotate scripts. LOGROTATE_DIR = '/etc/logrotate.d'.freeze @@ -295,13 +295,17 @@ class Djinn # files are written to. APPSCALE_CONFIG_DIR = '/etc/appscale'.freeze + # The location on the local filesystem where AppScale-related host + # location files are written to. + APPSCALE_HOSTS_DIR = '/var/lib/appscale/hosts'.freeze + # The tools uses this location to find deployments info. TODO: to remove # this dependency. APPSCALE_TOOLS_CONFIG_DIR = '/root/.appscale'.freeze # The location on the local filesystem where the AppController writes # the location of all the nodes which are taskqueue nodes. - TASKQUEUE_FILE = "#{APPSCALE_CONFIG_DIR}/taskqueue_nodes".freeze + TASKQUEUE_FILE = "#{APPSCALE_HOSTS_DIR}/taskqueue_nodes".freeze APPSCALE_HOME = ENV['APPSCALE_HOME'] @@ -1657,8 +1661,8 @@ def job_start(secret) # We reload our old IPs (if we find them) so we can check later if # they changed and act accordingly. begin - @my_private_ip = HelperFunctions.read_file("#{APPSCALE_CONFIG_DIR}/my_private_ip") - @my_public_ip = HelperFunctions.read_file("#{APPSCALE_CONFIG_DIR}/my_public_ip") + @my_private_ip = HelperFunctions.read_file("#{APPSCALE_HOSTS_DIR}/my_private_ip") + @my_public_ip = HelperFunctions.read_file("#{APPSCALE_HOSTS_DIR}/my_public_ip") rescue Errno::ENOENT Djinn.log_info("Couldn't find my old my_public_ip or my_private_ip.") @my_private_ip = nil @@ -4013,38 +4017,38 @@ def write_locations taskqueue_content = taskqueue_ips.join("\n") + "\n" head_node_private_ip = get_shadow.private_ip - HelperFunctions.write_file("#{APPSCALE_CONFIG_DIR}/head_node_private_ip", + HelperFunctions.write_file("#{APPSCALE_HOSTS_DIR}/head_node_private_ip", "#{head_node_private_ip}\n") Djinn.log_info("All private IPs: #{all_ips}.") - HelperFunctions.write_file("#{APPSCALE_CONFIG_DIR}/all_ips", all_ips_content) + HelperFunctions.write_file("#{APPSCALE_HOSTS_DIR}/all_ips", all_ips_content) Djinn.log_info("Load balancer location(s): #{load_balancer_ips}.") - load_balancer_file = "#{APPSCALE_CONFIG_DIR}/load_balancer_ips" + load_balancer_file = "#{APPSCALE_HOSTS_DIR}/load_balancer_ips" HelperFunctions.write_file(load_balancer_file, load_balancer_content) Djinn.log_info("Deployment public name/IP: #{login_ip}.") - login_file = "#{APPSCALE_CONFIG_DIR}/login_ip" + login_file = "#{APPSCALE_HOSTS_DIR}/login_ip" HelperFunctions.write_file(login_file, login_content) Djinn.log_info("Memcache locations: #{memcache_ips}.") - memcache_file = "#{APPSCALE_CONFIG_DIR}/memcache_ips" + memcache_file = "#{APPSCALE_HOSTS_DIR}/memcache_ips" HelperFunctions.write_file(memcache_file, memcache_content) Djinn.log_info("Taskqueue locations: #{taskqueue_ips}.") HelperFunctions.write_file(TASKQUEUE_FILE, taskqueue_content) Djinn.log_info("Database master is at #{master_ips}, slaves are at #{slave_ips}.") - HelperFunctions.write_file("#{APPSCALE_CONFIG_DIR}/masters", "#{master_content}") + HelperFunctions.write_file("#{APPSCALE_HOSTS_DIR}/masters", "#{master_content}") unless slaves_content.chomp.empty? 
- HelperFunctions.write_file("#{APPSCALE_CONFIG_DIR}/slaves", + HelperFunctions.write_file("#{APPSCALE_HOSTS_DIR}/slaves", slaves_content) end Djinn.log_info("My public IP is #{my_public}, and my private is #{my_private}.") - HelperFunctions.write_file("#{APPSCALE_CONFIG_DIR}/my_public_ip", "#{my_public}") - HelperFunctions.write_file("#{APPSCALE_CONFIG_DIR}/my_private_ip", "#{my_private}") + HelperFunctions.write_file("#{APPSCALE_HOSTS_DIR}/my_public_ip", "#{my_public}") + HelperFunctions.write_file("#{APPSCALE_HOSTS_DIR}/my_private_ip", "#{my_private}") Djinn.log_info("Writing num_of_nodes as #{num_of_nodes}.") HelperFunctions.write_file("#{APPSCALE_CONFIG_DIR}/num_of_nodes", "#{num_of_nodes}\n") @@ -4057,7 +4061,7 @@ def write_locations Djinn.log_info("Search2 service locations: #{search2_ips}.") unless search2_content.chomp.empty? - HelperFunctions.write_file('/etc/appscale/search2_ips', + HelperFunctions.write_file("#{APPSCALE_HOSTS_DIR}/search2_ips", search2_content) end end diff --git a/AppController/lib/search.rb b/AppController/lib/search.rb index 764f7cd9e2..e766968429 100644 --- a/AppController/lib/search.rb +++ b/AppController/lib/search.rb @@ -26,7 +26,7 @@ module Search PYTHON_EXEC = 'python'.freeze # Search location file. - SEARCH_LOCATION_FILE = '/etc/appscale/search_ip'.freeze + SEARCH_LOCATION_FILE = '/var/lib/appscale/hosts/search_ip'.freeze # Service name for use with helper SERVICE_NAME_SEARCH = 'appscale-search'.freeze diff --git a/AppController/terminate.rb b/AppController/terminate.rb index c59df4d468..2807244828 100644 --- a/AppController/terminate.rb +++ b/AppController/terminate.rb @@ -52,17 +52,8 @@ def self.erase_appscale_state `rm -f #{APPSCALE_CONFIG_DIR}/port-*.txt` # Remove location files. - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/all_ips") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/load_balancer_ips") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/login_ip") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/masters") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/memcache_ips") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/my_private_ip") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/my_public_ip") + `rm -f /var/lib/appscale/hosts/*` FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/num_of_nodes") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/search_ip") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/slaves") - FileUtils.rm_f("#{APPSCALE_CONFIG_DIR}/taskqueue_nodes") `rm -f /run/systemd/system/appscale-*.target.wants/*` `rm -f /run/appscale/appscale-*.env` @@ -70,7 +61,6 @@ def self.erase_appscale_state # TODO: Use the constant in djinn.rb (ZK_LOCATIONS_JSON_FILE) `rm -f #{APPSCALE_CONFIG_DIR}/zookeeper_locations.json` - `rm -f #{APPSCALE_CONFIG_DIR}/zookeeper_locations` `systemctl daemon-reload` print "OK" diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index fc0c77771a..42a8521fd2 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -174,6 +174,8 @@ EOF mkdir -pv /var/appscale/version_assets + mkdir -pv /var/lib/appscale/hosts + # This puts in place the logrotate rules. 
if [ -d /etc/logrotate.d/ ]; then cp -v ${APPSCALE_HOME}/system/logrotate.d/* /etc/logrotate.d/ @@ -595,6 +597,16 @@ installcommon() { pip install --upgrade --no-deps ${APPSCALE_HOME}/common pip install ${APPSCALE_HOME}/common + + # link /etc/ ip files to host state files + IP_FILES="all_ips head_node_private_ip load_balancer_ips login_ip masters" + IP_FILES="${IP_FILES} memcache_ips my_private_ip my_public_ip search_ip" + IP_FILES="${IP_FILES} search2_ips slaves taskqueue_nodes" + IP_FILES="${IP_FILES} zookeeper_locations" + for IP_FILE in ${IP_FILES}; do + [ ! -f "/etc/appscale/${IP_FILE}" ] || rm -v "/etc/appscale/${IP_FILE}" + ln -s -T "/var/lib/appscale/hosts/${IP_FILE}" "/etc/appscale/${IP_FILE}" + done } installadminserver() From 89ae227fb2fa53641abc819ac571c95948b567ba Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 5 Nov 2019 15:27:38 -0800 Subject: [PATCH 203/221] AppController fix warning on application relocate --- system/units/appscale-controller.service | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/system/units/appscale-controller.service b/system/units/appscale-controller.service index d09b068786..ddedd46c94 100644 --- a/system/units/appscale-controller.service +++ b/system/units/appscale-controller.service @@ -11,7 +11,9 @@ Environment=HOME=/root ExecStart=/usr/bin/ruby -w /root/appscale/AppController/djinnServer.rb SyslogIdentifier=%p # Security -CapabilityBoundingSet=CAP_DAC_OVERRIDE CAP_SETGID CAP_SETUID CAP_CHOWN CAP_SYS_MODULE CAP_AUDIT_WRITE CAP_NET_ADMIN CAP_NET_RAW CAP_KILL +CapabilityBoundingSet=CAP_DAC_OVERRIDE CAP_SETGID CAP_SETUID CAP_CHOWN \ + CAP_SYS_MODULE CAP_AUDIT_WRITE CAP_NET_ADMIN CAP_NET_BIND_SERVICE \ + CAP_NET_RAW CAP_KILL [Install] WantedBy=appscale-control.target multi-user.target \ No newline at end of file From 6f1b79fa5b84207ec8080af0f159d35b0a8f2d06 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 12 Nov 2019 09:35:29 -0800 Subject: [PATCH 204/221] Ensure coroutine is called correctly, part two --- AppDB/appscale/datastore/datastore_distributed.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/AppDB/appscale/datastore/datastore_distributed.py b/AppDB/appscale/datastore/datastore_distributed.py index 8398effa06..012d888d6b 100644 --- a/AppDB/appscale/datastore/datastore_distributed.py +++ b/AppDB/appscale/datastore/datastore_distributed.py @@ -573,7 +573,7 @@ def put_entities(self, app, entities): """ self.logger.debug('Inserting {} entities'.format(len(entities))) - composite_indexes = self.get_indexes(app) + composite_indexes = yield self.get_indexes(app) by_group = {} for entity in entities: @@ -937,7 +937,8 @@ def dynamic_delete(self, app_id, delete_request): if last_path.type() not in ent_kinds: ent_kinds.append(last_path.type()) - filtered_indexes = [index for index in self.get_indexes(app_id) + composite_indexes = yield self.get_indexes(app_id) + filtered_indexes = [index for index in composite_indexes if index.definition().entity_type() in ent_kinds] if delete_request.has_transaction(): @@ -2983,7 +2984,8 @@ def __get_query_results(self, query): filter_info = self.generate_filter_info(filters) order_info = self.generate_order_info(orders) - index_to_use = _FindIndexToUse(query, self.get_indexes(app_id)) + composite_indexes = yield self.get_indexes(app_id) + index_to_use = _FindIndexToUse(query, composite_indexes) if index_to_use is not None: result, more_results = yield self.composite_v2(query, filter_info, index_to_use) @@ -3133,7 +3135,7 @@ def apply_txn_changes(self, app, 
txn): raise dbconstants.TooManyGroupsException( 'Too many groups in transaction') - composite_indices = self.get_indexes(app) + composite_indices = yield self.get_indexes(app) decoded_groups = [entity_pb.Reference(group) for group in tx_groups] self.transaction_manager.set_groups(app, txn, decoded_groups) From 8342af201e50cc5cc90b88b2010aacb8e240bdf4 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 12 Nov 2019 15:38:55 -0800 Subject: [PATCH 205/221] Rework stats module to support all stat kinds (fix merge) --- AppDB/appscale/datastore/fdb/fdb_datastore.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index 5cb47979ed..b68a88e9b9 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -378,9 +378,11 @@ def apply_txn_changes(self, project_id, txid, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) - stat_diffs = [(project_id, old_entry.namespace, old_entry.path, stats) - for old_entry, _, stats in writes if stats is not None] - IOLoop.current().spawn_callback(self._stats_buffer.apply_diffs, stat_diffs) + mutations = [(old_entry, new_entry, index_stats) + for old_entry, new_entry, index_stats in writes + if index_stats is not None] + IOLoop.current().spawn_callback(self._stats_buffer.update, project_id, + mutations) logger.debug(u'Finished applying {}:{}'.format(project_id, txid)) @@ -535,7 +537,7 @@ def _delete(self, tr, key, old_entry_future=None): old_entry = yield old_entry_future if not old_entry.present: - raise gen.Return((old_entry, None)) + raise gen.Return((old_entry, None, None)) new_version = next_entity_version(old_entry.version) yield self._data_manager.put(tr, key, new_version, b'') From 2db5c30c0e45c84725d27fe87699d449e66398f9 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 6 Nov 2019 13:54:39 -0800 Subject: [PATCH 206/221] AppController remove hard-coded infrastructure checks --- AppController/djinn.rb | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 405b4e9341..5bf52a3ba8 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -2840,11 +2840,8 @@ def update_state_with_new_local_ip # Next, find out this machine's public IP address. In a cloud deployment, we # have to rely on the metadata server, while in a cluster deployment, it's # the same as the private IP. - if ["ec2", "euca", "gce"].include?(@options['infrastructure']) - new_public_ip = HelperFunctions.get_public_ip_from_metadata_service - else - new_public_ip = new_private_ip - end + new_public_ip = HelperFunctions.get_public_ip_from_metadata_service + new_public_ip = new_private_ip if new_public_ip.nil? # Finally, replace anywhere that the old public or private IP addresses were # used with the new one. @@ -3601,8 +3598,9 @@ def stop_groomer_service Djinn.log_info("Done stopping groomer service.") end + # Cloud if infrastructure is configured def is_cloud? 
- return ['ec2', 'euca', 'gce', 'azure'].include?(@options['infrastructure']) + return /^[a-zA-Z0-9_-]{3,}$/.match?(@options['infrastructure']) end def update_python_package(target, pip="pip") From 95f99f3020c9ae266aa6dcf9526466a7e37868fe Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 7 Nov 2019 10:58:29 -0800 Subject: [PATCH 207/221] AppController remove check for instance type configuration --- AppController/djinn.rb | 7 ------- 1 file changed, 7 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 5bf52a3ba8..8a5937ff70 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -2236,13 +2236,6 @@ def start_roles_on_nodes(ips_hash, secret) return NOT_ENOUGH_OPEN_NODES end - # Ensure we have the default type to use for the autoscaled nodes. - if @options['instance_type'].nil? - Djinn.log_warn('instance_type is undefined, hence no ' \ - 'spawning of instance is possible.') - return NOT_ENOUGH_OPEN_NODES - end - Djinn.log_info("Need to spawn #{new_nodes_roles.length} VMs.") # We create here the needed nodes, with open role and no disk. From fc96f764db4626a465d3ea171d0f8740fc495d58 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 20 Nov 2019 17:09:46 -0800 Subject: [PATCH 208/221] AppController parameter for the ec2 launch template agent --- AppController/djinn.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 8a5937ff70..dc226167ba 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -437,6 +437,7 @@ class Djinn PARAMETER_DEFAULT = 1 PARAMETER_SHOW = 2 PARAMETERS_AND_CLASS = { + 'aws_launch_template_id' => [String, nil, true], 'aws_subnet_id' => [String, nil, true], 'aws_vpc_id' => [String, nil, true], 'azure_subscription_id' => [String, nil, false], From 6e97644332879f33eb7302c063510e7cdc1dc66f Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 20 Nov 2019 17:33:48 -0800 Subject: [PATCH 209/221] AppController remove cassandra use --- AppController/djinn.rb | 201 +-------------------- AppController/djinnServer.rb | 2 - AppController/lib/app_controller_client.rb | 8 - AppController/lib/cassandra_helper.rb | 192 -------------------- AppController/terminate.rb | 15 +- AppController/test/tc_djinn.rb | 1 - scripts/setup_cassandra_config_files.py | 55 ------ 7 files changed, 5 insertions(+), 469 deletions(-) delete mode 100644 AppController/lib/cassandra_helper.rb delete mode 100755 scripts/setup_cassandra_config_files.py diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 405b4e9341..ba957795ae 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -306,8 +306,8 @@ class Djinn APPSCALE_HOME = ENV['APPSCALE_HOME'] # The location on the local filesystem where we save data that should be - # persisted across AppScale deployments. Currently this is Cassandra data, - # ZooKeeper data, and Google App Engine apps that users upload. + # persisted across AppScale deployments. Currently this is ZooKeeper data, + # and Google App Engine apps that users upload. 
PERSISTENT_MOUNT_POINT = '/opt/appscale'.freeze # The location where we can find the Python 2.7 executable, included because @@ -472,7 +472,7 @@ class Djinn 'region' => [String, nil, true], 'replication' => [Integer, '1', true], 'project' => [String, nil, false], - 'table' => [String, 'cassandra', false], + 'table' => [String, 'cassandra', false], # deprecated 'use_spot_instances' => [TrueClass, nil, false], 'user_commands' => [String, nil, true], 'verbose' => [TrueClass, 'False', true], @@ -1155,34 +1155,6 @@ def get_database_information(secret) return JSON.dump(tree) end - # Runs the Groomer service that the Datastore provides, which cleans up - # deleted entries and generates statistics about the entities stored for each - # application. - # - # Args: - # secret: A String with the shared key for authentication. - # Returns: - # 'OK' if the groomer was invoked, and BAD_SECRET_MSG if the user failed to - # authenticate correctly. - def run_groomer(secret) - return BAD_SECRET_MSG unless valid_secret?(secret) - return NOT_READY if @nodes.empty? - return INVALID_REQUEST if @options.key?('fdb_clusterfile_content') - - Thread.new { - run_groomer_command = `which appscale-groomer`.chomp - if my_node.is_db_master? - Djinn.log_run(run_groomer_command) - else - db_master = get_db_master - HelperFunctions.run_remote_command(db_master.private_ip, - run_groomer_command, db_master.ssh_key, NO_OUTPUT) - end - } - - return 'OK' - end - # Queries the AppController for a list of instance variables whose names match # the given regular expression, as well as the values associated with each # match. @@ -1487,38 +1459,6 @@ def set_read_only(read_only, secret) return 'OK' end - # Checks if the primary database node is ready. For Cassandra, this is needed - # because the seed node needs to start before the other nodes. - # Args: - # secret: A string that authenticates the caller. - # Returns: - # A string indicating whether or not the primary database node is ready. - def primary_db_is_up(secret) - return BAD_SECRET_MSG unless valid_secret?(secret) - return NOT_READY if @nodes.empty? - - primary_ip = get_db_master.private_ip - unless my_node.is_db_master? - Djinn.log_debug("Asking #{primary_ip} if database is ready.") - acc = AppControllerClient.new(get_db_master.private_ip, @@secret) - begin - return acc.primary_db_is_up - rescue FailedNodeException => e - Djinn.log_warn("Unable to ask #{primary_ip} if database is ready: #{e.to_s}.") - return NOT_READY - end - end - - lock_obtained = NODETOOL_LOCK.try_lock - begin - return NOT_READY unless lock_obtained - ready = nodes_ready.include?(primary_ip) - return "#{ready}" - ensure - NODETOOL_LOCK.unlock if lock_obtained - end - end - # Resets a user's password. # # Args: @@ -1716,18 +1656,6 @@ def job_start(secret) # credentials). enforce_options - # Load datastore helper. - # TODO: this should be the class or module. - table = @options['table'] - # require db_file - begin - require "#{table}_helper" - rescue => e - backtrace = e.backtrace.join("\n") - HelperFunctions.log_and_crash("Unable to find #{table} helper." \ - " Please verify datastore type: #{e}\n#{backtrace}") - end - # If we have uncommitted changes, we rebuild/reinstall the # corresponding packages to ensure we are using the latest code. build_uncommitted_changes @@ -2888,19 +2816,7 @@ def update_state_with_new_local_ip # Writes any custom configuration data in /etc/appscale to ZooKeeper. 
def set_custom_config - cassandra_config = {'num_tokens' => 256} - begin - contents = File.read("#{APPSCALE_CONFIG_DIR}/cassandra") - cassandra_config = JSON.parse(contents) - rescue Errno::ENOENT - Djinn.log_debug('No custom cassandra configuration found.') - rescue JSON::ParserError - Djinn.log_error('Invalid JSON in custom cassandra configuration.') - end ZKInterface.ensure_path('/appscale/config') - ZKInterface.set('/appscale/config/cassandra', JSON.dump(cassandra_config), - false) - Djinn.log_info('Set custom cassandra configuration.') if @options.key?('default_max_appserver_memory') ZKInterface.set_runtime_params( @@ -3152,78 +3068,12 @@ def start_stop_api_services db_master = node.private_ip if node.roles.include?('db_master') } } - setup_db_config_files(db_master, my_node.private_ip) - - threads << Thread.new { - Djinn.log_info("Starting database services.") - db_nodes = nil - @state_change_lock.synchronize { - db_nodes = @nodes.count{|node| node.is_db_master? or node.is_db_slave?} - } - needed_nodes = needed_for_quorum(db_nodes, - Integer(@options['replication'])) - - # If this machine is running other services, decrease Cassandra's max - # heap size. - heap_reduction = 0 - heap_reduction += 0.25 if my_node.is_compute? - if my_node.is_taskqueue_master? || my_node.is_taskqueue_slave? - heap_reduction += 0.15 - end - heap_reduction += 0.15 if my_node.is_search2? - heap_reduction = heap_reduction.round(2) - - if my_node.is_db_master? - start_db_master(false, needed_nodes, db_nodes, heap_reduction) - prime_database - else - start_db_slave(false, needed_nodes, db_nodes, heap_reduction) - end - } - else - stop_db_master - stop_db_slave end # We now wait for the essential services to go up. Djinn.log_info('Waiting for DB services ... ') threads.each { |t| t.join } - # Autoscaled nodes do not need to check if the datastore is primed: if - # we got this far, it must be primed. - am_i_autoscaled = false - get_autoscaled_nodes.each { |node| - if node.private_ip == my_node.private_ip - am_i_autoscaled = true - Djinn.log_info("Skipping database layout check on scaled node.") - break - end - } - unless am_i_autoscaled - Djinn.log_info('Ensuring necessary database tables are present') - sleep(SMALL_WAIT) until system("#{PRIME_SCRIPT} --check > /dev/null 2>&1") - - Djinn.log_info('Ensuring data layout version is correct') - layout_script = `which appscale-data-layout`.chomp - retries = 10 - loop { - output = `#{layout_script} --db-type cassandra 2>&1` - if $?.exitstatus == 0 - break - elsif $?.exitstatus == INVALID_VERSION_EXIT_CODE - HelperFunctions.log_and_crash( - 'Unexpected data layout version. Please run "appscale upgrade".') - elsif retries.zero? - HelperFunctions.log_and_crash( - 'Exceeded retries while trying to check data layout.') - else - Djinn.log_warn("Error while checking data layout:\n#{output}") - sleep(SMALL_WAIT) - end - retries -= 1 - } - end - if my_node.is_db_master? or my_node.is_db_slave? @state = "Starting UAServer" # Start the UserAppServer and wait till it's ready. @@ -3358,44 +3208,6 @@ def start_stop_api_services end end - # Creates database tables in the underlying datastore to hold information - # about the users that interact with AppScale clouds, and about the - # applications that AppScale hosts (including data that the apps themselves - # read and write). - # - # Raises: - # SystemExit: If the database could not be primed for use with AppScale, - # after ten retries. 
- def prime_database - table = @options['table'] - prime_script = `which appscale-prime-#{table}`.chomp - replication = Integer(@options['replication']) - retries = 10 - Djinn.log_info('Ensuring necessary tables have been created') - loop { - prime_cmd = "#{prime_script} --replication #{replication} >> " \ - '/var/log/appscale/prime_db.log 2>&1' - return if system(prime_cmd) - retries -= 1 - Djinn.log_warn("Failed to prime database. #{retries} retries left.") - - # If this has failed 10 times in a row, it's probably a - # "Column ID mismatch" error that seems to be caused by creating tables - # as the cluster is settling. Running a repair may fix the issue. - if retries == 1 - @state = 'Running a Cassandra repair.' - Djinn.log_warn(@state) - system("#{NODETOOL} repair") - end - - break if retries.zero? - Kernel.sleep(SMALL_WAIT) - } - - @state = "Failed to prime #{table}." - HelperFunctions.log_and_crash(@state, WAIT_TO_CRASH) - end - def start_blobstore_server # Each node uses the active load balancer to access the Datastore. BlobServer.start(get_load_balancer.private_ip, DatastoreServer::PROXY_PORT) @@ -3518,11 +3330,6 @@ def assign_datastore_processes # startup. return unless my_node.is_shadow? - backend = 'cassandra' - if @options.key?('fdb_clusterfile_content') - backend = 'fdb' - end - Djinn.log_info("Assigning datastore processes.") verbose = @options['verbose'].downcase == 'true' db_nodes = [] @@ -3536,7 +3343,7 @@ def assign_datastore_processes # machine. db_nodes.each { |node| assignments = {} - assignments['datastore'] = {'backend' => backend, 'verbose' => verbose} + assignments['datastore'] = {'backend' => 'fdb', 'verbose' => verbose} ZKInterface.set_machine_assignments(node.private_ip, assignments) Djinn.log_debug("Node #{node.private_ip} got #{assignments}.") } diff --git a/AppController/djinnServer.rb b/AppController/djinnServer.rb index 8cbe2f1309..10e4a7bab8 100755 --- a/AppController/djinnServer.rb +++ b/AppController/djinnServer.rb @@ -76,7 +76,6 @@ def on_init add_method(@djinn, "get_online_users_list", "secret") add_method(@djinn, "start_roles_on_nodes", "ips_hash", "secret") add_method(@djinn, "gather_logs", "secret") - add_method(@djinn, "run_groomer", "secret") add_method(@djinn, "get_property", "property_regex", "secret") add_method(@djinn, "set_property", "property_name", "property_value", "secret") @@ -90,7 +89,6 @@ def on_init add_method(@djinn, "does_user_exist", "username", "secret") add_method(@djinn, "create_user", "username", "password", "account_type" ,"secret") add_method(@djinn, "set_admin_role", "username", "is_cloud_admin", "capabilities" ,"secret") - add_method(@djinn, "primary_db_is_up", "secret") add_method(@djinn, "update_cron", "project_id", "secret") end end diff --git a/AppController/lib/app_controller_client.rb b/AppController/lib/app_controller_client.rb index c29ab60ac7..832322432d 100644 --- a/AppController/lib/app_controller_client.rb +++ b/AppController/lib/app_controller_client.rb @@ -62,7 +62,6 @@ def initialize(ip, secret) @conn.add_method('get_property', 'property_regex', 'secret') @conn.add_method('set_property', 'property_name', 'property_value', 'secret') @conn.add_method('set_node_read_only', 'read_only', 'secret') - @conn.add_method('primary_db_is_up', 'secret') @conn.add_method('get_app_upload_status', 'reservation_id', 'secret') @conn.add_method('get_cluster_stats_json', 'secret') @conn.add_method('get_node_stats_json', 'secret') @@ -178,13 +177,6 @@ def set_node_read_only(read_only) } end - # Checks if the Cassandra seed node 
is up. - def primary_db_is_up - make_call(NO_TIMEOUT, RETRY_ON_FAIL, 'primary_db_is_up') { - @conn.primary_db_is_up(@secret) - } - end - # Checks the status of an app upload. def get_app_upload_status(reservation_id) make_call(NO_TIMEOUT, RETRY_ON_FAIL, 'get_app_upload_status') { diff --git a/AppController/lib/cassandra_helper.rb b/AppController/lib/cassandra_helper.rb deleted file mode 100644 index da37ccd74c..0000000000 --- a/AppController/lib/cassandra_helper.rb +++ /dev/null @@ -1,192 +0,0 @@ -# Programmer: Navraj Chohan -require 'djinn' -require 'node_info' -require 'helperfunctions' -require 'service_helper' -require 'set' - -# A String that indicates where we write the process ID that Cassandra runs -# on at this machine. -PID_FILE = '/tmp/appscale-cassandra.pid'.freeze - -# A String that indicates where we install Cassandra on this machine. -CASSANDRA_DIR = '/opt/cassandra'.freeze - -# Name for service as per helper. -CASSANDRA_SERVICE_NAME = "appscale-cassandra".freeze - -# The location of the script that sets up Cassandra's config files. -SETUP_CONFIG_SCRIPT = "#{APPSCALE_HOME}/scripts/setup_cassandra_config_files.py".freeze - -# The location of the nodetool binary. -NODETOOL = "#{CASSANDRA_DIR}/cassandra/bin/nodetool".freeze - -# The location of the script that creates the initial tables. -PRIME_SCRIPT = `which appscale-prime-cassandra`.chomp - -# The number of seconds to allow Cassandra to take while starting up. -START_TIMEOUT = 60 - -# The location of the Cassandra data directory. -CASSANDRA_DATA_DIR = '/opt/appscale/cassandra'.freeze - -# Writes all the configuration files necessary to start Cassandra on this -# machine. -# -# Args: -# master_ip: A String corresponding to the private FQDN or IP address of the -# machine hosting the Database Master role. -def setup_db_config_files(master_ip, local_ip) - setup_script = "#{SETUP_CONFIG_SCRIPT} --local-ip #{local_ip} "\ - "--master-ip #{master_ip}" - until system(setup_script) - Djinn.log_warn('Error while setting up Cassandra configuration. Retrying.') - sleep(Djinn::SMALL_WAIT) - end -end - -# Starts Cassandra on this machine. Because this machine runs the DB Master -# role, it starts Cassandra first. -# -# Args: -# clear_datastore: Remove any pre-existent data in the database. -# needed: The number of nodes required for quorum. -# desired: The total number of database nodes. -# heap_reduction: A decimal representing a reduction factor for max heap -# (eg. .2 = 80% of normal calculation). -def start_db_master(clear_datastore, needed, desired, heap_reduction) - @state = 'Starting up Cassandra seed node' - Djinn.log_info(@state) - start_cassandra(clear_datastore, needed, desired, heap_reduction) -end - -# Starts Cassandra on this machine. This is identical to starting Cassandra as a -# Database Master role, with the extra step of waiting for the DB Master to boot -# Cassandra up. -# -# Args: -# clear_datastore: Remove any pre-existent data in the database. -# needed: The number of nodes required for quorum. -# desired: The total number of database nodes. -# heap_reduction: A decimal representing a reduction factor for max heap -# (eg. .2 = 80% of normal calculation). 
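# For instance, a database node that also carries the compute role and a
# taskqueue role is handed a heap_reduction of 0.25 + 0.15 = 0.40 by
# start_stop_api_services, so Cassandra starts with 60% of its normally
# calculated max heap.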
-def start_db_slave(clear_datastore, needed, desired, heap_reduction) - seed_node = get_db_master.private_ip - @state = "Waiting for Cassandra seed node at #{seed_node} to start" - Djinn.log_info(@state) - acc = AppControllerClient.new(seed_node, HelperFunctions.get_secret) - loop do - begin - break if acc.primary_db_is_up == 'true' - rescue FailedNodeException - Djinn.log_warn( - "Failed to check if Cassandra is up at #{seed_node}") - end - sleep(Djinn::SMALL_WAIT) - end - - start_cassandra(clear_datastore, needed, desired, heap_reduction) -end - -# Waits for enough database nodes to be up. -def wait_for_desired_nodes(needed, desired) - sleep(Djinn::SMALL_WAIT) until system("#{NODETOOL} status > /dev/null 2>&1") - loop do - ready = nodes_ready.length - Djinn.log_debug("#{ready} nodes are up. #{needed} are needed.") - break if ready >= needed - sleep(Djinn::SMALL_WAIT) - end - - # Wait longer for all the nodes. This reduces errors during table creation. - begin - Timeout.timeout(60) { - loop do - ready = nodes_ready.length - Djinn.log_debug("#{ready} nodes are up. #{desired} are desired.") - break if ready >= desired - sleep(Djinn::SMALL_WAIT) - end - } - rescue Timeout::Error - Djinn.log_info('Not all database nodes are ready, but there are enough ' \ - 'to achieve a quorum for every key.') - end -end - -# Starts Cassandra, and waits for enough nodes to be "Up Normal". -# -# Args: -# clear_datastore: Remove any pre-existent data in the database. -# needed: The number of nodes required for quorum. -# desired: The total number of database nodes. -# heap_reduction: A decimal representing a reduction factor for max heap -# (eg. .2 = 80% of normal calculation). -def start_cassandra(clear_datastore, needed, desired, heap_reduction) - if clear_datastore - Djinn.log_info('Erasing datastore contents') - Djinn.log_run("rm -rf #{CASSANDRA_DATA_DIR}") - end - - service_env = {} - if heap_reduction > 0 - service_env[:HEAP_REDUCTION] = heap_reduction - end - ServiceHelper.write_environment(CASSANDRA_SERVICE_NAME, service_env) - ServiceHelper.start(CASSANDRA_SERVICE_NAME) - - # Ensure enough Cassandra nodes are available. - Djinn.log_info('Waiting for Cassandra to start') - wait_for_desired_nodes(needed, desired) -end - -# Kills Cassandra on this machine. -def stop_db_master - Djinn.log_info('Stopping Cassandra master') - ServiceHelper.stop(CASSANDRA_SERVICE_NAME) -end - -# Kills Cassandra on this machine. -def stop_db_slave - Djinn.log_info('Stopping Cassandra slave') - ServiceHelper.stop(CASSANDRA_SERVICE_NAME) -end - -# Calculates the number of nodes needed for a quorum for every token. -def needed_for_quorum(total_nodes, replication) - if total_nodes < 1 || replication < 1 - raise Exception('At least 1 database machine is needed.') - end - if replication > total_nodes - raise Exception( - 'The replication factor cannot exceed the number of database machines.') - end - - can_fail = (replication / 2.0 - 1).ceil - total_nodes - can_fail -end - -# Returns an array of nodes in 'Up Normal' state. -def nodes_ready - # Example output of `nodetool gossipinfo`: - # /192.168.33.10 - # ... - # STATUS:15272:NORMAL,f02dd17... - # LOAD:263359:1.29168682182E11 - # ... - # /192.168.33.11 - # ... - output = `"#{NODETOOL}" gossipinfo` - return [] unless $?.exitstatus == 0 - - live_nodes = Set[] - current_node = nil - output.split("\n").each { |line| - current_node = line[1..-1] if line.start_with?('/') - next if current_node.nil? 
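    # A node counts as live once its gossip section contains a line such as
    # "STATUS:15272:NORMAL,..." (see the sample output above); the address
    # parsed from the preceding "/x.x.x.x" line is what gets recorded.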
- if line.include?('STATUS') && line.include?('NORMAL') - live_nodes.add(current_node) - end - } - live_nodes.to_a -end diff --git a/AppController/terminate.rb b/AppController/terminate.rb index c59df4d468..ec45307e08 100644 --- a/AppController/terminate.rb +++ b/AppController/terminate.rb @@ -114,7 +114,6 @@ def self.erase_appscale_full_state `rm -rf /etc/cron.d/appscale-*` # Delete stored data. - `rm -rf /opt/appscale/cassandra` `rm -rf /opt/appscale/zookeeper` `rm -rf /opt/appscale/logserver/*` `rm -rf /opt/appscale/apps` @@ -128,20 +127,8 @@ def self.erase_appscale_full_state # Tells any services that persist data across AppScale runs to stop writing # new data to the filesystem, since killing them is imminent. # - # For right now, this is just Cassandra and ZooKeeper. + # For right now, this is just ZooKeeper. def self.disable_database_writes - # First, tell Cassandra that no more writes should be accepted on this node. - ifconfig = `ifconfig` - bound_addrs = ifconfig.scan(/inet .*?(\d+.\d+.\d+.\d+) /).flatten - bound_addrs.delete("127.0.0.1") - ip = bound_addrs[0] - - # Make sure we have cassandra running, otherwise nodetool may get - # stuck. - if system("systemctl --quiet is-active appscale-cassandra.service") - `/opt/cassandra/cassandra/bin/nodetool -h #{ip} -p 7199 drain` - end - # Next, stop ZooKeeper politely `systemctl stop zookeeper.service` end diff --git a/AppController/test/tc_djinn.rb b/AppController/test/tc_djinn.rb index 6e2448696c..273c127704 100644 --- a/AppController/test/tc_djinn.rb +++ b/AppController/test/tc_djinn.rb @@ -56,7 +56,6 @@ def test_functions_w_bad_secret assert_equal(BAD_SECRET_MSG, djinn.job_start(@secret)) assert_equal(BAD_SECRET_MSG, djinn.get_online_users_list(@secret)) assert_equal(BAD_SECRET_MSG, djinn.start_roles_on_nodes({}, @secret)) - assert_equal(BAD_SECRET_MSG, djinn.run_groomer(@secret)) assert_equal(BAD_SECRET_MSG, djinn.get_property('baz', @secret)) assert_equal(BAD_SECRET_MSG, djinn.set_property('baz', 'qux', @secret)) end diff --git a/scripts/setup_cassandra_config_files.py b/scripts/setup_cassandra_config_files.py deleted file mode 100755 index 2eacdb3b19..0000000000 --- a/scripts/setup_cassandra_config_files.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python2 -""" This script writes all the configuration files necessary to start Cassandra -on this machine.""" - -import argparse -import os -import pkgutil -import sys - -from kazoo.client import KazooClient - -from appscale.common import appscale_info -from appscale.common.deployment_config import DeploymentConfig -from appscale.common.deployment_config import InvalidConfig - -sys.path.append(os.path.join(os.path.dirname(__file__), '../AppDB')) -from appscale.datastore.cassandra_env.cassandra_interface import\ - CASSANDRA_INSTALL_DIR - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Creates Cassandra's configuration files") - parser.add_argument('--local-ip', required=True, - help='The private IP address of this machine.') - parser.add_argument('--master-ip', required=True, - help='The private IP address of the database master.') - parser.add_argument('--zk-locations', required=False, - help='The location of Zookeeper.') - args = parser.parse_args() - zk_locations = args.zk_locations if args.zk_locations else \ - appscale_info.get_zk_locations_string() - zk_client = KazooClient(hosts=zk_locations) - zk_client.start() - deployment_config = DeploymentConfig(zk_client) - cassandra_config = deployment_config.get_config('cassandra') - if 'num_tokens' 
not in cassandra_config: - raise InvalidConfig('num_tokens not specified in deployment config.') - num_tokens = cassandra_config['num_tokens'] - - replacements = {'APPSCALE-LOCAL': args.local_ip, - 'APPSCALE-MASTER': args.master_ip, - 'APPSCALE-NUM-TOKENS': num_tokens} - - for filename in ('cassandra.yaml', 'cassandra-env.sh'): - dest_file_path = os.path.join(CASSANDRA_INSTALL_DIR, 'cassandra', 'conf', - filename) - contents = pkgutil.get_data('appscale.datastore.cassandra_env', - 'templates/{}'.format(filename)) - for key, replacement in replacements.items(): - if replacement is None: - replacement = '' - contents = contents.replace(key, str(replacement)) - with open(dest_file_path, 'w') as dest_file: - dest_file.write(contents) From 7ba586dc60cebb6d4ca838f0e8fdcbb21ff869db Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 27 Nov 2019 14:44:43 -0800 Subject: [PATCH 210/221] AppController remove cassandra use, remove groomers --- AppController/djinn.rb | 36 ------------------ AppController/lib/groomer_service.rb | 38 ------------------- system/rsyslog.d/20-appscale-service.conf | 6 --- system/units/appscale-groomer.service | 13 ------- .../appscale-transaction-groomer.service | 13 ------- 5 files changed, 106 deletions(-) delete mode 100644 AppController/lib/groomer_service.rb delete mode 100644 system/units/appscale-groomer.service delete mode 100644 system/units/appscale-transaction-groomer.service diff --git a/AppController/djinn.rb b/AppController/djinn.rb index ba957795ae..a2f63a5a82 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -32,7 +32,6 @@ require 'datastore_server' require 'ejabberd' require 'error_app' -require 'groomer_service' require 'haproxy' require 'helperfunctions' require 'hermes_client' @@ -3105,27 +3104,8 @@ def start_stop_api_services @done_initializing = true Djinn.log_info("UserAppServer is ready.") - groomer_required = !@options.key?('fdb_clusterfile_content') - # The services below depends directly or indirectly on the UAServer to # be operational. So we start them after we test the UAServer. - threads = [] - if groomer_required && (my_node.is_db_master? || my_node.is_db_slave? || - my_node.is_zookeeper?) - threads << Thread.new { - if my_node.is_db_master? or my_node.is_db_slave? - start_groomer_service - verbose = @options['verbose'].downcase == 'true' - GroomerService.start_transaction_groomer(verbose) - end - } - else - threads << Thread.new { - stop_groomer_service - GroomerService.stop_transaction_groomer - } - end - if my_node.is_memcache? threads << Thread.new { start_memcache } else @@ -3299,15 +3279,6 @@ def start_hermes Djinn.log_info("Done starting Hermes service.") end - # Starts the groomer service on this node. The groomer cleans the datastore of deleted - # items and removes old logs. - def start_groomer_service - @state = "Starting Groomer Service" - Djinn.log_info("Starting groomer service.") - GroomerService.start - Djinn.log_info("Done starting groomer service.") - end - def start_soap_server db_master_ip = nil @state_change_lock.synchronize { @@ -3401,13 +3372,6 @@ def stop_app_manager_server ServiceHelper.stop('appscale-instance-manager') end - # Stops the groomer service. - def stop_groomer_service - Djinn.log_info("Stopping groomer service.") - GroomerService.stop - Djinn.log_info("Done stopping groomer service.") - end - def is_cloud? 
return ['ec2', 'euca', 'gce', 'azure'].include?(@options['infrastructure']) end diff --git a/AppController/lib/groomer_service.rb b/AppController/lib/groomer_service.rb deleted file mode 100644 index 273f9a768f..0000000000 --- a/AppController/lib/groomer_service.rb +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/ruby -w - - -$:.unshift File.join(File.dirname(__FILE__)) -require 'helperfunctions' -require 'service_helper' - - -# Starts and stops the datastore groomer service. -module GroomerService - - # Groomer service name for use with helper - SERVICE_NAME_GROOMER = 'appscale-groomer'.freeze - - # Transaction groomer service name for use with helper - SERVICE_NAME_TX_GROOMER = 'appscale-transaction-groomer'.freeze - - # Starts the Groomer Service on this machine. - def self.start() - ServiceHelper.start(SERVICE_NAME_GROOMER) - end - - # Stops the groomer service running on this machine. - def self.stop() - ServiceHelper.stop(SERVICE_NAME_GROOMER) - end - - def self.start_transaction_groomer(verbose) - service_env = {} - service_env[:APPSCALE_OPTION_VERBOSE] = '--verbose' if verbose - ServiceHelper.write_environment(SERVICE_NAME_TX_GROOMER, service_env) - ServiceHelper.start(SERVICE_NAME_TX_GROOMER) - end - - def self.stop_transaction_groomer - ServiceHelper.stop(SERVICE_NAME_TX_GROOMER) - end -end diff --git a/system/rsyslog.d/20-appscale-service.conf b/system/rsyslog.d/20-appscale-service.conf index 8444c40183..5e3319e6f3 100644 --- a/system/rsyslog.d/20-appscale-service.conf +++ b/system/rsyslog.d/20-appscale-service.conf @@ -36,9 +36,6 @@ template(name="APPSCALE_INSTANCE_LOGNAME" type="list") { :programname, isequal, "appscale-flower" /var/log/appscale/flower.log;APPSCALE & stop -:programname, isequal, "appscale-groomer" /var/log/appscale/groomer_service.log;APPSCALE -& stop - :programname, isequal, "appscale-hermes" /var/log/appscale/hermes.log;APPSCALE & stop @@ -54,8 +51,5 @@ template(name="APPSCALE_INSTANCE_LOGNAME" type="list") { :programname, startswith, "appscale-taskqueue-" -?APPSCALE_LOGNAME;APPSCALE & stop -:programname, isequal, "appscale-transaction-groomer" /var/log/appscale/transaction_groomer.log;APPSCALE -& stop - :programname, isequal, "appscale-uaserver" /var/log/appscale/uaserver.log;APPSCALE & stop diff --git a/system/units/appscale-groomer.service b/system/units/appscale-groomer.service deleted file mode 100644 index cd38eb62e7..0000000000 --- a/system/units/appscale-groomer.service +++ /dev/null @@ -1,13 +0,0 @@ -[Unit] -Description=AppScale Datastore Groomer -Before=appscale-service.target -PartOf=appscale-service.target - -[Service] -ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-groomer-service -SyslogIdentifier=%p -# Resource controls -MemoryLimit=512M - -[Install] -WantedBy=appscale-service.target \ No newline at end of file diff --git a/system/units/appscale-transaction-groomer.service b/system/units/appscale-transaction-groomer.service deleted file mode 100644 index f652806850..0000000000 --- a/system/units/appscale-transaction-groomer.service +++ /dev/null @@ -1,13 +0,0 @@ -[Unit] -Description=AppScale Datastore Transaction Groomer -Before=appscale-service.target -PartOf=appscale-service.target - -[Service] -ExecStart=/usr/bin/python2 ${APPSCALE_SERVICE_BIN}/appscale-transaction-groomer $APPSCALE_OPTION_VERBOSE -SyslogIdentifier=%p -# Resource controls -MemoryLimit=512M - -[Install] -WantedBy=appscale-service.target \ No newline at end of file From d89b04f181f2402715ee6e2aa2a56d39440aaa2f Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Wed, 11 
Dec 2019 23:56:52 -0800 Subject: [PATCH 211/221] UAServer do not use user_schema with postgres backend --- AppDB/appscale/datastore/scripts/ua_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AppDB/appscale/datastore/scripts/ua_server.py b/AppDB/appscale/datastore/scripts/ua_server.py index 48dd5c5354..45efa20577 100644 --- a/AppDB/appscale/datastore/scripts/ua_server.py +++ b/AppDB/appscale/datastore/scripts/ua_server.py @@ -331,10 +331,10 @@ def get_user_data(username, secret): if not result: raise gen.Return('Error: User {} does not exist'.format(username)) - if len(user_schema) != len(result): + if len(USERS_SCHEMA) != len(result): raise gen.Return( "Error: Bad length of user schema vs user result " - "user schema: " + str(user_schema) + " result: " + str(result) + "user schema: " + str(USERS_SCHEMA) + " result: " + str(result) ) user = Users("a", "b", "c") From 384f8097e0699b960f5e6349c49ad491c03c3bb0 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 28 Jan 2020 19:39:39 -0800 Subject: [PATCH 212/221] AppDB filter fdb delete mutations before stats update --- AppDB/appscale/datastore/fdb/fdb_datastore.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index 7681983f96..eabfaeb3ca 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -378,7 +378,8 @@ def apply_txn_changes(self, project_id, txid, retries=5): if old_entries: self._gc.clear_later(old_entries, versionstamp_future.wait().value) - mutations = [(old_entry, new_entry, index_stats) + mutations = [(old_entry, FDBDatastore._filter_version(new_entry), + index_stats) for old_entry, new_entry, index_stats in writes if index_stats is not None] IOLoop.current().spawn_callback(self._stats_buffer.update, project_id, @@ -622,6 +623,14 @@ def _collapsible_id(entity): else: return entity.key().Encode() + @staticmethod + def _filter_version(entity): + """ Filter out any entity that is actually a delete version. """ + if isinstance(entity, (int, long)): + return None + else: + return entity + @staticmethod def _enforce_max_groups(mutations): """ Raises an exception if too many groups were modified. 
""" From 84f87d5be1ffa161a66a160f92d719047f170904 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Tue, 28 Jan 2020 19:43:34 -0800 Subject: [PATCH 213/221] AppDB restore fdb missing codec import --- AppDB/appscale/datastore/fdb/index_directories.py | 1 + 1 file changed, 1 insertion(+) diff --git a/AppDB/appscale/datastore/fdb/index_directories.py b/AppDB/appscale/datastore/fdb/index_directories.py index 12c1594581..3f290cc280 100644 --- a/AppDB/appscale/datastore/fdb/index_directories.py +++ b/AppDB/appscale/datastore/fdb/index_directories.py @@ -5,6 +5,7 @@ from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER from appscale.datastore.dbconstants import BadRequest, InternalError +from appscale.datastore.fdb import codecs from appscale.datastore.fdb.codecs import ( decode_value, encode_value, encode_versionstamp_index, Path) from appscale.datastore.fdb.utils import ( From d58e3024699e2d82653317058b4aad1c38f8a344 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 30 Jan 2020 14:48:40 -0800 Subject: [PATCH 214/221] AppDB restore fdb missing entity_pb import --- AppDB/appscale/datastore/fdb/indexes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/AppDB/appscale/datastore/fdb/indexes.py b/AppDB/appscale/datastore/fdb/indexes.py index c9ecbabb04..b3dcdb796f 100644 --- a/AppDB/appscale/datastore/fdb/indexes.py +++ b/AppDB/appscale/datastore/fdb/indexes.py @@ -25,6 +25,7 @@ from appscale.datastore.dbconstants import BadRequest, InternalError sys.path.append(APPSCALE_PYTHON_APPSERVER) +from google.appengine.datastore import entity_pb from google.appengine.datastore.datastore_pb import Query_Filter, Query_Order logger = logging.getLogger(__name__) From e751a5427b426d69ffcc958a84b25cbc1207d175 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Thu, 30 Jan 2020 15:27:23 -0800 Subject: [PATCH 215/221] AppDB fdb tx metadata decoding use dict for dedupe as key not hashable --- AppDB/appscale/datastore/fdb/transactions.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/AppDB/appscale/datastore/fdb/transactions.py b/AppDB/appscale/datastore/fdb/transactions.py index 3321631f6d..1aa39ef32d 100644 --- a/AppDB/appscale/datastore/fdb/transactions.py +++ b/AppDB/appscale/datastore/fdb/transactions.py @@ -141,10 +141,11 @@ def decode_metadata(self, txid, kvs): else: raise InternalError(u'Unrecognized RPC type') - lookups = set() + lookups = dict() mutations = [] for chunks in six.itervalues(lookup_rpcs): - lookups.update(self._unpack_keys(b''.join(chunks))) + lookups.update([(key.SerializeToString(), key) + for key in self._unpack_keys(b''.join(chunks))]) for rpc_info in mutation_rpcs: rpc_type = rpc_info[0] @@ -154,7 +155,7 @@ def decode_metadata(self, txid, kvs): else: mutations.extend(self._unpack_keys(blob)) - return lookups, queried_groups, mutations + return list(six.itervalues(lookups)), queried_groups, mutations def get_txid_slice(self, txid): prefix = self._txid_prefix(txid) From be21f3672ca3b5c6494a5e609f6dc1942421ec5e Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 31 Jan 2020 13:31:17 -0800 Subject: [PATCH 216/221] AppDB remove cassandra support --- AppDB/README.rst | 42 - .../appscale/datastore/appscale_datastore.py | 30 - .../datastore/appscale_datastore_batch.py | 34 - .../datastore/cassandra_env/__init__.py | 0 .../cassandra_env/cassandra_interface.py | 1091 ------ .../datastore/cassandra_env/constants.py | 8 - .../cassandra_env/entity_id_allocator.py | 317 -- .../datastore/cassandra_env/get_token.py | 166 - .../datastore/cassandra_env/large_batch.py 
| 423 --- .../datastore/cassandra_env/py_cassandra.py | 203 - .../datastore/cassandra_env/rebalance.py | 195 - .../datastore/cassandra_env/retry_policies.py | 65 - .../datastore/cassandra_env/schema.py | 473 --- .../cassandra_env/templates/cassandra-env.sh | 320 -- .../cassandra_env/templates/cassandra.yaml | 1237 ------ .../cassandra_env/tornado_cassandra.py | 85 - .../appscale/datastore/cassandra_env/utils.py | 187 - .../datastore/datastore_distributed.py | 3309 ----------------- AppDB/appscale/datastore/dbconstants.py | 7 +- AppDB/appscale/datastore/dbinterface.py | 37 - AppDB/appscale/datastore/entity_utils.py | 58 - AppDB/appscale/datastore/fdb/fdb_datastore.py | 1 - AppDB/appscale/datastore/groomer.py | 1212 ------ AppDB/appscale/datastore/index_manager.py | 268 -- AppDB/appscale/datastore/range_iterator.py | 242 -- .../appscale/datastore/scripts/data_layout.py | 23 - AppDB/appscale/datastore/scripts/datastore.py | 94 +- .../datastore/scripts/delete_records.py | 44 - .../datastore/scripts/groomer_service.py | 35 - .../datastore/scripts/prime_cassandra.py | 24 - .../datastore/scripts/transaction_groomer.py | 498 --- AppDB/appscale/datastore/scripts/ua_server.py | 710 ++-- .../datastore/scripts/ua_server_backup.py | 60 +- .../datastore/scripts/ua_server_restore.py | 64 +- .../datastore/scripts/update_index.py | 101 - .../datastore/scripts/upgrade_schema.py | 114 - .../datastore/scripts/view_records.py | 82 - AppDB/appscale/datastore/utils.py | 53 - .../appscale/datastore/zkappscale/__init__.py | 0 .../datastore/zkappscale/constants.py | 11 - .../datastore/zkappscale/entity_lock.py | 394 -- .../zkappscale/inspectable_counter.py | 98 - .../datastore/zkappscale/tornado_kazoo.py | 546 --- .../zkappscale/transaction_manager.py | 373 -- .../datastore/zkappscale/zktransaction.py | 988 ----- AppDB/setup.py | 21 +- AppDB/test/e2e/test_queries.py | 16 - AppDB/test/unit/test_cassandra_interface.py | 167 - AppDB/test/unit/test_cassandra_prime.py | 27 - AppDB/test/unit/test_datastore_server.py | 1070 ------ AppDB/test/unit/test_entity_utils.py | 68 - AppDB/test/unit/test_groomer.py | 220 -- AppDB/test/unit/test_transaction_manager.py | 107 - AppDB/test/unit/test_zookeeper.py | 400 -- 54 files changed, 269 insertions(+), 16149 deletions(-) delete mode 100644 AppDB/appscale/datastore/appscale_datastore.py delete mode 100644 AppDB/appscale/datastore/appscale_datastore_batch.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/__init__.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/cassandra_interface.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/constants.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/entity_id_allocator.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/get_token.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/large_batch.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/py_cassandra.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/rebalance.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/retry_policies.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/schema.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/templates/cassandra-env.sh delete mode 100644 AppDB/appscale/datastore/cassandra_env/templates/cassandra.yaml delete mode 100644 AppDB/appscale/datastore/cassandra_env/tornado_cassandra.py delete mode 100644 AppDB/appscale/datastore/cassandra_env/utils.py delete mode 100644 AppDB/appscale/datastore/datastore_distributed.py delete mode 100644 
AppDB/appscale/datastore/dbinterface.py delete mode 100644 AppDB/appscale/datastore/entity_utils.py delete mode 100644 AppDB/appscale/datastore/groomer.py delete mode 100644 AppDB/appscale/datastore/index_manager.py delete mode 100644 AppDB/appscale/datastore/range_iterator.py delete mode 100644 AppDB/appscale/datastore/scripts/data_layout.py delete mode 100644 AppDB/appscale/datastore/scripts/delete_records.py delete mode 100644 AppDB/appscale/datastore/scripts/groomer_service.py delete mode 100644 AppDB/appscale/datastore/scripts/prime_cassandra.py delete mode 100644 AppDB/appscale/datastore/scripts/transaction_groomer.py delete mode 100644 AppDB/appscale/datastore/scripts/update_index.py delete mode 100644 AppDB/appscale/datastore/scripts/upgrade_schema.py delete mode 100644 AppDB/appscale/datastore/scripts/view_records.py delete mode 100644 AppDB/appscale/datastore/zkappscale/__init__.py delete mode 100644 AppDB/appscale/datastore/zkappscale/constants.py delete mode 100644 AppDB/appscale/datastore/zkappscale/entity_lock.py delete mode 100644 AppDB/appscale/datastore/zkappscale/inspectable_counter.py delete mode 100644 AppDB/appscale/datastore/zkappscale/tornado_kazoo.py delete mode 100644 AppDB/appscale/datastore/zkappscale/transaction_manager.py delete mode 100644 AppDB/appscale/datastore/zkappscale/zktransaction.py delete mode 100644 AppDB/test/unit/test_cassandra_interface.py delete mode 100644 AppDB/test/unit/test_cassandra_prime.py delete mode 100644 AppDB/test/unit/test_datastore_server.py delete mode 100644 AppDB/test/unit/test_entity_utils.py delete mode 100644 AppDB/test/unit/test_groomer.py delete mode 100644 AppDB/test/unit/test_transaction_manager.py delete mode 100644 AppDB/test/unit/test_zookeeper.py diff --git a/AppDB/README.rst b/AppDB/README.rst index 691dde397d..da43905e9d 100644 --- a/AppDB/README.rst +++ b/AppDB/README.rst @@ -9,46 +9,4 @@ One example of using this as a standalone service is redirecting datastore requests from an application running in GAE to some other environment with this service running. -How to set up -============= - -1. `Start a Cassandra cluster`_ - - * You can use cassandra.yaml and cassandra-env.sh in - appscale/datastore/cassandra-env/templates as a starting point if you - replace all options that start with "APPSCALE-" - * Make sure you use the ByteOrderedPartitioner - * Specifying num_tokens as 1 for all nodes is recommended if you want to be - able to rebalance your cluster - -2. `Start a ZooKeeper cluster`_ -3. Install the appscale-datastore Python package with ``pip install ./AppDB`` -4. Create the following files (the file names are not applicable to Cassandra, - but they are an artifact from when AppScale used different databases): - - * /etc/appscale/masters: This should contain the private IP address of any - one of the Cassandra machines. - * /etc/appscale/slaves: This should contain the IP addresses of the - remaining machines in the cluster. Each address should be on a different - line. - -5. Create a file called /etc/appscale/zookeeper_locations with the content - :: - - zk-ip-1 - zk-ip-2 - - (where zk-ip-1 and zk-ip-2 are the locations of the machines - in the ZooKeeper cluster). -6. Run ``appscale-prime-cassandra --replication x`` where "x" is the - replication factor you want for the datastore's keyspace. -7. Start the datastore server with ``appscale-datastore -p x`` where "x" is the - port you would like to start the server on. 
You probably want to start more - than one since each can currently only handle one request at a time. - AppScale starts 2x the number of cores on the machine. - .. _Google Cloud Datastore: https://cloud.google.com/datastore/ -.. _Start a Cassandra cluster: - http://cassandra.apache.org/doc/latest/getting_started/index.html -.. _Start a ZooKeeper cluster: - https://zookeeper.apache.org/doc/trunk/zookeeperStarted.html diff --git a/AppDB/appscale/datastore/appscale_datastore.py b/AppDB/appscale/datastore/appscale_datastore.py deleted file mode 100644 index 9ae0e19840..0000000000 --- a/AppDB/appscale/datastore/appscale_datastore.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/python -# See LICENSE file - -import appscale.datastore -import importlib -import os -import pkgutil - -DB_ERROR = "DB_ERROR:" - -ERROR_CODES = [DB_ERROR] - - -class DatastoreFactory: - @classmethod - def getDatastore(cls, d_type): - db_module = importlib.import_module( - 'appscale.datastore.{0}_env.py_{0}'.format(d_type)) - return db_module.DatastoreProxy() - - @classmethod - def valid_datastores(cls): - datastore_package_dir = os.path.dirname(appscale.datastore.__file__) - return [pkg.replace('_env', '') for _, pkg, ispkg - in pkgutil.iter_modules([datastore_package_dir]) - if ispkg and pkg.endswith('_env')] - - @classmethod - def error_codes(cls): - return ERROR_CODES diff --git a/AppDB/appscale/datastore/appscale_datastore_batch.py b/AppDB/appscale/datastore/appscale_datastore_batch.py deleted file mode 100644 index c96e2bfb59..0000000000 --- a/AppDB/appscale/datastore/appscale_datastore_batch.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/python -# See LICENSE file - -import appscale.datastore -import importlib -import logging -import os -import pkgutil - - -class DatastoreFactory: - - @classmethod - def getDatastore(cls, d_type, log_level=logging.INFO): - """ Returns a reference for the datastore. - - Args: - d_type: The name of the datastore (ex: cassandra) - log_level: The logging level to use. 
- """ - db_module = importlib.import_module( - 'appscale.datastore.{0}_env.{0}_interface'.format(d_type)) - return db_module.DatastoreProxy(log_level=log_level) - - @classmethod - def valid_datastores(cls): - """ Returns a list of directories where the datastore code is - - Returns: Directory list - """ - datastore_package_dir = os.path.dirname(appscale.datastore.__file__) - return [pkg.replace('_env', '') for _, pkg, ispkg - in pkgutil.iter_modules([datastore_package_dir]) - if ispkg and pkg.endswith('_env')] diff --git a/AppDB/appscale/datastore/cassandra_env/__init__.py b/AppDB/appscale/datastore/cassandra_env/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/AppDB/appscale/datastore/cassandra_env/cassandra_interface.py b/AppDB/appscale/datastore/cassandra_env/cassandra_interface.py deleted file mode 100644 index e8e0a170dc..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/cassandra_interface.py +++ /dev/null @@ -1,1091 +0,0 @@ -# Programmer: Navraj Chohan - -""" - Cassandra Interface for AppScale -""" -import datetime -import logging -import struct -import sys -import time -import uuid - -from appscale.common import appscale_info -from appscale.common.async_retrying import retry_raw_coroutine -from appscale.common.constants import SCHEMA_CHANGE_TIMEOUT -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -import cassandra -from cassandra.cluster import Cluster -from cassandra.query import BatchStatement -from cassandra.query import ConsistencyLevel -from cassandra.query import SimpleStatement -from cassandra.query import ValueSequence -from tornado import gen - -from appscale.datastore import dbconstants -from appscale.datastore.cassandra_env.constants import ( - CURRENT_VERSION, LB_POLICY -) -from appscale.datastore.cassandra_env.large_batch import ( - BatchNotApplied, FailedBatch, LargeBatch -) -from appscale.datastore.cassandra_env.retry_policies import ( - BASIC_RETRIES, NO_RETRIES -) -from appscale.datastore.cassandra_env.tornado_cassandra import TornadoCassandra -from appscale.datastore.dbconstants import ( - AppScaleDBConnectionError, Operations, TxnActions -) -from appscale.datastore.dbinterface import AppDBInterface -from appscale.datastore.utils import create_key, get_write_time, tx_partition, \ - tornado_synchronous - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.api.taskqueue import taskqueue_service_pb -from google.appengine.datastore import entity_pb - - -# The directory Cassandra is installed to. -CASSANDRA_INSTALL_DIR = '/opt/cassandra' - -# The maximum amount of entities to fetch concurrently. -ENTITY_FETCH_THRESHOLD = 100 - -# Full path for the nodetool binary. -NODE_TOOL = '{}/cassandra/bin/nodetool'.format(CASSANDRA_INSTALL_DIR) - -# The keyspace used for all tables -KEYSPACE = "Keyspace1" - -# The number of times to retry connecting to Cassandra. -INITIAL_CONNECT_RETRIES = 20 - -# The metadata key for the data layout version. -VERSION_INFO_KEY = 'version' - -# The metadata key used to indicate the state of the indexes. -INDEX_STATE_KEY = 'index_state' - -# The metadata key used to indicate whether or not some entities are missing -# the scatter property. -SCATTER_PROP_KEY = 'scatter_prop' - -# The metadata key indicating that the database has been primed. -PRIMED_KEY = 'primed' - -# The size in bytes that a batch must be to use the batches table. -LARGE_BATCH_THRESHOLD = 5 << 10 - -logger = logging.getLogger(__name__) - - -def batch_size(batch): - """ Calculates the size of a batch. 
- - Args: - batch: A list of dictionaries representing mutations. - Returns: - An integer specifying the size in bytes of the batch. - """ - size = 0 - for mutation in batch: - size += len(mutation['key']) - if 'values' in mutation: - for value in mutation['values'].values(): - size += len(value) - return size - - -class ThriftColumn(object): - """ Columns created by default with thrift interface. """ - KEY = 'key' - COLUMN_NAME = 'column1' - VALUE = 'value' - - -class IndexStates(object): - """ Possible states for datastore indexes. """ - CLEAN = 'clean' - DIRTY = 'dirty' - SCRUB_IN_PROGRESS = 'scrub_in_progress' - - -class ScatterPropStates(object): - """ Possible states for indexing the scatter property. """ - POPULATED = 'populated' - POPULATION_IN_PROGRESS = 'population_in_progress' - - -class DatastoreProxy(AppDBInterface): - """ - Cassandra implementation of the AppDBInterface - """ - def __init__(self, log_level=logging.INFO, hosts=None): - """ - Constructor. - """ - class_name = self.__class__.__name__ - self.logger = logging.getLogger(class_name) - self.logger.setLevel(log_level) - self.logger.info('Starting {}'.format(class_name)) - - if hosts is not None: - self.hosts = hosts - else: - self.hosts = appscale_info.get_db_ips() - - remaining_retries = INITIAL_CONNECT_RETRIES - while True: - try: - self.cluster = Cluster(self.hosts, default_retry_policy=BASIC_RETRIES, - load_balancing_policy=LB_POLICY) - self.session = self.cluster.connect(KEYSPACE) - self.tornado_cassandra = TornadoCassandra(self.session) - break - except cassandra.cluster.NoHostAvailable as connection_error: - remaining_retries -= 1 - if remaining_retries < 0: - raise connection_error - time.sleep(3) - - self.session.default_consistency_level = ConsistencyLevel.QUORUM - self.prepared_statements = {} - - # Provide synchronous version of some async methods - self.batch_get_entity_sync = tornado_synchronous(self.batch_get_entity) - self.batch_put_entity_sync = tornado_synchronous(self.batch_put_entity) - self.batch_delete_sync = tornado_synchronous(self.batch_delete) - self.valid_data_version_sync = tornado_synchronous(self.valid_data_version) - self.range_query_sync = tornado_synchronous(self.range_query) - self.get_metadata_sync = tornado_synchronous(self.get_metadata) - self.set_metadata_sync = tornado_synchronous(self.set_metadata) - self.delete_table_sync = tornado_synchronous(self.delete_table) - - def close(self): - """ Close all sessions and connections to Cassandra. """ - self.cluster.shutdown() - - @gen.coroutine - def batch_get_entity(self, table_name, row_keys, column_names): - """ - Takes in batches of keys and retrieves their corresponding rows. - - Args: - table_name: The table to access - row_keys: A list of keys to access - column_names: A list of columns to access - Returns: - A dictionary of rows and columns/values of those rows. The format - looks like such: {key:{column_name:value,...}} - Raises: - TypeError: If an argument passed in was not of the expected type. - AppScaleDBConnectionError: If the batch_get could not be performed due to - an error with Cassandra. 
- """ - if not isinstance(table_name, str): raise TypeError("Expected a str") - if not isinstance(column_names, list): raise TypeError("Expected a list") - if not isinstance(row_keys, list): raise TypeError("Expected a list") - - row_keys_bytes = [bytearray(row_key) for row_key in row_keys] - - statement = 'SELECT * FROM "{table}" '\ - 'WHERE {key} IN %s and {column} IN %s'.format( - table=table_name, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - ) - query = SimpleStatement(statement, retry_policy=BASIC_RETRIES) - - results = [] - # Split the rows up into chunks to reduce the likelihood of timeouts. - chunk_indexes = [ - (n, n + ENTITY_FETCH_THRESHOLD) - for n in xrange(0, len(row_keys_bytes), ENTITY_FETCH_THRESHOLD)] - - # TODO: This can be made more efficient by maintaining a constant number - # of concurrent requests rather than waiting for each batch to complete. - for start, end in chunk_indexes: - parameters = (ValueSequence(row_keys_bytes[start:end]), - ValueSequence(column_names)) - try: - batch_results = yield self.tornado_cassandra.execute( - query, parameters=parameters) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception during batch_get_entity' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - results.extend(list(batch_results)) - - results_dict = {row_key: {} for row_key in row_keys} - for (key, column, value) in results: - if key not in results_dict: - results_dict[key] = {} - - results_dict[key][column] = value - - raise gen.Return(results_dict) - - @gen.coroutine - def batch_put_entity(self, table_name, row_keys, column_names, cell_values, - ttl=None): - """ - Allows callers to store multiple rows with a single call. A row can - have multiple columns and values with them. We refer to each row as - an entity. - - Args: - table_name: The table to mutate - row_keys: A list of keys to store on - column_names: A list of columns to mutate - cell_values: A dict of key/value pairs - ttl: The number of seconds to keep the row. - Raises: - TypeError: If an argument passed in was not of the expected type. - AppScaleDBConnectionError: If the batch_put could not be performed due to - an error with Cassandra. - """ - if not isinstance(table_name, str): - raise TypeError("Expected a str") - if not isinstance(column_names, list): - raise TypeError("Expected a list") - if not isinstance(row_keys, list): - raise TypeError("Expected a list") - if not isinstance(cell_values, dict): - raise TypeError("Expected a dict") - - insert_str = ( - 'INSERT INTO "{table}" ({key}, {column}, {value}) ' - 'VALUES (?, ?, ?)' - ).format(table=table_name, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE) - - if ttl is not None: - insert_str += 'USING TTL {}'.format(ttl) - - statement = self.session.prepare(insert_str) - - statements_and_params = [] - for row_key in row_keys: - for column in column_names: - params = (bytearray(row_key), column, - bytearray(cell_values[row_key][column])) - statements_and_params.append((statement, params)) - - try: - yield [ - self.tornado_cassandra.execute(statement, parameters=params) - for statement, params in statements_and_params - ] - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception during batch_put_entity' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - def prepare_insert(self, table): - """ Prepare an insert statement. - - Args: - table: A string containing the table name. - Returns: - A PreparedStatement object. 
- """ - statement = ( - 'INSERT INTO "{table}" ({key}, {column}, {value}) ' - 'VALUES (?, ?, ?) ' - 'USING TIMESTAMP ?' - ).format(table=table, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE) - - if statement not in self.prepared_statements: - self.prepared_statements[statement] = self.session.prepare(statement) - - return self.prepared_statements[statement] - - def prepare_delete(self, table): - """ Prepare a delete statement. - - Args: - table: A string containing the table name. - Returns: - A PreparedStatement object. - """ - statement = ( - 'DELETE FROM "{table}" ' - 'USING TIMESTAMP ? ' - 'WHERE {key} = ?' - ).format(table=table, key=ThriftColumn.KEY) - - if statement not in self.prepared_statements: - self.prepared_statements[statement] = self.session.prepare(statement) - - return self.prepared_statements[statement] - - @gen.coroutine - def normal_batch(self, mutations, txid): - """ Use Cassandra's native batch statement to apply mutations atomically. - - Args: - mutations: A list of dictionaries representing mutations. - txid: An integer specifying a transaction ID. - """ - self.logger.debug('Normal batch: {} mutations'.format(len(mutations))) - batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM, - retry_policy=BASIC_RETRIES) - prepared_statements = {'insert': {}, 'delete': {}} - for mutation in mutations: - table = mutation['table'] - - if table == 'group_updates': - key = mutation['key'] - insert = ( - 'INSERT INTO group_updates (group, last_update) ' - 'VALUES (%(group)s, %(last_update)s) ' - 'USING TIMESTAMP %(timestamp)s' - ) - parameters = {'group': key, 'last_update': mutation['last_update'], - 'timestamp': get_write_time(txid)} - batch.add(insert, parameters) - continue - - if mutation['operation'] == Operations.PUT: - if table not in prepared_statements['insert']: - prepared_statements['insert'][table] = self.prepare_insert(table) - values = mutation['values'] - for column in values: - batch.add( - prepared_statements['insert'][table], - (bytearray(mutation['key']), column, bytearray(values[column]), - get_write_time(txid)) - ) - elif mutation['operation'] == Operations.DELETE: - if table not in prepared_statements['delete']: - prepared_statements['delete'][table] = self.prepare_delete(table) - batch.add( - prepared_statements['delete'][table], - (get_write_time(txid), bytearray(mutation['key'])) - ) - - try: - yield self.tornado_cassandra.execute(batch) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Unable to apply batch' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - def statements_for_mutations(self, mutations, txid): - """ Generates Cassandra statements for a list of mutations. - - Args: - mutations: A list of dictionaries representing mutations. - txid: An integer specifying a transaction ID. - Returns: - A list of tuples containing Cassandra statements and parameters. 
- """ - prepared_statements = {'insert': {}, 'delete': {}} - statements_and_params = [] - for mutation in mutations: - table = mutation['table'] - - if table == 'group_updates': - key = mutation['key'] - insert = ( - 'INSERT INTO group_updates (group, last_update) ' - 'VALUES (%(group)s, %(last_update)s) ' - 'USING TIMESTAMP %(timestamp)s' - ) - parameters = {'group': key, 'last_update': mutation['last_update'], - 'timestamp': get_write_time(txid)} - statements_and_params.append((SimpleStatement(insert), parameters)) - continue - - if mutation['operation'] == Operations.PUT: - if table not in prepared_statements['insert']: - prepared_statements['insert'][table] = self.prepare_insert(table) - values = mutation['values'] - for column in values: - params = (bytearray(mutation['key']), column, - bytearray(values[column]), get_write_time(txid)) - statements_and_params.append( - (prepared_statements['insert'][table], params)) - elif mutation['operation'] == Operations.DELETE: - if table not in prepared_statements['delete']: - prepared_statements['delete'][table] = self.prepare_delete(table) - params = (get_write_time(txid), bytearray(mutation['key'])) - statements_and_params.append( - (prepared_statements['delete'][table], params)) - - return statements_and_params - - @gen.coroutine - def apply_mutations(self, mutations, txid): - """ Apply mutations across tables. - - Args: - mutations: A list of dictionaries representing mutations. - txid: An integer specifying a transaction ID. - """ - statements_and_params = self.statements_for_mutations(mutations, txid) - yield [ - self.tornado_cassandra.execute(statement, parameters=params) - for statement, params in statements_and_params - ] - - @gen.coroutine - def large_batch(self, app, mutations, entity_changes, txn): - """ Insert or delete multiple rows across tables in an atomic statement. - - Args: - app: A string containing the application ID. - mutations: A list of dictionaries representing mutations. - entity_changes: A list of changes at the entity level. - txn: A transaction ID handler. - Raises: - FailedBatch if a concurrent process modifies the batch status. - AppScaleDBConnectionError if a database connection error was encountered. - """ - self.logger.debug('Large batch: transaction {}, {} mutations'. 
- format(txn, len(mutations))) - large_batch = LargeBatch(self.session, app, txn) - try: - yield large_batch.start() - except FailedBatch as batch_error: - raise BatchNotApplied(str(batch_error)) - - insert_item = ( - 'INSERT INTO batches (app, transaction, namespace, ' - ' path, old_value, new_value) ' - 'VALUES (?, ?, ?, ?, ?, ?)' - ) - insert_statement = self.session.prepare(insert_item) - - statements_and_params = [] - for entity_change in entity_changes: - old_value = None - if entity_change['old'] is not None: - old_value = bytearray(entity_change['old'].Encode()) - new_value = None - if entity_change['new'] is not None: - new_value = bytearray(entity_change['new'].Encode()) - - parameters = (app, txn, entity_change['key'].name_space(), - bytearray(entity_change['key'].path().Encode()), old_value, - new_value) - statements_and_params.append((insert_statement, parameters)) - - try: - yield [ - self.tornado_cassandra.execute(statement, parameters=params) - for statement, params in statements_and_params - ] - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Unable to write large batch log' - logger.exception(message) - raise BatchNotApplied(message) - - # Since failing after this point is expensive and time consuming, retry - # operations to make a failure less likely. - custom_retry_coroutine = retry_raw_coroutine( - backoff_threshold=5, retrying_timeout=10, - retry_on_exception=dbconstants.TRANSIENT_CASSANDRA_ERRORS) - - persistent_apply_batch = custom_retry_coroutine(large_batch.set_applied) - try: - yield persistent_apply_batch() - except FailedBatch as batch_error: - raise AppScaleDBConnectionError(str(batch_error)) - - persistent_apply_mutations = custom_retry_coroutine(self.apply_mutations) - try: - yield persistent_apply_mutations(mutations, txn) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception during large batch' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - try: - yield large_batch.cleanup() - except FailedBatch: - # This should not raise an exception since the batch is already applied. - logger.exception('Unable to clear batch status') - - clear_batch = ( - 'DELETE FROM batches ' - 'WHERE app = %(app)s AND transaction = %(transaction)s' - ) - parameters = {'app': app, 'transaction': txn} - try: - yield self.tornado_cassandra.execute(clear_batch, parameters) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - logger.exception('Unable to clear batch log') - - @gen.coroutine - def batch_delete(self, table_name, row_keys, column_names=()): - """ - Remove a set of rows corresponding to a set of keys. - - Args: - table_name: Table to delete rows from - row_keys: A list of keys to remove - column_names: Not used - Raises: - TypeError: If an argument passed in was not of the expected type. - AppScaleDBConnectionError: If the batch_delete could not be performed due - to an error with Cassandra. 
- """ - if not isinstance(table_name, str): raise TypeError("Expected a str") - if not isinstance(row_keys, list): raise TypeError("Expected a list") - - row_keys_bytes = [bytearray(row_key) for row_key in row_keys] - - statement = 'DELETE FROM "{table}" WHERE {key} IN %s'.\ - format( - table=table_name, - key=ThriftColumn.KEY - ) - query = SimpleStatement(statement, retry_policy=BASIC_RETRIES) - parameters = (ValueSequence(row_keys_bytes),) - - try: - yield self.tornado_cassandra.execute(query, parameters=parameters) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception during batch_delete' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def delete_table(self, table_name): - """ - Drops a given table (aka column family in Cassandra) - - Args: - table_name: A string name of the table to drop - Raises: - TypeError: If an argument passed in was not of the expected type. - AppScaleDBConnectionError: If the delete_table could not be performed due - to an error with Cassandra. - """ - if not isinstance(table_name, str): raise TypeError("Expected a str") - - statement = 'DROP TABLE IF EXISTS "{table}"'.format(table=table_name) - query = SimpleStatement(statement, retry_policy=BASIC_RETRIES) - - try: - yield self.tornado_cassandra.execute(query) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception during delete_table' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def create_table(self, table_name, column_names): - """ - Creates a table if it doesn't already exist. - - Args: - table_name: The column family name - column_names: Not used but here to match the interface - Raises: - TypeError: If an argument passed in was not of the expected type. - AppScaleDBConnectionError: If the create_table could not be performed due - to an error with Cassandra. - """ - if not isinstance(table_name, str): raise TypeError("Expected a str") - if not isinstance(column_names, list): raise TypeError("Expected a list") - - statement = ( - 'CREATE TABLE IF NOT EXISTS "{table}" (' - '{key} blob,' - '{column} text,' - '{value} blob,' - 'PRIMARY KEY ({key}, {column})' - ') WITH COMPACT STORAGE' - ).format( - table=table_name, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE - ) - query = SimpleStatement(statement, retry_policy=NO_RETRIES) - - try: - yield self.tornado_cassandra.execute(query, timeout=SCHEMA_CHANGE_TIMEOUT) - except cassandra.OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating a table. Waiting {} ' - 'seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise AppScaleDBConnectionError('Exception during create_table') - except (error for error in dbconstants.TRANSIENT_CASSANDRA_ERRORS - if error != cassandra.OperationTimedOut): - message = 'Exception during create_table' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def range_query(self, - table_name, - column_names, - start_key, - end_key, - limit, - offset=0, - start_inclusive=True, - end_inclusive=True, - keys_only=False): - """ - Gets a dense range ordered by keys. Returns an ordered list of - a dictionary of [key:{column1:value1, column2:value2},...] - or a list of keys if keys only. 
- - Args: - table_name: Name of table to access - column_names: Columns which get returned within the key range - start_key: String for which the query starts at - end_key: String for which the query ends at - limit: Maximum number of results to return - offset: Cuts off these many from the results [offset:] - start_inclusive: Boolean if results should include the start_key - end_inclusive: Boolean if results should include the end_key - keys_only: Boolean if to only keys and not values - Raises: - TypeError: If an argument passed in was not of the expected type. - AppScaleDBConnectionError: If the range_query could not be performed due - to an error with Cassandra. - Returns: - An ordered list of dictionaries of key=>columns/values - """ - if not isinstance(table_name, str): - raise TypeError('table_name must be a string') - if not isinstance(column_names, list): - raise TypeError('column_names must be a list') - if not isinstance(start_key, str): - raise TypeError('start_key must be a string') - if not isinstance(end_key, str): - raise TypeError('end_key must be a string') - if not isinstance(limit, (int, long)) and limit is not None: - raise TypeError('limit must be int, long, or NoneType') - if not isinstance(offset, (int, long)): - raise TypeError('offset must be int or long') - - if start_inclusive: - gt_compare = '>=' - else: - gt_compare = '>' - - if end_inclusive: - lt_compare = '<=' - else: - lt_compare = '<' - - query_limit = '' - if limit is not None: - query_limit = 'LIMIT {}'.format(len(column_names) * limit) - - statement = ( - 'SELECT * FROM "{table}" WHERE ' - 'token({key}) {gt_compare} %s AND ' - 'token({key}) {lt_compare} %s AND ' - '{column} IN %s ' - '{limit} ' - 'ALLOW FILTERING' - ).format(table=table_name, - key=ThriftColumn.KEY, - gt_compare=gt_compare, - lt_compare=lt_compare, - column=ThriftColumn.COLUMN_NAME, - limit=query_limit) - - query = SimpleStatement(statement, retry_policy=BASIC_RETRIES) - parameters = (bytearray(start_key), bytearray(end_key), - ValueSequence(column_names)) - - try: - results = yield self.tornado_cassandra.execute( - query, parameters=parameters) - - results_list = [] - current_item = {} - current_key = None - for (key, column, value) in results: - if keys_only: - results_list.append(key) - continue - - if key != current_key: - if current_item: - results_list.append({current_key: current_item}) - current_item = {} - current_key = key - - current_item[column] = value - if current_item: - results_list.append({current_key: current_item}) - raise gen.Return(results_list[offset:]) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception during range_query' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def get_metadata(self, key): - """ Retrieve a value from the datastore metadata table. - - Args: - key: A string containing the key to fetch. - Returns: - A string containing the value or None if the key is not present. 
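[Editor's note] The removed range_query above folds the flat (key, column, value) rows returned by Cassandra into an ordered list of per-key dictionaries before applying the offset. A minimal standalone sketch of that grouping step, with illustrative names and no driver involved:

# Sketch of the row-grouping step used by the removed range_query: ordered
# (key, column, value) tuples are folded into [{key: {column: value}}, ...]
# and the offset is applied at the end. Names here are illustrative only.
def group_rows(rows, offset=0, keys_only=False):
    results = []
    current_item = {}
    current_key = None
    for key, column, value in rows:
        if keys_only:
            results.append(key)
            continue
        if key != current_key:
            if current_item:
                results.append({current_key: current_item})
            current_item = {}
            current_key = key
        current_item[column] = value
    if current_item:
        results.append({current_key: current_item})
    return results[offset:]

rows = [(b'k1', 'col_a', b'v1'), (b'k1', 'col_b', b'v2'), (b'k2', 'col_a', b'v3')]
assert group_rows(rows) == [{b'k1': {'col_a': b'v1', 'col_b': b'v2'}},
                            {b'k2': {'col_a': b'v3'}}]
assert group_rows(rows, offset=1) == [{b'k2': {'col_a': b'v3'}}]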
- """ - statement = ( - 'SELECT {value} FROM "{table}" ' - 'WHERE {key} = %s ' - 'AND {column} = %s' - ).format( - value=ThriftColumn.VALUE, - table=dbconstants.DATASTORE_METADATA_TABLE, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME - ) - try: - results = yield self.tornado_cassandra.execute( - statement, (bytearray(key), key)) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Unable to fetch {} from datastore metadata'.format(key) - logger.exception(message) - raise AppScaleDBConnectionError(message) - - try: - raise gen.Return(results[0].value) - except IndexError: - return - - @gen.coroutine - def set_metadata(self, key, value): - """ Set a datastore metadata value. - - Args: - key: A string containing the key to set. - value: A string containing the value to set. - """ - if not isinstance(key, str): - raise TypeError('key should be a string') - - if not isinstance(value, str): - raise TypeError('value should be a string') - - statement = ( - 'INSERT INTO "{table}" ({key}, {column}, {value}) ' - 'VALUES (%(key)s, %(column)s, %(value)s)' - ).format( - table=dbconstants.DATASTORE_METADATA_TABLE, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE - ) - parameters = {'key': bytearray(key), - 'column': key, - 'value': bytearray(value)} - try: - yield self.tornado_cassandra.execute(statement, parameters) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Unable to set datastore metadata for {}'.format(key) - logger.exception(message) - raise AppScaleDBConnectionError(message) - except cassandra.InvalidRequest: - yield self.create_table(dbconstants.DATASTORE_METADATA_TABLE, - dbconstants.DATASTORE_METADATA_SCHEMA) - yield self.tornado_cassandra.execute(statement, parameters) - - @gen.coroutine - def valid_data_version(self): - """ Checks whether or not the data layout can be used. - - Returns: - A boolean. - """ - try: - version = yield self.get_metadata(VERSION_INFO_KEY) - except cassandra.InvalidRequest: - raise gen.Return(False) - - is_expected_version = ( - version is not None and - float(version) == CURRENT_VERSION - ) - raise gen.Return(is_expected_version) - - @gen.coroutine - def group_updates(self, groups): - """ Fetch the latest transaction IDs for each group. - - Args: - groups: An interable containing encoded Reference objects. - Returns: - A set of integers specifying transaction IDs. - """ - query = 'SELECT * FROM group_updates WHERE group=%s' - results = yield [ - self.tornado_cassandra.execute(query, [bytearray(group)]) - for group in groups - ] - updates = set(rows[0].last_update for rows in results if rows) - raise gen.Return(updates) - - @gen.coroutine - def start_transaction(self, app, txid, is_xg, in_progress): - """ Persist transaction metadata. - - Args: - app: A string containing an application ID. - txid: An integer specifying the transaction ID. - is_xg: A boolean specifying that the transaction is cross-group. - in_progress: An iterable containing transaction IDs. 
- """ - if in_progress: - in_progress_bin = bytearray( - struct.pack('q' * len(in_progress), *in_progress)) - else: - in_progress_bin = None - - insert = ( - 'INSERT INTO transactions (txid_hash, operation, namespace, path,' - ' start_time, is_xg, in_progress)' - 'VALUES (%(txid_hash)s, %(operation)s, %(namespace)s, %(path)s,' - ' %(start_time)s, %(is_xg)s, %(in_progress)s)' - 'USING TTL {ttl}' - ).format(ttl=dbconstants.MAX_TX_DURATION * 2) - parameters = {'txid_hash': tx_partition(app, txid), - 'operation': TxnActions.START, - 'namespace': '', - 'path': bytearray(''), - 'start_time': datetime.datetime.utcnow(), - 'is_xg': is_xg, - 'in_progress': in_progress_bin} - - try: - yield self.tornado_cassandra.execute(insert, parameters) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception while starting a transaction' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def put_entities_tx(self, app, txid, entities): - """ Update transaction metadata with new put operations. - - Args: - app: A string containing an application ID. - txid: An integer specifying the transaction ID. - entities: A list of entities that will be put upon commit. - """ - batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM, - retry_policy=BASIC_RETRIES) - insert = self.session.prepare(""" - INSERT INTO transactions (txid_hash, operation, namespace, path, entity) - VALUES (?, ?, ?, ?, ?) - USING TTL {ttl} - """.format(ttl=dbconstants.MAX_TX_DURATION * 2)) - - for entity in entities: - args = (tx_partition(app, txid), - TxnActions.MUTATE, - entity.key().name_space(), - bytearray(entity.key().path().Encode()), - bytearray(entity.Encode())) - batch.add(insert, args) - - try: - yield self.tornado_cassandra.execute(batch) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception while putting entities in a transaction' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def delete_entities_tx(self, app, txid, entity_keys): - """ Update transaction metadata with new delete operations. - - Args: - app: A string containing an application ID. - txid: An integer specifying the transaction ID. - entity_keys: A list of entity keys that will be deleted upon commit. - """ - batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM, - retry_policy=BASIC_RETRIES) - insert = self.session.prepare(""" - INSERT INTO transactions (txid_hash, operation, namespace, path, entity) - VALUES (?, ?, ?, ?, ?) - USING TTL {ttl} - """.format(ttl=dbconstants.MAX_TX_DURATION * 2)) - - for key in entity_keys: - # The None value overwrites previous puts. - args = (tx_partition(app, txid), - TxnActions.MUTATE, - key.name_space(), - bytearray(key.path().Encode()), - None) - batch.add(insert, args) - - try: - yield self.tornado_cassandra.execute(batch) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception while deleting entities in a transaction' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def transactional_tasks_count(self, app, txid): - """ Count the number of existing tasks associated with the transaction. - - Args: - app: A string specifying an application ID. - txid: An integer specifying a transaction ID. - Returns: - An integer specifying the number of existing tasks. 
- """ - select = ( - 'SELECT count(*) FROM transactions ' - 'WHERE txid_hash = %(txid_hash)s ' - 'AND operation = %(operation)s' - ) - parameters = {'txid_hash': tx_partition(app, txid), - 'operation': TxnActions.ENQUEUE_TASK} - try: - result = yield self.tornado_cassandra.execute(select, parameters) - raise gen.Return(result[0].count) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception while fetching task count' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def add_transactional_tasks(self, app, txid, tasks, service_id, version_id): - """ Add tasks to be enqueued upon the completion of a transaction. - - Args: - app: A string specifying an application ID. - txid: An integer specifying a transaction ID. - tasks: A list of TaskQueueAddRequest objects. - service_id: A string specifying the client's service ID. - version_id: A string specifying the client's version ID. - """ - batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM, - retry_policy=BASIC_RETRIES) - query_str = ( - 'INSERT INTO transactions (txid_hash, operation, namespace, path, task) ' - 'VALUES (?, ?, ?, ?, ?) ' - 'USING TTL {ttl}' - ).format(ttl=dbconstants.MAX_TX_DURATION * 2) - insert = self.session.prepare(query_str) - - for task in tasks: - task.clear_transaction() - - # The path for the task entry doesn't matter as long as it's unique. - path = bytearray(str(uuid.uuid4())) - - task_payload = '_'.join([service_id, version_id, task.Encode()]) - args = (tx_partition(app, txid), - TxnActions.ENQUEUE_TASK, - '', - path, - task_payload) - batch.add(insert, args) - - try: - yield self.tornado_cassandra.execute(batch) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception while adding tasks in a transaction' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def record_reads(self, app, txid, group_keys): - """ Keep track of which entity groups were read in a transaction. - - Args: - app: A string specifying an application ID. - txid: An integer specifying a transaction ID. - group_keys: An iterable containing Reference objects. - """ - batch = BatchStatement(consistency_level=ConsistencyLevel.QUORUM, - retry_policy=BASIC_RETRIES) - insert = self.session.prepare(""" - INSERT INTO transactions (txid_hash, operation, namespace, path) - VALUES (?, ?, ?, ?) - USING TTL {ttl} - """.format(ttl=dbconstants.MAX_TX_DURATION * 2)) - - for group_key in group_keys: - if not isinstance(group_key, entity_pb.Reference): - group_key = entity_pb.Reference(group_key) - - args = (tx_partition(app, txid), - TxnActions.GET, - group_key.name_space(), - bytearray(group_key.path().Encode())) - batch.add(insert, args) - - try: - yield self.tornado_cassandra.execute(batch) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception while recording reads in a transaction' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - @gen.coroutine - def get_transaction_metadata(self, app, txid): - """ Fetch transaction state. - - Args: - app: A string specifying an application ID. - txid: An integer specifying a transaction ID. - Returns: - A dictionary containing transaction state. 
- """ - select = ( - 'SELECT namespace, operation, path, start_time, is_xg, in_progress, ' - ' entity, task ' - 'FROM transactions ' - 'WHERE txid_hash = %(txid_hash)s ' - ) - parameters = {'txid_hash': tx_partition(app, txid)} - try: - results = yield self.tornado_cassandra.execute(select, parameters) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - message = 'Exception while inserting entities in a transaction' - logger.exception(message) - raise AppScaleDBConnectionError(message) - - metadata = {'puts': {}, 'deletes': [], 'tasks': [], 'reads': set()} - for result in results: - if result.operation == TxnActions.START: - metadata['start'] = result.start_time - metadata['is_xg'] = result.is_xg - metadata['in_progress'] = set() - if metadata['in_progress'] is not None: - metadata['in_progress'] = set( - struct.unpack('q' * int(len(result.in_progress) / 8), - result.in_progress)) - if result.operation == TxnActions.MUTATE: - key = create_key(app, result.namespace, result.path) - if result.entity is None: - metadata['deletes'].append(key) - else: - metadata['puts'][key.Encode()] = result.entity - if result.operation == TxnActions.GET: - group_key = create_key(app, result.namespace, result.path) - metadata['reads'].add(group_key.Encode()) - if result.operation == TxnActions.ENQUEUE_TASK: - service_id, version_id, task_pb = result.task.split('_', 2) - task_metadata = { - 'service_id': service_id, - 'version_id': version_id, - 'task': taskqueue_service_pb.TaskQueueAddRequest(task_pb)} - metadata['tasks'].append(task_metadata) - raise gen.Return(metadata) diff --git a/AppDB/appscale/datastore/cassandra_env/constants.py b/AppDB/appscale/datastore/cassandra_env/constants.py deleted file mode 100644 index f4399c5462..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/constants.py +++ /dev/null @@ -1,8 +0,0 @@ -""" Cassandra-specific constants. """ -from cassandra.policies import DCAwareRoundRobinPolicy - -# The current data layout version. -CURRENT_VERSION = 2.0 - -# The load balancing policy to use when connecting to a cluster. -LB_POLICY = DCAwareRoundRobinPolicy() diff --git a/AppDB/appscale/datastore/cassandra_env/entity_id_allocator.py b/AppDB/appscale/datastore/cassandra_env/entity_id_allocator.py deleted file mode 100644 index f2e0615324..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/entity_id_allocator.py +++ /dev/null @@ -1,317 +0,0 @@ -import sys -import uuid - -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from cassandra.query import ( - ConsistencyLevel, - SimpleStatement -) -from tornado import gen - -from appscale.datastore.cassandra_env.retry_policies import NO_RETRIES -from appscale.datastore.cassandra_env.tornado_cassandra import TornadoCassandra -from appscale.datastore.dbconstants import ( - AppScaleBadArg, - AppScaleDBConnectionError, - TRANSIENT_CASSANDRA_ERRORS -) -from appscale.datastore.utils import logger - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore.datastore_stub_util import ( - _MAX_SCATTERED_COUNTER, - _MAX_SEQUENTIAL_COUNTER, - ToScatteredId -) - -# The number of scattered IDs the datastore should reserve at a time. -DEFAULT_RESERVATION_SIZE = 10000 - - -class ReservationFailed(Exception): - """ Indicates that a block of IDs could not be reserved. """ - pass - - -class EntityIDAllocator(object): - """ Keeps track of reserved entity IDs for a project. """ - - def __init__(self, session, project, scattered=False): - """ Creates a new EntityIDAllocator object. 
- - Args: - session: A cassandra-drivers session object. - project: A string specifying a project ID. - """ - self.project = project - self.session = session - self.tornado_cassandra = TornadoCassandra(self.session) - self.scattered = scattered - if scattered: - self.max_allowed = _MAX_SCATTERED_COUNTER - else: - self.max_allowed = _MAX_SEQUENTIAL_COUNTER - - # Allows the allocator to avoid making unnecessary Cassandra requests when - # setting the minimum counter value. - self._last_reserved_cache = None - - @gen.coroutine - def _ensure_entry(self, retries=5): - """ Ensures an entry exists for a reservation. - - Args: - retries: The number of times to retry the insert. - Raises: - AppScaleDBConnectionError if the insert is tried too many times. - """ - if retries < 0: - raise AppScaleDBConnectionError('Unable to create reserved_ids entry') - - logger.debug('Creating reserved_ids entry for {}'.format(self.project)) - insert = SimpleStatement(""" - INSERT INTO reserved_ids (project, scattered, last_reserved, op_id) - VALUES (%(project)s, %(scattered)s, 0, uuid()) - IF NOT EXISTS - """, retry_policy=NO_RETRIES) - parameters = {'project': self.project, 'scattered': self.scattered} - try: - yield self.tornado_cassandra.execute(insert, parameters) - except TRANSIENT_CASSANDRA_ERRORS: - yield self._ensure_entry(retries=retries-1) - - @gen.coroutine - def _get_last_reserved(self): - """ Retrieves the last entity ID that was reserved. - - Returns: - An integer specifying an entity ID. - """ - get_reserved = SimpleStatement(""" - SELECT last_reserved - FROM reserved_ids - WHERE project = %(project)s - AND scattered = %(scattered)s - """, consistency_level=ConsistencyLevel.SERIAL) - parameters = {'project': self.project, 'scattered': self.scattered} - try: - results = yield self.tornado_cassandra.execute(get_reserved, parameters) - result = results[0] - except IndexError: - yield self._ensure_entry() - last_reserved = yield self._get_last_reserved() - raise gen.Return(last_reserved) - - self._last_reserved_cache = result.last_reserved - raise gen.Return(result.last_reserved) - - @gen.coroutine - def _get_last_op_id(self): - """ Retrieve the op_id that was last written during a reservation. - - Returns: - A UUID4 containing the latest op_id. - """ - get_op_id = SimpleStatement(""" - SELECT op_id - FROM reserved_ids - WHERE project = %(project)s - AND scattered = %(scattered)s - """, consistency_level=ConsistencyLevel.SERIAL) - parameters = {'project': self.project, 'scattered': self.scattered} - results = yield self.tornado_cassandra.execute(get_op_id, parameters) - raise gen.Return(results[0].op_id) - - @gen.coroutine - def _set_reserved(self, last_reserved, new_reserved): - """ Update the last reserved value to allocate that block. - - Args: - last_reserved: An integer specifying the last reserved value. - new_reserved: An integer specifying the new reserved value. - Raises: - ReservationFailed if the update statement fails. 
- """ - op_id = uuid.uuid4() - set_reserved = SimpleStatement(""" - UPDATE reserved_ids - SET last_reserved = %(new_reserved)s, - op_id = %(op_id)s - WHERE project = %(project)s - AND scattered = %(scattered)s - IF last_reserved = %(last_reserved)s - """, retry_policy=NO_RETRIES) - parameters = { - 'last_reserved': last_reserved, 'new_reserved': new_reserved, - 'project': self.project, 'scattered': self.scattered, 'op_id': op_id} - try: - result = yield self.tornado_cassandra.execute(set_reserved, parameters) - except TRANSIENT_CASSANDRA_ERRORS as error: - last_op_id = yield self._get_last_op_id() - if last_op_id == op_id: - return - raise ReservationFailed(str(error)) - - if not result.was_applied: - raise ReservationFailed('Last reserved value changed') - - self._last_reserved_cache = new_reserved - - @gen.coroutine - def allocate_size(self, size, retries=5, min_counter=None): - """ Reserve a block of IDs for this project. - - Args: - size: The number of IDs to reserve. - retries: The number of times to retry the reservation. - min_counter: The minimum counter value that should be reserved. - Returns: - A tuple of integers specifying the start and end ID. - Raises: - AppScaleDBConnectionError if the reservation is tried too many times. - AppScaleBadArg if the ID space has been exhausted. - """ - if retries < 0: - raise AppScaleDBConnectionError('Unable to reserve new block') - - try: - last_reserved = yield self._get_last_reserved() - except TRANSIENT_CASSANDRA_ERRORS: - raise AppScaleDBConnectionError('Unable to get last reserved ID') - - if min_counter is None: - new_reserved = last_reserved + size - else: - new_reserved = max(last_reserved, min_counter) + size - - if new_reserved > self.max_allowed: - raise AppScaleBadArg('Exceeded maximum allocated IDs') - - try: - yield self._set_reserved(last_reserved, new_reserved) - except ReservationFailed: - start_id, end_id = yield self.allocate_size(size, retries=retries-1) - raise gen.Return((start_id, end_id)) - - start_id = last_reserved + 1 - end_id = new_reserved - raise gen.Return((start_id, end_id)) - - @gen.coroutine - def allocate_max(self, max_id, retries=5): - """ Reserves all IDs up to the one given. - - Args: - max_id: An integer specifying the maximum ID to allocated. - retries: The number of times to retry the reservation. - Returns: - A tuple of integers specifying the start and end ID. - Raises: - AppScaleDBConnectionError if the reservation is tried too many times. - AppScaleBadArg if the ID space has been exhausted. - """ - if retries < 0: - raise AppScaleDBConnectionError('Unable to reserve new block') - - if max_id > self.max_allowed: - raise AppScaleBadArg('Exceeded maximum allocated IDs') - - try: - last_reserved = yield self._get_last_reserved() - except TRANSIENT_CASSANDRA_ERRORS: - raise AppScaleDBConnectionError('Unable to get last reserved ID') - - # Instead of returning an error, the API returns an invalid range. - if last_reserved >= max_id: - raise gen.Return((last_reserved + 1, last_reserved)) - - try: - yield self._set_reserved(last_reserved, max_id) - except ReservationFailed: - start_id, end_id = yield self.allocate_max(max_id, retries=retries-1) - raise gen.Return((start_id, end_id)) - - start_id = last_reserved + 1 - end_id = max_id - raise gen.Return((start_id, end_id)) - - @gen.coroutine - def set_min_counter(self, counter): - """ Ensures the counter is at least as large as the given value. - - Args: - counter: An integer specifying the minimum counter value. 
- """ - if (self._last_reserved_cache is not None and - self._last_reserved_cache >= counter): - return - - yield self.allocate_max(counter) - - -class ScatteredAllocator(EntityIDAllocator): - """ An iterator that generates evenly-distributed entity IDs. """ - def __init__(self, session, project): - """ Creates a new ScatteredAllocator instance. Each project should just - have one instance since it reserves a large block of IDs at a time. - - Args: - session: A cassandra-driver session. - project: A string specifying a project ID. - """ - super(ScatteredAllocator, self).__init__(session, project, scattered=True) - - # The range that this datastore has already reserved for scattered IDs. - self.start_id = None - self.end_id = None - - def __iter__(self): - """ Returns a new iterator object. """ - return self - - @gen.coroutine - def next(self): - """ Generates a new entity ID. - - Returns: - An integer specifying an entity ID. - """ - # This function will require a tornado lock when made asynchronous. - if self.start_id is None or self.start_id > self.end_id: - size = DEFAULT_RESERVATION_SIZE - self.start_id, self.end_id = yield self.allocate_size(size) - - next_id = ToScatteredId(self.start_id) - self.start_id += 1 - raise gen.Return(next_id) - - @gen.coroutine - def set_min_counter(self, counter): - """ Ensures the counter is at least as large as the given value. - - Args: - counter: An integer specifying the minimum counter value. - """ - # If there's no chance the ID could be allocated, do nothing. - if self.start_id is not None and self.start_id >= counter: - return - - # If the ID is in the allocated block, adjust the block. - if self.end_id is not None and self.end_id > counter: - self.start_id = counter - - # If this server has never allocated a block, adjust the minimum for - # future blocks. - if self.start_id is None: - if (self._last_reserved_cache is not None and - self._last_reserved_cache >= counter): - return - - yield self.allocate_max(counter) - return - - # If this server has allocated a block, but the relevant ID is greater than - # the end ID, get a new block that starts at least as high as the ID. - self.start_id, self.end_id = yield self.allocate_size( - DEFAULT_RESERVATION_SIZE, min_counter=counter - ) diff --git a/AppDB/appscale/datastore/cassandra_env/get_token.py b/AppDB/appscale/datastore/cassandra_env/get_token.py deleted file mode 100644 index 3d1ef242a5..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/get_token.py +++ /dev/null @@ -1,166 +0,0 @@ -from __future__ import division -import argparse - -from appscale.common import appscale_info -from cassandra.cluster import Cluster -from random import choice -from random import shuffle -from subprocess import check_output -from ..cassandra_env.cassandra_interface import KEYSPACE -from ..cassandra_env.cassandra_interface import NODE_TOOL -from ..cassandra_env.cassandra_interface import ThriftColumn -from ..cassandra_env.constants import LB_POLICY -from ..cassandra_env.retry_policies import BASIC_RETRIES -from ..dbconstants import APP_ENTITY_TABLE -from ..dbconstants import APP_ENTITY_SCHEMA -from ..dbconstants import ID_SEPARATOR -from ..dbconstants import KEY_DELIMITER -from ..dbconstants import KIND_SEPARATOR - - -class NoSampleKeys(Exception): - """ Indicates that nodetool did not provide any key samples. """ - pass - - -def is_entity(key): - """ Determines whether a given string is an entity key. - - Args: - key: A string containing a key from 'rangekeysample'. 
- Returns: - A boolean indicating whether or not the string is an entity key. - """ - key_parts = key.split(KEY_DELIMITER) - if len(key_parts) != 3: - return False - - last_part = key_parts[-1] - if not last_part.endswith(KIND_SEPARATOR): - return False - - last_part = last_part[:-len(KIND_SEPARATOR)] - if KIND_SEPARATOR in last_part: - return False - - return ':' in last_part - - -def get_kind_averages(keys): - """ Get an average size for each kind. - - Args: - keys: A list of dictionaries containing keys. - Returns: - A dictionary listing the average size of each kind. - """ - hosts = appscale_info.get_db_ips() - cluster = Cluster(hosts, default_retry_policy=BASIC_RETRIES, - load_balancing_policy=LB_POLICY) - session = cluster.connect(KEYSPACE) - - entities_by_kind = {} - for key_dict in keys: - key = key_dict['key'] - if is_entity(key): - key_parts = key.split(KEY_DELIMITER) - kind = key_parts[2].split(ID_SEPARATOR, 1)[0] - kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind]) - if kind_id not in entities_by_kind: - entities_by_kind[kind_id] = {'keys': [], 'size': 0, 'fetched': 0} - entities_by_kind[kind_id]['keys'].append(key) - - for kind_id, kind in entities_by_kind.iteritems(): - shuffle(kind['keys']) - - if not entities_by_kind: - return {} - - futures = [] - for _ in range(50): - kind = choice(entities_by_kind.keys()) - try: - key = entities_by_kind[kind]['keys'].pop() - except IndexError: - continue - - select = """ - SELECT {value} FROM "{table}" - WHERE {key}=%(key)s AND {column}=%(column)s - """.format(value=ThriftColumn.VALUE, table=APP_ENTITY_TABLE, - key=ThriftColumn.KEY, column=ThriftColumn.COLUMN_NAME) - parameters = {'key': bytearray(key), 'column': APP_ENTITY_SCHEMA[0]} - future = session.execute_async(select, parameters) - futures.append({'future': future, 'kind': kind}) - - for future_dict in futures: - future = future_dict['future'] - kind = future_dict['kind'] - try: - entity = future.result()[0].value - except IndexError: - continue - - entities_by_kind[kind]['size'] += len(entity) - entities_by_kind[kind]['fetched'] += 1 - - kind_averages = {} - for kind_id, kind in entities_by_kind.iteritems(): - try: - kind_averages[kind_id] = int(kind['size'] / kind['fetched']) - except ZeroDivisionError: - kind_averages[kind_id] = 0 - - return kind_averages - - -def get_sample(): - """ Gets a sorted sample of keys on this node. - - Returns: - A list of dictionaries containing keys. - """ - sample_output = check_output([NODE_TOOL, 'rangekeysample']) - keys = [{'key': key.strip().decode('hex'), 'size': 0} - for key in sample_output.splitlines()[1:]] - sorted(keys, key=lambda key: key['key']) - return keys - - -def main(): - """ Run appscale-get-token script. 
""" - parser = argparse.ArgumentParser( - description='Retrieve a Cassandra token owned by this node') - parser.add_argument('percentile', type=int) - args = parser.parse_args() - - keys = get_sample() - if not keys: - raise NoSampleKeys('There are no key samples available on this machine.') - - kind_averages = get_kind_averages(keys) - - for key_dict in keys: - key = key_dict['key'] - key_dict['size'] += len(key) - if not is_entity(key): - continue - - key_parts = key.split(KEY_DELIMITER) - kind = key_parts[2].split(ID_SEPARATOR, 1)[0] - kind_id = KEY_DELIMITER.join([key_parts[0], key_parts[1], kind]) - if kind_id in kind_averages: - key_dict['size'] += kind_averages[kind_id] - - total_size = sum(key['size'] for key in keys) - desired_size = int(total_size * (args.percentile / 100)) - - size_seen = 0 - for key in keys: - size_seen += key['size'] - if size_seen >= desired_size: - print(key['key'].encode('hex')) - return - - # If we still haven't reached the desired size, just select the last key. - print(keys[-1]['key'].encode('hex')) diff --git a/AppDB/appscale/datastore/cassandra_env/large_batch.py b/AppDB/appscale/datastore/cassandra_env/large_batch.py deleted file mode 100644 index 070c771536..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/large_batch.py +++ /dev/null @@ -1,423 +0,0 @@ -import sys -import uuid - -from cassandra.query import ConsistencyLevel -from cassandra.query import SimpleStatement -from tornado import gen - -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.datastore.cassandra_env.retry_policies import ( - BASIC_RETRIES, NO_RETRIES) -from appscale.datastore.cassandra_env.tornado_cassandra import TornadoCassandra -from appscale.datastore.cassandra_env.utils import ( - deletions_for_entity, mutations_for_entity) -from appscale.datastore.dbconstants import TRANSIENT_CASSANDRA_ERRORS -from appscale.datastore.utils import logger, tx_partition - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore import entity_pb - - -class BatchNotApplied(Exception): - """ Indicates that a large batch failed before it could be applied. """ - pass - - -class BatchNotFound(Exception): - """ Indicates that the batch status is not defined. """ - pass - - -class BatchNotOwned(Exception): - """ Indicates that a different process owns the batch. """ - pass - - -class FailedBatch(Exception): - """ Indicates that the datastore failed to apply a large batch. """ - pass - - -class LargeBatch(object): - def __init__(self, session, project, txid): - """ Create a new LargeBatch object. - - Args: - session: A cassandra-driver session. - project: A string specifying a project ID. - txid: An integer specifying a transaction ID. - """ - self.session = session - self.tornado_cassandra = TornadoCassandra(self.session) - self.project = project - self.txid = txid - - # Create an identifier so that it's possible to check if operations succeed - # after a timeout. - self.op_id = uuid.uuid4() - - # This value is used when claiming an existing failed batch. - self.read_op_id = None - - # Indicates if the batch has been applied. - self.applied = False - - @gen.coroutine - def is_applied(self, retries=5): - """ Fetch the status of the batch. - - Args: - retries: The number of times to retry after failures. - Returns: - A boolean indicating whether or not the batch has been applied. - Raises: - BatchNotFound if the batch cannot be found. - BatchNotOwned if a different process owns the batch. 
- """ - if self.applied: - raise gen.Return(True) - - get_status = """ - SELECT applied, op_id FROM batch_status - WHERE txid_hash = %(txid_hash)s - """ - query = SimpleStatement(get_status, retry_policy=BASIC_RETRIES, - consistency_level=ConsistencyLevel.SERIAL) - parameters = {'txid_hash': tx_partition(self.project, self.txid)} - - try: - results = yield self.tornado_cassandra.execute( - query, parameters=parameters) - result = results[0] - if result.op_id != self.op_id: - self.read_op_id = result.op_id - raise BatchNotOwned( - '{} does not match {}'.format(self.op_id, result.op_id)) - raise gen.Return(result.applied) - except TRANSIENT_CASSANDRA_ERRORS: - retries_left = retries - 1 - if retries_left < 0: - raise - - logger.debug('Unable to read batch status. Retrying.') - is_applied = yield self.is_applied(retries=retries_left) - raise gen.Return(is_applied) - except IndexError: - raise BatchNotFound( - 'Batch for {}:{} not found'.format(self.project, self.txid)) - - @gen.coroutine - def start(self, retries=5): - """ Mark the batch as being in progress. - - Args: - retries: The number of times to retry after failures. - Raises: - FailedBatch if the batch cannot be marked as being started. - """ - if retries < 0: - raise FailedBatch('Retries exhausted while starting batch') - - insert = SimpleStatement(""" - INSERT INTO batch_status (txid_hash, applied, op_id) - VALUES (%(txid_hash)s, False, %(op_id)s) - IF NOT EXISTS - """, retry_policy=NO_RETRIES) - parameters = {'txid_hash': tx_partition(self.project, self.txid), - 'op_id': self.op_id} - - try: - result = yield self.tornado_cassandra.execute(insert, parameters) - except TRANSIENT_CASSANDRA_ERRORS: - yield self.start(retries=retries-1) - return - - if result.was_applied: - return - - # Make sure this process was responsible for the insert. - try: - yield self.is_applied() - except (BatchNotOwned, TRANSIENT_CASSANDRA_ERRORS) as batch_failure: - raise FailedBatch(str(batch_failure)) - except BatchNotFound: - yield self.start(retries=retries-1) - return - - @gen.coroutine - def set_applied(self, retries=5): - """ Mark the batch as being applied. - - Args: - retries: The number of times to retry after failures. - Raises: - FailedBatch if the batch cannot be marked as applied. - """ - if retries < 0: - raise FailedBatch('Retries exhausted while updating batch') - - update_status = SimpleStatement(""" - UPDATE batch_status - SET applied = True - WHERE txid_hash = %(txid_hash)s - IF op_id = %(op_id)s - """, retry_policy=NO_RETRIES) - parameters = {'txid_hash': tx_partition(self.project, self.txid), - 'op_id': self.op_id} - - try: - result = yield self.tornado_cassandra.execute(update_status, parameters) - if result.was_applied: - self.applied = True - return - except TRANSIENT_CASSANDRA_ERRORS: - pass # Application is confirmed below. - - try: - self.applied = yield self.is_applied() - if self.applied: - return - yield self.set_applied(retries=retries-1) - return - except (BatchNotFound, BatchNotOwned, TRANSIENT_CASSANDRA_ERRORS) as error: - raise FailedBatch(str(error)) - - @gen.coroutine - def cleanup(self, retries=5): - """ Clean up the batch status entry. - - Args: - retries: The number of times to retry after failures. - Raises: - FailedBatch if the batch cannot be marked as applied. 
- """ - if retries < 0: - raise FailedBatch('Retries exhausted while cleaning up batch') - - clear_status = SimpleStatement(""" - DELETE FROM batch_status - WHERE txid_hash = %(txid_hash)s - IF op_id = %(op_id)s - """, retry_policy=NO_RETRIES) - parameters = {'txid_hash': tx_partition(self.project, self.txid), - 'op_id': self.op_id} - - try: - result = yield self.tornado_cassandra.execute(clear_status, parameters) - except TRANSIENT_CASSANDRA_ERRORS: - yield self.cleanup(retries=retries-1) - return - - if not result.was_applied: - raise FailedBatch( - 'Unable to clean up batch for {}:{}'.format(self.project, self.txid)) - - -class BatchResolver(object): - """ Resolves large batches. """ - def __init__(self, project_id, db_access): - """ Creates a new BatchResolver. - - Args: - project_id: A string specifying a project ID. - db_access: A DatastoreProxy. - """ - self.project_id = project_id - - self._db_access = db_access - self._session = self._db_access.session - self._tornado_cassandra = TornadoCassandra(self._session) - self._prepared_statements = {} - - @gen.coroutine - def resolve(self, txid, composite_indexes): - """ Resolves a large batch for a given transaction. - - Args: - txid: An integer specifying a transaction ID. - composite_indexes: A list of CompositeIndex objects. - """ - txid_hash = tx_partition(self.project_id, txid) - new_op_id = uuid.uuid4() - try: - batch_status = yield self._get_status(txid_hash) - except BatchNotFound: - # Make sure another process doesn't try to commit the transaction. - yield self._insert(txid_hash, new_op_id) - return - - old_op_id = batch_status.op_id - yield self._update_op_id(txid_hash, batch_status.applied, old_op_id, - new_op_id) - - if batch_status.applied: - # Make sure all the mutations in the batch have been applied. - yield self._apply_mutations(txid, composite_indexes) - - @gen.coroutine - def cleanup(self, txid): - """ Cleans up the metadata from the finished batch. - - Args: - txid: An integer specifying a transaction ID. - """ - txid_hash = tx_partition(self.project_id, txid) - yield self._delete_mutations(txid) - yield self._delete_status(txid_hash) - - def _get_prepared(self, statement): - """ Caches prepared statements. - - Args: - statement: A string containing a Cassandra statement. - """ - if statement not in self._prepared_statements: - self._prepared_statements[statement] = self._session.prepare(statement) - - return self._prepared_statements[statement] - - @gen.coroutine - def _get_status(self, txid_hash): - """ Gets the current status of a large batch. - - Args: - txid_hash: A byte array identifying the transaction. - Returns: - A Cassandra result for the batch entry. - """ - statement = self._get_prepared(""" - SELECT applied, op_id FROM batch_status - WHERE txid_hash = ? - """) - bound_statement = statement.bind((txid_hash,)) - bound_statement.consistency_level = ConsistencyLevel.SERIAL - bound_statement.retry_policy = BASIC_RETRIES - results = yield self._tornado_cassandra.execute(bound_statement) - try: - raise gen.Return(results[0]) - except IndexError: - raise BatchNotFound('Batch not found') - - @gen.coroutine - def _insert(self, txid_hash, op_id): - """ Claims the large batch. - - Args: - txid_hash: A byte array identifying the transaction. - op_id: A uuid4 specifying the process ID. - """ - statement = self._get_prepared(""" - INSERT INTO batch_status (txid_hash, applied, op_id) - VALUES (?, ?, ?) 
- IF NOT EXISTS - """) - bound_statement = statement.bind((txid_hash, False, op_id)) - bound_statement.retry_policy = NO_RETRIES - results = yield self._tornado_cassandra.execute(bound_statement) - if not results[0].applied: - raise BatchNotOwned('Another process started applying the transaction') - - @gen.coroutine - def _select_mutations(self, txid): - """ Fetches a list of the mutations for the batch. - - Args: - txid: An integer specifying a transaction ID. - Returns: - An iterator of Cassandra results. - """ - statement = self._get_prepared(""" - SELECT old_value, new_value FROM batches - WHERE app = ? AND transaction = ? - """) - bound_statement = statement.bind((self.project_id, txid)) - bound_statement.retry_policy = BASIC_RETRIES - results = yield self._tornado_cassandra.execute(bound_statement) - raise gen.Return(results) - - @gen.coroutine - def _apply_mutations(self, txid, composite_indexes): - """ Applies all the mutations in the batch. - - Args: - txid: An integer specifying a transaction ID. - composite_indexes: A list of CompositeIndex objects. - """ - results = yield self._select_mutations(txid) - futures = [] - for result in results: - old_entity = result.old_value - if old_entity is not None: - old_entity = entity_pb.EntityProto(old_entity) - - new_entity = result.new_value - - if new_entity is None: - mutations = deletions_for_entity(old_entity, composite_indexes) - else: - new_entity = entity_pb.EntityProto(new_entity) - mutations = mutations_for_entity(new_entity, txid, old_entity, - composite_indexes) - - statements_and_params = self._db_access.statements_for_mutations( - mutations, txid) - for statement, params in statements_and_params: - futures.append(self._tornado_cassandra.execute(statement, params)) - - yield futures - - @gen.coroutine - def _update_op_id(self, txid_hash, applied_status, old_op_id, new_op_id): - """ Claims a batch that is in progress. - - Args: - txid_hash: A byte array identifying the transaction. - applied_status: A boolean indicating that the batch has been committed. - old_op_id: A uuid4 specifying the last read process ID. - new_op_id: A uuid4 specifying the new process ID. - """ - statement = self._get_prepared(""" - UPDATE batch_status - SET op_id = ? - WHERE txid_hash = ? - IF op_id = ? - AND applied = ? - """) - params = (new_op_id, txid_hash, old_op_id, applied_status) - bound_statement = statement.bind(params) - bound_statement.retry_policy = NO_RETRIES - results = yield self._tornado_cassandra.execute(bound_statement) - if not results[0].applied: - raise BatchNotOwned('Batch status changed after checking') - - @gen.coroutine - def _delete_mutations(self, txid): - """ Removes mutation entries for the batch. - - Args: - txid: An integer specifying a transaction ID. - """ - statement = self._get_prepared(""" - DELETE FROM batches - WHERE app = ? AND transaction = ? - """) - params = (self.project_id, txid) - bound_statement = statement.bind(params) - bound_statement.retry_policy = BASIC_RETRIES - yield self._tornado_cassandra.execute(bound_statement) - - @gen.coroutine - def _delete_status(self, txid_hash): - """ Removes the batch status entry. - - Args: - txid_hash: A byte array identifying a transaction. - """ - statement = self._get_prepared(""" - DELETE FROM batch_status - WHERE txid_hash = ? 
- IF EXISTS - """) - bound_statement = statement.bind((txid_hash,)) - bound_statement.retry_policy = NO_RETRIES - yield self._tornado_cassandra.execute(bound_statement) diff --git a/AppDB/appscale/datastore/cassandra_env/py_cassandra.py b/AppDB/appscale/datastore/cassandra_env/py_cassandra.py deleted file mode 100644 index 95cac43e3e..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/py_cassandra.py +++ /dev/null @@ -1,203 +0,0 @@ -""" -Cassandra Interface for AppScale -""" -import time - -from appscale.common import appscale_info -import cassandra -from cassandra.cluster import BatchStatement -from cassandra.cluster import Cluster -from cassandra.cluster import SimpleStatement -from cassandra.query import ConsistencyLevel -from cassandra.query import ValueSequence -from tornado import gen - -from appscale.datastore import dbconstants -from appscale.datastore.cassandra_env.constants import LB_POLICY -from appscale.datastore.cassandra_env.cassandra_interface import ( - INITIAL_CONNECT_RETRIES, KEYSPACE, ThriftColumn -) -from appscale.datastore.cassandra_env.retry_policies import BASIC_RETRIES -from appscale.datastore.cassandra_env.tornado_cassandra import TornadoCassandra -from appscale.datastore.dbconstants import ( - AppScaleDBConnectionError, SCHEMA_TABLE, SCHEMA_TABLE_SCHEMA -) -from appscale.datastore.dbinterface import AppDBInterface -from appscale.datastore.utils import tornado_synchronous - -ERROR_DEFAULT = "DB_ERROR:" # ERROR_CASSANDRA - -PERSISTENT_CONNECTION = False -PROFILING = False - -MAX_ROW_COUNT = 10000000 - - -class DatastoreProxy(AppDBInterface): - def __init__(self): - hosts = appscale_info.get_db_ips() - - remaining_retries = INITIAL_CONNECT_RETRIES - while True: - try: - cluster = Cluster(hosts, load_balancing_policy=LB_POLICY) - self.session = cluster.connect(keyspace=KEYSPACE) - self.tornado_cassandra = TornadoCassandra(self.session) - break - except cassandra.cluster.NoHostAvailable as connection_error: - remaining_retries -= 1 - if remaining_retries < 0: - raise connection_error - time.sleep(3) - - self.session.default_consistency_level = ConsistencyLevel.QUORUM - - # Provide synchronous version of get_schema method - self.get_schema_sync = tornado_synchronous(self.get_schema) - - @gen.coroutine - def get_entity(self, table_name, row_key, column_names): - error = [ERROR_DEFAULT] - list_ = error - row_key = bytearray('/'.join([table_name, row_key])) - statement = """ - SELECT * FROM "{table}" - WHERE {key} = %(key)s - AND {column} IN %(columns)s - """.format(table=table_name, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME) - query = SimpleStatement(statement, retry_policy=BASIC_RETRIES) - parameters = {'key': row_key, - 'columns': ValueSequence(column_names)} - try: - results = yield self.tornado_cassandra.execute(query, parameters) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - raise AppScaleDBConnectionError('Unable to fetch entity') - - results_dict = {} - for (_, column, value) in results: - results_dict[column] = value - - if not results_dict: - list_[0] += 'Not found' - raise gen.Return(list_) - - for column in column_names: - list_.append(results_dict[column]) - raise gen.Return(list_) - - @gen.coroutine - def put_entity(self, table_name, row_key, column_names, cell_values): - error = [ERROR_DEFAULT] - list_ = error - - row_key = bytearray('/'.join([table_name, row_key])) - values = {} - for index, column in enumerate(column_names): - values[column] = cell_values[index] - - statement = """ - INSERT INTO "{table}" ({key}, {column}, 
{value}) - VALUES (%(key)s, %(column)s, %(value)s) - """.format(table=table_name, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE) - batch = BatchStatement(retry_policy=BASIC_RETRIES) - for column in column_names: - parameters = {'key': row_key, - 'column': column, - 'value': bytearray(values[column])} - batch.add(statement, parameters) - - try: - yield self.tornado_cassandra.execute(batch) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - list_[0] += 'Unable to insert entity' - raise gen.Return(list_) - - list_.append("0") - raise gen.Return(list_) - - def put_entity_dict(self, table_name, row_key, value_dict): - raise NotImplementedError("put_entity_dict is not implemented in %s." % self.__class__) - - @gen.coroutine - def get_table(self, table_name, column_names): - """ Fetch a list of values for the given columns in a table. - - Args: - table_name: A string containing the name of the table. - column_names: A list of column names to retrieve values for. - Returns: - A list containing a status marker followed by the values. - Note: The response does not contain any row keys or column names. - """ - response = [ERROR_DEFAULT] - - statement = 'SELECT * FROM "{table}"'.format(table=table_name) - query = SimpleStatement(statement, retry_policy=BASIC_RETRIES) - - try: - results = yield self.tornado_cassandra.execute(query) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - response[0] += 'Unable to fetch table contents' - raise gen.Return(response) - - results_list = [] - current_item = {} - current_key = None - for (key, column, value) in results: - if key != current_key: - if current_item: - results_list.append({current_key: current_item}) - current_item = {} - current_key = key - - current_item[column] = value - if current_item: - results_list.append({current_key: current_item}) - - for result in results_list: - result_columns = result.values()[0] - for column in column_names: - try: - response.append(result_columns[column]) - except KeyError: - response[0] += 'Table contents did not match schema' - raise gen.Return(response) - - raise gen.Return(response) - - @gen.coroutine - def delete_row(self, table_name, row_key): - response = [ERROR_DEFAULT] - row_key = bytearray('/'.join([table_name, row_key])) - - statement = 'DELETE FROM "{table}" WHERE {key} = %s'.format( - table=table_name, key=ThriftColumn.KEY) - delete = SimpleStatement(statement, retry_policy=BASIC_RETRIES) - - try: - yield self.tornado_cassandra.execute(delete, (row_key,)) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - response[0] += 'Unable to delete row' - raise gen.Return(response) - - response.append('0') - raise gen.Return(response) - - @gen.coroutine - def get_schema(self, table_name): - error = [ERROR_DEFAULT] - result = error - ret = yield self.get_entity(SCHEMA_TABLE, table_name, SCHEMA_TABLE_SCHEMA) - if len(ret) > 1: - schema = ret[1] - else: - error[0] = ret[0] + "--unable to get schema" - raise gen.Return(error) - schema = schema.split(':') - result = result + schema - raise gen.Return(result) diff --git a/AppDB/appscale/datastore/cassandra_env/rebalance.py b/AppDB/appscale/datastore/cassandra_env/rebalance.py deleted file mode 100644 index f6c1406cb1..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/rebalance.py +++ /dev/null @@ -1,195 +0,0 @@ -from __future__ import division -import argparse -import logging -import os - -from appscale.common.appscale_utils import ssh -from appscale.common.constants import KEY_DIRECTORY -from appscale.common.constants import 
LOG_FORMAT -from subprocess import (CalledProcessError, - check_output) -from ..cassandra_env.cassandra_interface import NODE_TOOL -from ..cassandra_env.cassandra_interface import KEYSPACE - - -# The percentage difference allowed between an actual and ideal load. -MAX_DRIFT = .3 - -logger = logging.getLogger(__name__) - - -def get_status(): - """ Return the cluster status in a structured way. - - Returns: - A list of nodes represented by dictionaries. - """ - nodes = [] - for line in check_output([NODE_TOOL, 'status', KEYSPACE]).splitlines(): - fields = line.split() - if len(fields) != 8: - continue - nodes.append({ - 'state': fields[0], - 'ip': fields[1], - 'tokens': int(fields[4]), - 'owns': float(fields[5][:-1]) - }) - return nodes - - -def get_gossip(): - """ Return the cluster gossip in a structured way. - - Returns: - A list of nodes represented by dictionaries. - """ - nodes = [] - current_node = None - for line in check_output([NODE_TOOL, 'gossipinfo']).splitlines(): - if line.startswith('/'): - if current_node is not None: - nodes.append(current_node) - - current_node = {'ip': line.strip()[1:]} - - if line.strip().startswith('STATUS'): - current_node['ready'] = 'NORMAL' in line - current_node['token'] = line.split(',')[-1] - - if line.strip().startswith('LOAD'): - current_node['load'] = float(line.split(':')[-1]) - - if current_node is not None: - nodes.append(current_node) - - if not nodes: - raise Exception('Unable to collect gossip for any nodes') - - required_fields = ['ip', 'ready', 'load', 'token'] - for node in nodes: - for required_field in required_fields: - if required_field not in node: - raise Exception('Unable to parse all fields for {}'.format(node)) - - return nodes - - -def get_ring(gossip): - """ Return the ring status in a structured way. - - Args: - gossip: A list of gossip info for each node. - - Returns: - A list of nodes represented by dictionaries. - """ - nodes = sorted(gossip, key=lambda node: node['token']) - for index, node in enumerate(nodes): - node['index'] = index - - if not nodes: - raise Exception('Unable to find nodes in ring') - - # Calculate skew and diff for each node in ring. - ideal_load = sum(node['load'] for node in nodes) / len(nodes) - for index, node in enumerate(nodes): - try: - node['skew'] = abs(node['load'] - ideal_load) / ideal_load - except ZeroDivisionError: - node['skew'] = 0 - - node['diff'] = abs(node['load'] - nodes[index - 1]['load']) - - return nodes - - -def equalize(node1, node2): - """ Move data from the node with a larger load to the other one. - - Args: - node1: A dictionary representing a node. - node2: A dictionary representing a neighbor of node1. 
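[Editor's note] As the equalize body just below shows, half of the load difference is moved off the heavier node, and the percentile handed to appscale-get-token picks the new token boundary. A sketch of that arithmetic on its own, with the ssh/nodetool steps omitted:

# Sketch of the load-equalization arithmetic in the removed equalize().
def move_plan(load1, load2):
    to_move = abs(load1 - load2) / 2
    if load1 > load2:
        percentile = 100 - int((to_move / load1) * 100)
        direction = 'node1 -> node2'
    else:
        percentile = int((to_move / load2) * 100)
        direction = 'node2 -> node1'
    return direction, to_move, percentile

assert move_plan(100.0, 60.0) == ('node1 -> node2', 20.0, 80)
assert move_plan(60.0, 100.0) == ('node2 -> node1', 20.0, 20)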
- """ - keys = [key for key in os.listdir(KEY_DIRECTORY) if key.endswith('.key')] - keyname = keys[0].split('.')[0] - - to_move = abs(node1['load'] - node2['load']) / 2 - mb_to_move = round(to_move / 1024 ** 2, 2) - if node1['load'] > node2['load']: - logger.info('Moving {} MiB from {} to {}'.format( - mb_to_move, node1['ip'], node2['ip'])) - percentile = 100 - int((to_move / node1['load']) * 100) - new_token = ssh(node1['ip'], keyname, - 'appscale-get-token {}'.format(percentile), - method=check_output).strip() - repair = [new_token, node1['token']] - cleanup_ip = node1['ip'] - else: - logger.info('Moving {} MiB from {} to {}'.format( - mb_to_move, node2['ip'], node1['ip'])) - percentile = int((to_move / node2['load']) * 100) - new_token = ssh(node2['ip'], keyname, - 'appscale-get-token {}'.format(percentile), - method=check_output).strip() - repair = [node1['token'], new_token] - cleanup_ip = node2['ip'] - - logger.info('Moving {} to {}'.format(node1['ip'], new_token[:60] + '...')) - ssh(node1['ip'], keyname, '{} move {}'.format(NODE_TOOL, new_token)) - - start = repair[0][:60] + '...' - end = repair[1][:60] + '...' - logger.info('Repairing {} to {}'.format(start, end)) - check_output([NODE_TOOL, 'repair', '-st', repair[0], '-et', repair[1]]) - - logger.info('Cleaning up {}'.format(cleanup_ip)) - ssh(cleanup_ip, keyname, '{} cleanup'.format(NODE_TOOL)) - - -def main(): - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - - parser = argparse.ArgumentParser() - parser.add_argument( - '--skip-tokens-check', action='store_true', - help='Assume that all nodes own one token') - parser.add_argument( - '--skip-ownership-check', action='store_true', - help='Assume that the node count exceeds the replication factor') - args = parser.parse_args() - - if not args.skip_tokens_check or not args.skip_ownership_check: - logger.info('Fetching status') - status = get_status() - - if (not args.skip_tokens_check and - any(node['tokens'] != 1 for node in status)): - raise Exception('All nodes must have exactly one token') - - if (not args.skip_ownership_check and - any(node['owns'] != float(100) for node in status)): - raise Exception('All nodes already own every key') - - logger.info('Fetching gossip') - gossip = get_gossip() - - if sum(node['ready'] for node in gossip) <= 1: - raise Exception('There must be more than one node up to balance') - - ring = get_ring(gossip) - if max(node['skew'] for node in ring) < MAX_DRIFT: - logger.info('All nodes within {}% of ideal load'.format(MAX_DRIFT * 100)) - return - - # Pick two neighboring nodes with the largest difference in load. If the - # equalization process fails, try the next largest difference. - ring_by_diff = sorted(ring, key=lambda node: node['diff'], reverse=True) - for node in ring_by_diff: - try: - equalize(ring[node['index'] - 1], ring[node['index']]) - # If data has been moved, the load needs to be re-evaluated. Load gets - # updated after 90 seconds. - break - except CalledProcessError: - continue diff --git a/AppDB/appscale/datastore/cassandra_env/retry_policies.py b/AppDB/appscale/datastore/cassandra_env/retry_policies.py deleted file mode 100644 index 1e8872c47a..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/retry_policies.py +++ /dev/null @@ -1,65 +0,0 @@ -from cassandra.policies import (FallthroughRetryPolicy, - RetryPolicy) - -# The number of times to retry idempotent statements. -BASIC_RETRY_COUNT = 5 - - -class IdempotentRetryPolicy(RetryPolicy): - """ A policy used for retrying idempotent statements. 
""" - def on_read_timeout(self, query, consistency, required_responses, - received_responses, data_retrieved, retry_num): - """ This is called when a ReadTimeout occurs. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_responses: The number of responses required. - received_responses: The number of responses received. - data_retrieved: Indicates whether any responses contained data. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - def on_write_timeout(self, query, consistency, write_type, - required_responses, received_responses, retry_num): - """ This is called when a WriteTimeout occurs. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_responses: The number of responses required. - received_responses: The number of responses received. - data_retrieved: Indicates whether any responses contained data. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - def on_unavailable(self, query, consistency, required_replicas, - alive_replicas, retry_num): - """ The coordinator has detected an insufficient number of live replicas. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_replicas: The number of replicas required to complete query. - alive_replicas: The number of replicas that are ready to complete query. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - -# A basic policy that retries idempotent operations. -BASIC_RETRIES = IdempotentRetryPolicy() - -# A retry policy that never retries operations. -NO_RETRIES = FallthroughRetryPolicy() diff --git a/AppDB/appscale/datastore/cassandra_env/schema.py b/AppDB/appscale/datastore/cassandra_env/schema.py deleted file mode 100644 index 1cbad7e508..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/schema.py +++ /dev/null @@ -1,473 +0,0 @@ -""" Create Cassandra keyspace and initial tables. """ - -import cassandra -import logging -import sys -import time -from collections import defaultdict - -from kazoo.client import KazooClient - -import cassandra_interface - -from appscale.common import appscale_info -from appscale.common.constants import SCHEMA_CHANGE_TIMEOUT -from appscale.common.datastore_index import DatastoreIndex, merge_indexes -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from cassandra import ConsistencyLevel -from cassandra.cluster import Cluster -from cassandra.cluster import SimpleStatement -from cassandra.policies import FallthroughRetryPolicy, RetryPolicy -from .cassandra_interface import IndexStates -from .cassandra_interface import INITIAL_CONNECT_RETRIES -from .cassandra_interface import KEYSPACE -from .cassandra_interface import ScatterPropStates -from .cassandra_interface import ThriftColumn -from .constants import CURRENT_VERSION, LB_POLICY -from .. import dbconstants - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore import entity_pb -from google.net.proto.ProtocolBuffer import ProtocolBufferDecodeError - -# A policy that does not retry statements. 
-NO_RETRIES = FallthroughRetryPolicy() - -logger = logging.getLogger(__name__) - -# The number of times to retry idempotent statements. -BASIC_RETRY_COUNT = 5 - - -class IdempotentRetryPolicy(RetryPolicy): - """ A policy used for retrying idempotent statements. """ - def on_read_timeout(self, query, consistency, required_responses, - received_responses, data_retrieved, retry_num): - """ This is called when a ReadTimeout occurs. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_responses: The number of responses required. - received_responses: The number of responses received. - data_retrieved: Indicates whether any responses contained data. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - def on_write_timeout(self, query, consistency, write_type, - required_responses, received_responses, retry_num): - """ This is called when a WriteTimeout occurs. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_responses: The number of responses required. - received_responses: The number of responses received. - data_retrieved: Indicates whether any responses contained data. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - def on_unavailable(self, query, consistency, required_replicas, - alive_replicas, retry_num): - """ The coordinator has detected an insufficient number of live replicas. - - Args: - query: A statement that timed out. - consistency: The consistency level of the statement. - required_replicas: The number of replicas required to complete query. - alive_replicas: The number of replicas that are ready to complete query. - retry_num: The number of times the statement has been tried. - """ - if retry_num >= BASIC_RETRY_COUNT: - return self.RETHROW, None - else: - return self.RETRY, consistency - - -# A basic policy that retries idempotent operations. -BASIC_RETRIES = IdempotentRetryPolicy() - - -def define_ua_schema(session): - """ Populate the schema table for the UAServer. - - Args: - session: A cassandra-driver session. - """ - uaserver_tables = [ - {'name': dbconstants.USERS_TABLE, 'schema': dbconstants.USERS_SCHEMA} - ] - for table in uaserver_tables: - key = bytearray('/'.join([dbconstants.SCHEMA_TABLE, table['name']])) - columns = bytearray(':'.join(table['schema'])) - define_schema = """ - INSERT INTO "{table}" ({key}, {column}, {value}) - VALUES (%(key)s, %(column)s, %(value)s) - """.format(table=dbconstants.SCHEMA_TABLE, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE) - values = {'key': key, - 'column': dbconstants.SCHEMA_TABLE_SCHEMA[0], - 'value': columns} - session.execute(define_schema, values) - - -def create_batch_tables(cluster, session): - """ Create the tables required for large batches. - - Args: - cluster: A cassandra-driver cluster. - session: A cassandra-driver session. 
- """ - keyspace_metadata = cluster.metadata.keyspaces[KEYSPACE] - if 'batches' in keyspace_metadata.tables: - columns = keyspace_metadata.tables['batches'].columns - if ('transaction' in columns and - columns['transaction'].cql_type != 'bigint'): - session.execute('DROP TABLE batches', timeout=SCHEMA_CHANGE_TIMEOUT) - - logger.info('Trying to create batches') - create_table = """ - CREATE TABLE IF NOT EXISTS batches ( - app text, - transaction bigint, - namespace text, - path blob, - old_value blob, - new_value blob, - exclude_indices text, - PRIMARY KEY ((app, transaction), namespace, path) - ) - """ - statement = SimpleStatement(create_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except cassandra.OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating batches table. ' - 'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - if ('batch_status' in keyspace_metadata.tables and - 'txid_hash' not in keyspace_metadata.tables['batch_status'].columns): - session.execute('DROP TABLE batch_status', timeout=SCHEMA_CHANGE_TIMEOUT) - - logger.info('Trying to create batch_status') - create_table = """ - CREATE TABLE IF NOT EXISTS batch_status ( - txid_hash blob PRIMARY KEY, - applied boolean, - op_id uuid - ) - """ - statement = SimpleStatement(create_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except cassandra.OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating batch_status table. ' - 'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - -def create_groups_table(session): - create_table = """ - CREATE TABLE IF NOT EXISTS group_updates ( - group blob PRIMARY KEY, - last_update bigint - ) - """ - statement = SimpleStatement(create_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except cassandra.OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating group_updates table. ' - 'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - -def create_transactions_table(session): - """ Create the table used for storing transaction metadata. - - Args: - session: A cassandra-driver session. - """ - create_table = """ - CREATE TABLE IF NOT EXISTS transactions ( - txid_hash blob, - operation tinyint, - namespace text, - path blob, - start_time timestamp, - is_xg boolean, - in_progress blob, - entity blob, - task blob, - PRIMARY KEY (txid_hash, operation, namespace, path) - ) WITH gc_grace_seconds = 120 - """ - statement = SimpleStatement(create_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except cassandra.OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating transactions table. 
' - 'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - -def create_entity_ids_table(session): - create_table = """ - CREATE TABLE IF NOT EXISTS reserved_ids ( - project text, - scattered boolean, - last_reserved bigint, - op_id uuid, - PRIMARY KEY ((project, scattered)) - ) - """ - statement = SimpleStatement(create_table, retry_policy=NO_RETRIES) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except cassandra.OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating entity_ids table. ' - 'Waiting {} seconds for schema to settle.'.format(SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - -def current_datastore_version(session): - """ Retrieves the existing datastore version value. - - Args: - session: A cassandra-driver session. - Returns: - A float specifying the existing datastore version or None. - """ - key = cassandra_interface.VERSION_INFO_KEY - statement = """ - SELECT {value} FROM "{table}" - WHERE {key} = %s - AND {column} = %s - """.format( - value=ThriftColumn.VALUE, - table=dbconstants.DATASTORE_METADATA_TABLE, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME - ) - results = session.execute(statement, (bytearray(key), key)) - try: - return float(results[0].value) - except IndexError: - return None - - -def migrate_composite_index_metadata(cluster, session, zk_client): - """ Moves any existing datastore index metadata to ZooKeeper. - - Args: - cluster: A cassandra.cluster.Cluster object. - session: A cassandra.cluster.Session object. - zk_client: A kazoo.client.KazooClient object. - """ - keyspace_metadata = cluster.metadata.keyspaces[KEYSPACE] - if dbconstants.METADATA_TABLE not in keyspace_metadata.tables: - return - - logging.info('Fetching previously-defined index definitions') - results = session.execute( - 'SELECT * FROM "{}"'.format(dbconstants.METADATA_TABLE)) - indexes_by_project = defaultdict(list) - for result in results: - try: - index_pb = entity_pb.CompositeIndex(result.value) - except ProtocolBufferDecodeError: - logging.warning('Invalid composite index: {}'.format(result.value)) - continue - - index = DatastoreIndex.from_pb(index_pb) - # Assume the index is complete. - index.ready = True - indexes_by_project[index.project_id].append(index) - - for project_id, indexes in indexes_by_project.items(): - logging.info('Adding indexes for {}'.format(project_id)) - merge_indexes(zk_client, project_id, indexes) - - logging.info('Removing previously-defined index definitions from Cassandra') - session.execute('DROP TABLE "{}"'.format(dbconstants.METADATA_TABLE), - timeout=SCHEMA_CHANGE_TIMEOUT) - - -def prime_cassandra(replication): - """ Create Cassandra keyspace and initial tables. - - Args: - replication: An integer specifying the replication factor for the keyspace. - Raises: - AppScaleBadArg if replication factor is not greater than 0. - TypeError if replication is not an integer. 
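Every create_* helper in this module wraps its DDL in the same guard: execute with no retries, and if the coordinator times out, wait out a schema-settle window before re-raising. A condensed sketch of that shared pattern (the function name and the explicit timeout argument are illustrative; the module itself uses SCHEMA_CHANGE_TIMEOUT directly):

import logging
import time

import cassandra
from cassandra.policies import FallthroughRetryPolicy
from cassandra.query import SimpleStatement

logger = logging.getLogger(__name__)


def run_schema_change(session, query, settle_timeout):
  """ Executes a DDL statement without retries, waiting out timeouts.

  Args:
    session: A cassandra-driver session.
    query: A DDL string such as CREATE TABLE IF NOT EXISTS ...
    settle_timeout: Seconds to allow the schema change to settle.
  """
  statement = SimpleStatement(query, retry_policy=FallthroughRetryPolicy())
  try:
    session.execute(statement, timeout=settle_timeout)
  except cassandra.OperationTimedOut:
    logger.warning('Schema change timed out. Waiting {} seconds for schema '
                   'to settle.'.format(settle_timeout))
    time.sleep(settle_timeout)
    raise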
- """ - if not isinstance(replication, int): - raise TypeError('Replication must be an integer') - - if int(replication) <= 0: - raise dbconstants.AppScaleBadArg('Replication must be greater than zero') - - zk_client = KazooClient(hosts=appscale_info.get_zk_node_ips()) - zk_client.start() - - hosts = appscale_info.get_db_ips() - - remaining_retries = INITIAL_CONNECT_RETRIES - while True: - try: - cluster = Cluster(hosts, load_balancing_policy=LB_POLICY) - session = cluster.connect() - break - except cassandra.cluster.NoHostAvailable as connection_error: - remaining_retries -= 1 - if remaining_retries < 0: - raise connection_error - time.sleep(3) - session.default_consistency_level = ConsistencyLevel.QUORUM - - create_keyspace = """ - CREATE KEYSPACE IF NOT EXISTS "{keyspace}" - WITH REPLICATION = %(replication)s - """.format(keyspace=KEYSPACE) - keyspace_replication = {'class': 'SimpleStrategy', - 'replication_factor': replication} - session.execute(create_keyspace, {'replication': keyspace_replication}, - timeout=SCHEMA_CHANGE_TIMEOUT) - session.set_keyspace(KEYSPACE) - - logger.info('Waiting for all hosts to be connected') - deadline = time.time() + SCHEMA_CHANGE_TIMEOUT - while True: - if time.time() > deadline: - logger.warning('Timeout when waiting for hosts to join. Continuing ' - 'with connected hosts.') - break - - if len(session.get_pool_state()) == len(hosts): - break - - time.sleep(1) - - for table in dbconstants.INITIAL_TABLES: - create_table = """ - CREATE TABLE IF NOT EXISTS "{table}" ( - {key} blob, - {column} text, - {value} blob, - PRIMARY KEY ({key}, {column}) - ) WITH COMPACT STORAGE - """.format(table=table, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE) - statement = SimpleStatement(create_table, retry_policy=NO_RETRIES) - - logger.info('Trying to create {}'.format(table)) - try: - session.execute(statement, timeout=SCHEMA_CHANGE_TIMEOUT) - except cassandra.OperationTimedOut: - logger.warning( - 'Encountered an operation timeout while creating {} table. Waiting {} ' - 'seconds for schema to settle.'.format(table, SCHEMA_CHANGE_TIMEOUT)) - time.sleep(SCHEMA_CHANGE_TIMEOUT) - raise - - migrate_composite_index_metadata(cluster, session, zk_client) - create_batch_tables(cluster, session) - create_groups_table(session) - create_transactions_table(session) - create_entity_ids_table(session) - - first_entity = session.execute( - 'SELECT * FROM "{}" LIMIT 1'.format(dbconstants.APP_ENTITY_TABLE)) - existing_entities = len(list(first_entity)) == 1 - - define_ua_schema(session) - - metadata_insert = """ - INSERT INTO "{table}" ({key}, {column}, {value}) - VALUES (%(key)s, %(column)s, %(value)s) - """.format( - table=dbconstants.DATASTORE_METADATA_TABLE, - key=ThriftColumn.KEY, - column=ThriftColumn.COLUMN_NAME, - value=ThriftColumn.VALUE - ) - - if existing_entities: - current_version = current_datastore_version(session) - if current_version == 1.0: - # Instruct the groomer to reclean the indexes. 
- parameters = {'key': bytearray(cassandra_interface.INDEX_STATE_KEY), - 'column': cassandra_interface.INDEX_STATE_KEY, - 'value': bytearray(str(IndexStates.DIRTY))} - session.execute(metadata_insert, parameters) - - parameters = {'key': bytearray(cassandra_interface.VERSION_INFO_KEY), - 'column': cassandra_interface.VERSION_INFO_KEY, - 'value': bytearray(str(CURRENT_VERSION))} - session.execute(metadata_insert, parameters) - else: - parameters = {'key': bytearray(cassandra_interface.VERSION_INFO_KEY), - 'column': cassandra_interface.VERSION_INFO_KEY, - 'value': bytearray(str(CURRENT_VERSION))} - session.execute(metadata_insert, parameters) - - # Mark the newly created indexes as clean. - parameters = {'key': bytearray(cassandra_interface.INDEX_STATE_KEY), - 'column': cassandra_interface.INDEX_STATE_KEY, - 'value': bytearray(str(IndexStates.CLEAN))} - session.execute(metadata_insert, parameters) - - # Indicate that scatter property values do not need to be populated. - parameters = {'key': bytearray(cassandra_interface.SCATTER_PROP_KEY), - 'column': cassandra_interface.SCATTER_PROP_KEY, - 'value': bytearray(ScatterPropStates.POPULATED)} - session.execute(metadata_insert, parameters) - - # Indicate that the database has been successfully primed. - parameters = {'key': bytearray(cassandra_interface.PRIMED_KEY), - 'column': cassandra_interface.PRIMED_KEY, - 'value': bytearray(str(CURRENT_VERSION))} - session.execute(metadata_insert, parameters) - logger.info('Cassandra is primed.') - - -def primed(): - """ Check if the required keyspace and tables are present. - - Returns: - A boolean indicating that Cassandra has been primed. - """ - try: - db_access = cassandra_interface.DatastoreProxy() - except cassandra.InvalidRequest: - return False - - try: - primed_version = db_access.get_metadata_sync(cassandra_interface.PRIMED_KEY) - return primed_version == str(CURRENT_VERSION) - finally: - db_access.close() diff --git a/AppDB/appscale/datastore/cassandra_env/templates/cassandra-env.sh b/AppDB/appscale/datastore/cassandra_env/templates/cassandra-env.sh deleted file mode 100644 index 14166730e2..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/templates/cassandra-env.sh +++ /dev/null @@ -1,320 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -calculate_heap_sizes() -{ - case "`uname`" in - Linux) - system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` - system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` - ;; - FreeBSD) - system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` - system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` - system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` - ;; - SunOS) - system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` - system_cpu_cores=`psrinfo | wc -l` - ;; - Darwin) - system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` - system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` - system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` - ;; - *) - # assume reasonable defaults for e.g. a modern desktop or - # cheap server - system_memory_in_mb="2048" - system_cpu_cores="2" - ;; - esac - - # some systems like the raspberry pi don't report cores, use at least 1 - if [ "$system_cpu_cores" -lt "1" ] - then - system_cpu_cores="1" - fi - - # set max heap size based on the following - # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) - # calculate 1/2 ram and cap to 1024MB - # calculate 1/4 ram and cap to 8192MB - # pick the max - half_system_memory_in_mb=`expr $system_memory_in_mb / 2` - quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` - if [ "$half_system_memory_in_mb" -gt "1024" ] - then - half_system_memory_in_mb="1024" - fi - if [ "$quarter_system_memory_in_mb" -gt "8192" ] - then - quarter_system_memory_in_mb="8192" - fi - if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] - then - max_heap_size_in_mb="$half_system_memory_in_mb" - else - max_heap_size_in_mb="$quarter_system_memory_in_mb" - fi - - # AppScale: Adjust max heap size to make room for other services. Padding - # should be defined as a decimal. For example, ".2" would set max heap to - # 80% of what it normally would be. - if [ -n "${HEAP_REDUCTION}" ] - then - max_heap_size_in_mb=$(awk \ - "BEGIN { print int(${max_heap_size_in_mb}*(1-${HEAP_REDUCTION})) }") - fi - - MAX_HEAP_SIZE="${max_heap_size_in_mb}M" - - # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) - max_sensible_yg_per_core_in_mb="100" - max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` - - desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` - - if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] - then - HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" - else - HEAP_NEWSIZE="${desired_yg_in_mb}M" - fi -} - -# Determine the sort of JVM we'll be running on. -JAVA="/usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java" -java_ver_output=`"${JAVA:-java}" -version 2>&1` -jvmver=`echo "$java_ver_output" | grep '[openjdk|java] version' | awk -F'"' 'NR==1 {print $2}' | cut -d\- -f1` -JVM_VERSION=${jvmver%_*} -JVM_PATCH_VERSION=${jvmver#*_} - -if [ "$JVM_VERSION" \< "1.8" ] ; then - echo "Cassandra 3.0 and later require Java 8u40 or later." - exit 1; -fi - -if [ "$JVM_VERSION" \< "1.8" ] && [ "$JVM_PATCH_VERSION" -lt 40 ] ; then - echo "Cassandra 3.0 and later require Java 8u40 or later." 
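The heap rule that calculate_heap_sizes() above encodes in shell is easier to follow as plain arithmetic; a Python sketch of the same formula, where the 16 GiB / 8-core inputs are just sample values:

def cassandra_heap_mb(system_memory_mb, cpu_cores, heap_reduction=0.0):
  """ Mirrors calculate_heap_sizes(): (MAX_HEAP_SIZE, HEAP_NEWSIZE) in MB. """
  # max(min(1/2 ram, 1024MB), min(1/4 ram, 8192MB))
  half = min(system_memory_mb // 2, 1024)
  quarter = min(system_memory_mb // 4, 8192)
  max_heap = max(half, quarter)

  # AppScale padding: give up a fraction of the heap for other services.
  max_heap = int(max_heap * (1 - heap_reduction))

  # Young generation: min(100MB per core, 1/4 of the heap).
  new_size = min(100 * cpu_cores, max_heap // 4)
  return max_heap, new_size


print(cassandra_heap_mb(16384, 8, heap_reduction=0.2))  # (3276, 800)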
- exit 1; -fi - -jvm=`echo "$java_ver_output" | grep -A 1 'java version' | awk 'NR==2 {print $1}'` -case "$jvm" in - OpenJDK) - JVM_VENDOR=OpenJDK - # this will be "64-Bit" or "32-Bit" - JVM_ARCH=`echo "$java_ver_output" | awk 'NR==3 {print $2}'` - ;; - "Java(TM)") - JVM_VENDOR=Oracle - # this will be "64-Bit" or "32-Bit" - JVM_ARCH=`echo "$java_ver_output" | awk 'NR==3 {print $3}'` - ;; - *) - # Help fill in other JVM values - JVM_VENDOR=other - JVM_ARCH=unknown - ;; -esac - -#GC log path has to be defined here because it needs to access CASSANDRA_HOME -JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_HOME}/logs/gc.log" - -# Here we create the arguments that will get passed to the jvm when -# starting cassandra. - -# Read user-defined JVM options from jvm.options file -JVM_OPTS_FILE=$CASSANDRA_CONF/jvm.options -for opt in `grep "^-" $JVM_OPTS_FILE` -do - JVM_OPTS="$JVM_OPTS $opt" -done - -# Check what parameters were defined on jvm.options file to avoid conflicts -echo $JVM_OPTS | grep -q Xmn -DEFINED_XMN=$? -echo $JVM_OPTS | grep -q Xmx -DEFINED_XMX=$? -echo $JVM_OPTS | grep -q Xms -DEFINED_XMS=$? -echo $JVM_OPTS | grep -q UseConcMarkSweepGC -USING_CMS=$? -echo $JVM_OPTS | grep -q UseG1GC -USING_G1=$? - -# Override these to set the amount of memory to allocate to the JVM at -# start-up. For production use you may wish to adjust this for your -# environment. MAX_HEAP_SIZE is the total amount of memory dedicated -# to the Java heap. HEAP_NEWSIZE refers to the size of the young -# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set -# or not (if you set one, set the other). -# -# The main trade-off for the young generation is that the larger it -# is, the longer GC pause times will be. The shorter it is, the more -# expensive GC will be (usually). -# -# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause -# times. If in doubt, and if you do not particularly want to tweak, go with -# 100 MB per physical CPU core. - -#MAX_HEAP_SIZE="4G" -#HEAP_NEWSIZE="800M" - -# Set this to control the amount of arenas per-thread in glibc -#export MALLOC_ARENA_MAX=4 - -# only calculate the size if it's not set manually -if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then - calculate_heap_sizes -elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then - echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" - exit 1 -fi - -if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then - export MALLOC_ARENA_MAX=4 -fi - -# We only set -Xms and -Xmx if they were not defined on jvm.options file -# If defined, both Xmx and Xms should be defined together. -if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then - JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" - JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" -elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then - echo "Please set or unset -Xmx and -Xms flags in pairs on jvm.options file." - exit 1 -fi - -# We only set -Xmn flag if it was not defined in jvm.options file -# and if the CMS GC is being used -# If defined, both Xmn and Xmx should be defined together. -if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then - echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm.options file." 
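The grep checks above boil down to two pairing rules for jvm.options; a hypothetical helper (the path and function name are made up) that reports the same violations:

def check_jvm_options(path='/etc/cassandra/jvm.options'):
  """ Flags -Xms/-Xmx/-Xmn combinations the launcher script above rejects. """
  with open(path) as options_file:
    joined = ' '.join(line.strip() for line in options_file
                      if line.startswith('-'))

  has_xms = '-Xms' in joined
  has_xmx = '-Xmx' in joined
  has_xmn = '-Xmn' in joined

  problems = []
  if has_xms != has_xmx:
    problems.append('Set or unset -Xmx and -Xms in pairs.')
  if has_xmn and not has_xmx:
    problems.append('Set or unset -Xmx and -Xmn in pairs.')
  return problems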
- exit 1 -elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then - JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" -fi - -if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then - JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" -fi - -# provides hints to the JIT compiler -JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" - -# add the jamm javaagent -JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.0.jar" - -JVM_OPTS="$JVM_OPTS -XX:ErrorFile=$CASSANDRA_HOME/logs/cassandra-hs-err-pid$$.log" - -# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR -if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then - JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" -fi - -# stop the jvm on OutOfMemoryError as it can result in some data corruption -# uncomment the preferred option -# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 -# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words -# on white spaces without taking quotes into account -# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" -# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" -JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" - -# print an heap histogram on OutOfMemoryError -# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" - -# jmx: metrics and administration interface -# -# add this if you're having trouble connecting: -# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" -# -# see -# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole -# for more on configuring JMX through firewalls, etc. (Short version: -# get it working with no firewall first.) -# -# Cassandra ships with JMX accessible *only* from localhost. -# To enable remote JMX connections, uncomment lines below -# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity -# -if [ "x$LOCAL_JMX" = "x" ]; then - LOCAL_JMX=yes -fi - -# Specifies the default port over which Cassandra will be available for -# JMX connections. -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -JMX_PORT="7199" - -if [ "$LOCAL_JMX" = "yes" ]; then - JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" - JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" -else - JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" - # if ssl is enabled the same port cannot be used for both jmx and rmi so either - # pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins) - JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" - - # turn on JMX authentication. See below for further options - JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true" - - # jmx ssl options - #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true" - #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true" - #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=" - #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=" - #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore" - #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=" - #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore" - #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=" -fi - -# jmx authentication and authorization options. 
By default, auth is only -# activated for remote connections but they can also be enabled for local only JMX -## Basic file based authn & authz -JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" -#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" -## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. -## JAAS login modules can be used for authentication by uncommenting these two properties. -## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - -## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration -## file cassandra-jaas.config -#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" -#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_HOME/conf/cassandra-jaas.config" - -## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, -## uncomment this to use it. Requires one of the two authentication options to be enabled -#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" - -# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ -# directory. -# See http://wiki.apache.org/cassandra/Operations#Monitoring_with_MX4J -# By default mx4j listens on 0.0.0.0:8081. Uncomment the following lines -# to control its listen address and port. -#MX4J_ADDRESS="-Dmx4jaddress=127.0.0.1" -#MX4J_PORT="-Dmx4jport=8081" - -# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 -# for SIGAR we have to set the java.library.path -# to the location of the native libraries. -JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" - -JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" -JVM_OPTS="$JVM_OPTS $MX4J_PORT" -JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" diff --git a/AppDB/appscale/datastore/cassandra_env/templates/cassandra.yaml b/AppDB/appscale/datastore/cassandra_env/templates/cassandra.yaml deleted file mode 100644 index db3559e8a9..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/templates/cassandra.yaml +++ /dev/null @@ -1,1237 +0,0 @@ -# Cassandra storage config YAML - -# NOTE: -# See http://wiki.apache.org/cassandra/StorageConfiguration for -# full explanations of configuration directives -# /NOTE - -# The name of the cluster. This is mainly used to prevent machines in -# one logical cluster from joining another. -cluster_name: 'Test Cluster' - -# This defines the number of tokens randomly assigned to this node on the ring -# The more tokens, relative to other nodes, the larger the proportion of data -# that this node will store. You probably want all nodes to have the same number -# of tokens assuming they have equal hardware capability. -# -# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, -# and will use the initial_token as described below. -# -# Specifying initial_token will override this setting on the node's initial start, -# on subsequent starts, this setting will apply even if initial token is set. -# -# If you already have a cluster with 1 token per node, and wish to migrate to -# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations -num_tokens: APPSCALE-NUM-TOKENS - -# Triggers automatic allocation of num_tokens tokens for this node. 
The allocation -# algorithm attempts to choose tokens in a way that optimizes replicated load over -# the nodes in the datacenter for the replication strategy used by the specified -# keyspace. -# -# The load assigned to each node will be close to proportional to its number of -# vnodes. -# -# Only supported with the Murmur3Partitioner. -# allocate_tokens_for_keyspace: KEYSPACE - -# initial_token allows you to specify tokens manually. While you can use it with -# vnodes (num_tokens > 1, above) -- in which case you should provide a -# comma-separated list -- it's primarily used when adding nodes to legacy clusters -# that do not have vnodes enabled. -# initial_token: - -# See http://wiki.apache.org/cassandra/HintedHandoff -# May either be "true" or "false" to enable globally -hinted_handoff_enabled: true - -# When hinted_handoff_enabled is true, a black list of data centers that will not -# perform hinted handoff -# hinted_handoff_disabled_datacenters: -# - DC1 -# - DC2 - -# this defines the maximum amount of time a dead host will have hints -# generated. After it has been dead this long, new hints for it will not be -# created until it has been seen alive and gone down again. -max_hint_window_in_ms: 10800000 # 3 hours - -# Maximum throttle in KBs per second, per delivery thread. This will be -# reduced proportionally to the number of nodes in the cluster. (If there -# are two nodes in the cluster, each delivery thread will use the maximum -# rate; if there are three, each will throttle to half of the maximum, -# since we expect two nodes to be delivering hints simultaneously.) -hinted_handoff_throttle_in_kb: 1024 - -# Number of threads with which to deliver hints; -# Consider increasing this number when you have multi-dc deployments, since -# cross-dc handoff tends to be slower -max_hints_delivery_threads: 2 - -# Directory where Cassandra should store hints. -# If not set, the default directory is $CASSANDRA_HOME/data/hints. -hints_directory: /opt/appscale/cassandra/hints - -# How often hints should be flushed from the internal buffers to disk. -# Will *not* trigger fsync. -hints_flush_period_in_ms: 10000 - -# Maximum size for a single hints file, in megabytes. -max_hints_file_size_in_mb: 128 - -# Compression to apply to the hint files. If omitted, hints files -# will be written uncompressed. LZ4, Snappy, and Deflate compressors -# are supported. -#hints_compression: -# - class_name: LZ4Compressor -# parameters: -# - - -# Maximum throttle in KBs per second, total. This will be -# reduced proportionally to the number of nodes in the cluster. -batchlog_replay_throttle_in_kb: 1024 - -# Authentication backend, implementing IAuthenticator; used to identify users -# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, -# PasswordAuthenticator}. -# -# - AllowAllAuthenticator performs no checks - set it to disable authentication. -# - PasswordAuthenticator relies on username/password pairs to authenticate -# users. It keeps usernames and hashed passwords in system_auth.roles table. -# Please increase system_auth keyspace replication factor if you use this authenticator. -# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) -authenticator: AllowAllAuthenticator - -# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions -# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, -# CassandraAuthorizer}. 
-# -# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. -# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please -# increase system_auth keyspace replication factor if you use this authorizer. -authorizer: AllowAllAuthorizer - -# Part of the Authentication & Authorization backend, implementing IRoleManager; used -# to maintain grants and memberships between roles. -# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, -# which stores role information in the system_auth keyspace. Most functions of the -# IRoleManager require an authenticated login, so unless the configured IAuthenticator -# actually implements authentication, most of this functionality will be unavailable. -# -# - CassandraRoleManager stores role data in the system_auth keyspace. Please -# increase system_auth keyspace replication factor if you use this role manager. -role_manager: CassandraRoleManager - -# Validity period for roles cache (fetching granted roles can be an expensive -# operation depending on the role manager, CassandraRoleManager is one example) -# Granted roles are cached for authenticated sessions in AuthenticatedUser and -# after the period specified here, become eligible for (async) reload. -# Defaults to 2000, set to 0 to disable caching entirely. -# Will be disabled automatically for AllowAllAuthenticator. -roles_validity_in_ms: 2000 - -# Refresh interval for roles cache (if enabled). -# After this interval, cache entries become eligible for refresh. Upon next -# access, an async reload is scheduled and the old value returned until it -# completes. If roles_validity_in_ms is non-zero, then this must be -# also. -# Defaults to the same value as roles_validity_in_ms. -# roles_update_interval_in_ms: 2000 - -# Validity period for permissions cache (fetching permissions can be an -# expensive operation depending on the authorizer, CassandraAuthorizer is -# one example). Defaults to 2000, set to 0 to disable. -# Will be disabled automatically for AllowAllAuthorizer. -permissions_validity_in_ms: 2000 - -# Refresh interval for permissions cache (if enabled). -# After this interval, cache entries become eligible for refresh. Upon next -# access, an async reload is scheduled and the old value returned until it -# completes. If permissions_validity_in_ms is non-zero, then this must be -# also. -# Defaults to the same value as permissions_validity_in_ms. -# permissions_update_interval_in_ms: 2000 - -# Validity period for credentials cache. This cache is tightly coupled to -# the provided PasswordAuthenticator implementation of IAuthenticator. If -# another IAuthenticator implementation is configured, this cache will not -# be automatically used and so the following settings will have no effect. -# Please note, credentials are cached in their encrypted form, so while -# activating this cache may reduce the number of queries made to the -# underlying table, it may not bring a significant reduction in the -# latency of individual authentication attempts. -# Defaults to 2000, set to 0 to disable credentials caching. -credentials_validity_in_ms: 2000 - -# Refresh interval for credentials cache (if enabled). -# After this interval, cache entries become eligible for refresh. Upon next -# access, an async reload is scheduled and the old value returned until it -# completes. If credentials_validity_in_ms is non-zero, then this must be -# also. -# Defaults to the same value as credentials_validity_in_ms. 
-# credentials_update_interval_in_ms: 2000 - -# The partitioner is responsible for distributing groups of rows (by -# partition key) across nodes in the cluster. You should leave this -# alone for new clusters. The partitioner can NOT be changed without -# reloading all data, so when upgrading you should set this to the -# same partitioner you were already using. -# -# Besides Murmur3Partitioner, partitioners included for backwards -# compatibility include RandomPartitioner, ByteOrderedPartitioner, and -# OrderPreservingPartitioner. -# -partitioner: org.apache.cassandra.dht.ByteOrderedPartitioner - -# Directories where Cassandra should store data on disk. Cassandra -# will spread data evenly across them, subject to the granularity of -# the configured compaction strategy. -# If not set, the default directory is $CASSANDRA_HOME/data/data. -data_file_directories: - - /opt/appscale/cassandra - -# commit log. when running on magnetic HDD, this should be a -# separate spindle than the data directories. -# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. -commitlog_directory: /opt/appscale/cassandra/commitlog - -# Enable / disable CDC functionality on a per-node basis. This modifies the logic used -# for write path allocation rejection (standard: never reject. cdc: reject Mutation -# containing a CDC-enabled table if at space limit in cdc_raw_directory). -cdc_enabled: false - -# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the -# segment contains mutations for a CDC-enabled table. This should be placed on a -# separate spindle than the data directories. If not set, the default directory is -# $CASSANDRA_HOME/data/cdc_raw. -# cdc_raw_directory: /var/lib/cassandra/cdc_raw - -# Policy for data disk failures: -# -# die -# shut down gossip and client transports and kill the JVM for any fs errors or -# single-sstable errors, so the node can be replaced. -# -# stop_paranoid -# shut down gossip and client transports even for single-sstable errors, -# kill the JVM for errors during startup. -# -# stop -# shut down gossip and client transports, leaving the node effectively dead, but -# can still be inspected via JMX, kill the JVM for errors during startup. -# -# best_effort -# stop using the failed disk and respond to requests based on -# remaining available sstables. This means you WILL see obsolete -# data at CL.ONE! -# -# ignore -# ignore fatal errors and let requests fail, as in pre-1.2 Cassandra -disk_failure_policy: stop - -# Policy for commit disk failures: -# -# die -# shut down gossip and Thrift and kill the JVM, so the node can be replaced. -# -# stop -# shut down gossip and Thrift, leaving the node effectively dead, but -# can still be inspected via JMX. -# -# stop_commit -# shutdown the commit log, letting writes collect but -# continuing to service reads, as in pre-2.0.5 Cassandra -# -# ignore -# ignore fatal errors and let the batches fail -commit_failure_policy: stop - -# Maximum size of the native protocol prepared statement cache -# -# Valid values are either "auto" (omitting the value) or a value greater 0. -# -# Note that specifying a too large value will result in long running GCs and possbily -# out-of-memory errors. Keep the value at a small fraction of the heap. -# -# If you constantly see "prepared statements discarded in the last minute because -# cache limit reached" messages, the first step is to investigate the root cause -# of these messages and check whether prepared statements are used correctly - -# i.e. 
use bind markers for variable parts. -# -# Do only change the default value, if you really have more prepared statements than -# fit in the cache. In most cases it is not neccessary to change this value. -# Constantly re-preparing statements is a performance penalty. -# -# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater -prepared_statements_cache_size_mb: - -# Maximum size of the Thrift prepared statement cache -# -# If you do not use Thrift at all, it is safe to leave this value at "auto". -# -# See description of 'prepared_statements_cache_size_mb' above for more information. -# -# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater -thrift_prepared_statements_cache_size_mb: - -# Maximum size of the key cache in memory. -# -# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the -# minimum, sometimes more. The key cache is fairly tiny for the amount of -# time it saves, so it's worthwhile to use it at large numbers. -# The row cache saves even more time, but must contain the entire row, -# so it is extremely space-intensive. It's best to only use the -# row cache if you have hot rows or static rows. -# -# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. -# -# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. -key_cache_size_in_mb: - -# Duration in seconds after which Cassandra should -# save the key cache. Caches are saved to saved_caches_directory as -# specified in this configuration file. -# -# Saved caches greatly improve cold-start speeds, and is relatively cheap in -# terms of I/O for the key cache. Row cache saving is much more expensive and -# has limited use. -# -# Default is 14400 or 4 hours. -key_cache_save_period: 14400 - -# Number of keys from the key cache to save -# Disabled by default, meaning all keys are going to be saved -# key_cache_keys_to_save: 100 - -# Row cache implementation class name. Available implementations: -# -# org.apache.cassandra.cache.OHCProvider -# Fully off-heap row cache implementation (default). -# -# org.apache.cassandra.cache.SerializingCacheProvider -# This is the row cache implementation availabile -# in previous releases of Cassandra. -# row_cache_class_name: org.apache.cassandra.cache.OHCProvider - -# Maximum size of the row cache in memory. -# Please note that OHC cache implementation requires some additional off-heap memory to manage -# the map structures and some in-flight memory during operations before/after cache entries can be -# accounted against the cache capacity. This overhead is usually small compared to the whole capacity. -# Do not specify more memory that the system can afford in the worst usual situation and leave some -# headroom for OS block level cache. Do never allow your system to swap. -# -# Default value is 0, to disable row caching. -row_cache_size_in_mb: 0 - -# Duration in seconds after which Cassandra should save the row cache. -# Caches are saved to saved_caches_directory as specified in this configuration file. -# -# Saved caches greatly improve cold-start speeds, and is relatively cheap in -# terms of I/O for the key cache. Row cache saving is much more expensive and -# has limited use. -# -# Default is 0 to disable saving the row cache. -row_cache_save_period: 0 - -# Number of keys from the row cache to save. 
-# Specify 0 (which is the default), meaning all keys are going to be saved -# row_cache_keys_to_save: 100 - -# Maximum size of the counter cache in memory. -# -# Counter cache helps to reduce counter locks' contention for hot counter cells. -# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before -# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration -# of the lock hold, helping with hot counter cell updates, but will not allow skipping -# the read entirely. Only the local (clock, count) tuple of a counter cell is kept -# in memory, not the whole counter, so it's relatively cheap. -# -# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. -# -# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. -# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. -counter_cache_size_in_mb: - -# Duration in seconds after which Cassandra should -# save the counter cache (keys only). Caches are saved to saved_caches_directory as -# specified in this configuration file. -# -# Default is 7200 or 2 hours. -counter_cache_save_period: 7200 - -# Number of keys from the counter cache to save -# Disabled by default, meaning all keys are going to be saved -# counter_cache_keys_to_save: 100 - -# saved caches -# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. -saved_caches_directory: /opt/appscale/cassandra/saved_caches - -# commitlog_sync may be either "periodic" or "batch." -# -# When in batch mode, Cassandra won't ack writes until the commit log -# has been fsynced to disk. It will wait -# commitlog_sync_batch_window_in_ms milliseconds between fsyncs. -# This window should be kept short because the writer threads will -# be unable to do extra work while waiting. (You may need to increase -# concurrent_writes for the same reason.) -# -# commitlog_sync: batch -# commitlog_sync_batch_window_in_ms: 2 -# -# the other option is "periodic" where writes may be acked immediately -# and the CommitLog is simply synced every commitlog_sync_period_in_ms -# milliseconds. -commitlog_sync: periodic -commitlog_sync_period_in_ms: 10000 - -# The size of the individual commitlog file segments. A commitlog -# segment may be archived, deleted, or recycled once all the data -# in it (potentially from each columnfamily in the system) has been -# flushed to sstables. -# -# The default size is 32, which is almost always fine, but if you are -# archiving commitlog segments (see commitlog_archiving.properties), -# then you probably want a finer granularity of archiving; 8 or 16 MB -# is reasonable. -# Max mutation size is also configurable via max_mutation_size_in_kb setting in -# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. -# This should be positive and less than 2048. -# -# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must -# be set to at least twice the size of max_mutation_size_in_kb / 1024 -# -commitlog_segment_size_in_mb: 32 - -# Compression to apply to the commit log. If omitted, the commit log -# will be written uncompressed. LZ4, Snappy, and Deflate compressors -# are supported. -# commitlog_compression: -# - class_name: LZ4Compressor -# parameters: -# - - -# any class that implements the SeedProvider interface and has a -# constructor that takes a Map of parameters will do. 
-seed_provider: - # Addresses of hosts that are deemed contact points. - # Cassandra nodes use this list of hosts to find each other and learn - # the topology of the ring. You must change this if you are running - # multiple nodes! - - class_name: org.apache.cassandra.locator.SimpleSeedProvider - parameters: - # seeds is actually a comma-delimited list of addresses. - # Ex: ",," - - seeds: "APPSCALE-MASTER" - -# For workloads with more data than can fit in memory, Cassandra's -# bottleneck will be reads that need to fetch data from -# disk. "concurrent_reads" should be set to (16 * number_of_drives) in -# order to allow the operations to enqueue low enough in the stack -# that the OS and drives can reorder them. Same applies to -# "concurrent_counter_writes", since counter writes read the current -# values before incrementing and writing them back. -# -# On the other hand, since writes are almost never IO bound, the ideal -# number of "concurrent_writes" is dependent on the number of cores in -# your system; (8 * number_of_cores) is a good rule of thumb. -concurrent_reads: 32 -concurrent_writes: 32 -concurrent_counter_writes: 32 - -# For materialized view writes, as there is a read involved, so this should -# be limited by the less of concurrent reads or concurrent writes. -concurrent_materialized_view_writes: 32 - -# Maximum memory to use for sstable chunk cache and buffer pooling. -# 32MB of this are reserved for pooling buffers, the rest is used as an -# cache that holds uncompressed sstable chunks. -# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, -# so is in addition to the memory allocated for heap. The cache also has on-heap -# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size -# if the default 64k chunk size is used). -# Memory is only allocated when needed. -# file_cache_size_in_mb: 512 - -# Flag indicating whether to allocate on or off heap when the sstable buffer -# pool is exhausted, that is when it has exceeded the maximum memory -# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. - -# buffer_pool_use_heap_if_exhausted: true - -# The strategy for optimizing disk read -# Possible values are: -# ssd (for solid state disks, the default) -# spinning (for spinning disks) -# disk_optimization_strategy: ssd - -# Total permitted memory to use for memtables. Cassandra will stop -# accepting writes when the limit is exceeded until a flush completes, -# and will trigger a flush based on memtable_cleanup_threshold -# If omitted, Cassandra will set both to 1/4 the size of the heap. -# memtable_heap_space_in_mb: 2048 -# memtable_offheap_space_in_mb: 2048 - -# memtable_cleanup_threshold is deprecated. The default calculation -# is the only reasonable choice. See the comments on memtable_flush_writers -# for more information. -# -# Ratio of occupied non-flushing memtable size to total permitted size -# that will trigger a flush of the largest memtable. Larger mct will -# mean larger flushes and hence less compaction, but also less concurrent -# flush activity which can make it difficult to keep your disks fed -# under heavy write load. -# -# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) -# memtable_cleanup_threshold: 0.11 - -# Specify the way Cassandra allocates and manages memtable memory. 
-# Options are: -# -# heap_buffers -# on heap nio buffers -# -# offheap_buffers -# off heap (direct) nio buffers -# -# offheap_objects -# off heap objects -memtable_allocation_type: heap_buffers - -# Total space to use for commit logs on disk. -# -# If space gets above this value, Cassandra will flush every dirty CF -# in the oldest segment and remove it. So a small total commitlog space -# will tend to cause more flush activity on less-active columnfamilies. -# -# The default value is the smaller of 8192, and 1/4 of the total space -# of the commitlog volume. -# -# commitlog_total_space_in_mb: 8192 - -# This sets the number of memtable flush writer threads per disk -# as well as the total number of memtables that can be flushed concurrently. -# These are generally a combination of compute and IO bound. -# -# Memtable flushing is more CPU efficient than memtable ingest and a single thread -# can keep up with the ingest rate of a whole server on a single fast disk -# until it temporarily becomes IO bound under contention typically with compaction. -# At that point you need multiple flush threads. At some point in the future -# it may become CPU bound all the time. -# -# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation -# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing -# to free memory. -# -# memtable_flush_writers defaults to two for a single data directory. -# This means that two memtables can be flushed concurrently to the single data directory. -# If you have multiple data directories the default is one memtable flushing at a time -# but the flush will use a thread per data directory so you will get two or more writers. -# -# Two is generally enough to flush on a fast disk [array] mounted as a single data directory. -# Adding more flush writers will result in smaller more frequent flushes that introduce more -# compaction overhead. -# -# There is a direct tradeoff between number of memtables that can be flushed concurrently -# and flush size and frequency. More is not better you just need enough flush writers -# to never stall waiting for flushing to free memory. -# -#memtable_flush_writers: 2 - -# Total space to use for change-data-capture logs on disk. -# -# If space gets above this value, Cassandra will throw WriteTimeoutException -# on Mutations including tables with CDC enabled. A CDCCompactor is responsible -# for parsing the raw CDC logs and deleting them when parsing is completed. -# -# The default value is the min of 4096 mb and 1/8th of the total space -# of the drive where cdc_raw_directory resides. -# cdc_total_space_in_mb: 4096 - -# When we hit our cdc_raw limit and the CDCCompactor is either running behind -# or experiencing backpressure, we check at the following interval to see if any -# new space for cdc-tracked tables has been made available. Default to 250ms -# cdc_free_space_check_interval_ms: 250 - -# A fixed memory pool size in MB for for SSTable index summaries. If left -# empty, this will default to 5% of the heap size. If the memory usage of -# all index summaries exceeds this limit, SSTables with low read rates will -# shrink their index summaries in order to meet this limit. However, this -# is a best-effort process. In extreme conditions Cassandra may need to use -# more than this amount of memory. -index_summary_capacity_in_mb: - -# How frequently index summaries should be resampled. 
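Several of the sizing comments in this file reduce to one-line formulas (16 reads per data drive, 8 writes per core, a cleanup threshold of 1/(memtable_flush_writers + 1)); a throwaway sketch that evaluates them for a sample machine:

def sizing_hints(data_drives, cpu_cores, memtable_flush_writers=2):
  """ Evaluates the rules of thumb quoted in the comments above. """
  return {
    'concurrent_reads': 16 * data_drives,
    'concurrent_writes': 8 * cpu_cores,
    'memtable_cleanup_threshold': round(1.0 / (memtable_flush_writers + 1), 2),
  }


print(sizing_hints(data_drives=1, cpu_cores=8))
# {'concurrent_reads': 16, 'concurrent_writes': 64,
#  'memtable_cleanup_threshold': 0.33}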
This is done -# periodically to redistribute memory from the fixed-size pool to sstables -# proportional their recent read rates. Setting to -1 will disable this -# process, leaving existing index summaries at their current sampling level. -index_summary_resize_interval_in_minutes: 60 - -# Whether to, when doing sequential writing, fsync() at intervals in -# order to force the operating system to flush the dirty -# buffers. Enable this to avoid sudden dirty buffer flushing from -# impacting read latencies. Almost always a good idea on SSDs; not -# necessarily on platters. -trickle_fsync: false -trickle_fsync_interval_in_kb: 10240 - -# TCP port, for commands and data -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -storage_port: 7000 - -# SSL port, for encrypted communication. Unused unless enabled in -# encryption_options -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -ssl_storage_port: 7001 - -# Address or interface to bind to and tell other Cassandra nodes to connect to. -# You _must_ change this if you want multiple nodes to be able to communicate! -# -# Set listen_address OR listen_interface, not both. -# -# Leaving it blank leaves it up to InetAddress.getLocalHost(). This -# will always do the Right Thing _if_ the node is properly configured -# (hostname, name resolution, etc), and the Right Thing is to use the -# address associated with the hostname (it might not be). -# -# Setting listen_address to 0.0.0.0 is always wrong. -# -listen_address: APPSCALE-LOCAL - -# Set listen_address OR listen_interface, not both. Interfaces must correspond -# to a single address, IP aliasing is not supported. -# listen_interface: eth0 - -# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address -# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 -# address will be used. If true the first ipv6 address will be used. Defaults to false preferring -# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. -# listen_interface_prefer_ipv6: false - -# Address to broadcast to other Cassandra nodes -# Leaving this blank will set it to the same value as listen_address -# broadcast_address: 1.2.3.4 - -# When using multiple physical network interfaces, set this -# to true to listen on broadcast_address in addition to -# the listen_address, allowing nodes to communicate in both -# interfaces. -# Ignore this property if the network configuration automatically -# routes between the public and private networks such as EC2. -# listen_on_broadcast_address: false - -# Internode authentication backend, implementing IInternodeAuthenticator; -# used to allow/disallow connections from peer nodes. -# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator - -# Whether to start the native transport server. -# Please note that the address on which the native transport is bound is the -# same as the rpc_address. The port however is different and specified below. -start_native_transport: true -# port for the CQL native transport to listen for clients on -# For security reasons, you should not expose this port to the internet. Firewall it if needed. 
-native_transport_port: 9042 -# Enabling native transport encryption in client_encryption_options allows you to either use -# encryption for the standard port or to use a dedicated, additional port along with the unencrypted -# standard native_transport_port. -# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption -# for native_transport_port. Setting native_transport_port_ssl to a different value -# from native_transport_port will use encryption for native_transport_port_ssl while -# keeping native_transport_port unencrypted. -# native_transport_port_ssl: 9142 -# The maximum threads for handling requests when the native transport is used. -# This is similar to rpc_max_threads though the default differs slightly (and -# there is no native_transport_min_threads, idle threads will always be stopped -# after 30 seconds). -# native_transport_max_threads: 128 -# -# The maximum size of allowed frame. Frame (requests) larger than this will -# be rejected as invalid. The default is 256MB. If you're changing this parameter, -# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. -# native_transport_max_frame_size_in_mb: 256 - -# The maximum number of concurrent client connections. -# The default is -1, which means unlimited. -# native_transport_max_concurrent_connections: -1 - -# The maximum number of concurrent client connections per source ip. -# The default is -1, which means unlimited. -# native_transport_max_concurrent_connections_per_ip: -1 - -# Whether to start the thrift rpc server. -start_rpc: false - -# The address or interface to bind the Thrift RPC service and native transport -# server to. -# -# Set rpc_address OR rpc_interface, not both. -# -# Leaving rpc_address blank has the same effect as on listen_address -# (i.e. it will be based on the configured hostname of the node). -# -# Note that unlike listen_address, you can specify 0.0.0.0, but you must also -# set broadcast_rpc_address to a value other than 0.0.0.0. -# -# For security reasons, you should not expose this port to the internet. Firewall it if needed. -rpc_address: APPSCALE-LOCAL - -# Set rpc_address OR rpc_interface, not both. Interfaces must correspond -# to a single address, IP aliasing is not supported. -# rpc_interface: eth1 - -# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address -# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 -# address will be used. If true the first ipv6 address will be used. Defaults to false preferring -# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. -# rpc_interface_prefer_ipv6: false - -# port for Thrift to listen for clients on -rpc_port: 9160 - -# RPC address to broadcast to drivers and other Cassandra nodes. This cannot -# be set to 0.0.0.0. If left blank, this will be set to the value of -# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must -# be set. -# broadcast_rpc_address: 1.2.3.4 - -# enable or disable keepalive on rpc/native connections -rpc_keepalive: true - -# Cassandra provides two out-of-the-box options for the RPC Server: -# -# sync -# One thread per thrift connection. For a very large number of clients, memory -# will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size -# per thread, and that will correspond to your use of virtual memory (but physical memory -# may be limited depending on use of stack space). 
-# -# hsha -# Stands for "half synchronous, half asynchronous." All thrift clients are handled -# asynchronously using a small number of threads that does not vary with the amount -# of thrift clients (and thus scales well to many clients). The rpc requests are still -# synchronous (one thread per active request). If hsha is selected then it is essential -# that rpc_max_threads is changed from the default value of unlimited. -# -# The default is sync because on Windows hsha is about 30% slower. On Linux, -# sync/hsha performance is about the same, with hsha of course using less memory. -# -# Alternatively, can provide your own RPC server by providing the fully-qualified class name -# of an o.a.c.t.TServerFactory that can create an instance of it. -rpc_server_type: sync - -# Uncomment rpc_min|max_thread to set request pool size limits. -# -# Regardless of your choice of RPC server (see above), the number of maximum requests in the -# RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync -# RPC server, it also dictates the number of clients that can be connected at all). -# -# The default is unlimited and thus provides no protection against clients overwhelming the server. You are -# encouraged to set a maximum that makes sense for you in production, but do keep in mind that -# rpc_max_threads represents the maximum number of client requests this server may execute concurrently. -# -# rpc_min_threads: 16 -# rpc_max_threads: 2048 - -# uncomment to set socket buffer sizes on rpc connections -# rpc_send_buff_size_in_bytes: -# rpc_recv_buff_size_in_bytes: - -# Uncomment to set socket buffer size for internode communication -# Note that when setting this, the buffer size is limited by net.core.wmem_max -# and when not setting it it is defined by net.ipv4.tcp_wmem -# See also: -# /proc/sys/net/core/wmem_max -# /proc/sys/net/core/rmem_max -# /proc/sys/net/ipv4/tcp_wmem -# /proc/sys/net/ipv4/tcp_wmem -# and 'man tcp' -# internode_send_buff_size_in_bytes: - -# Uncomment to set socket buffer size for internode communication -# Note that when setting this, the buffer size is limited by net.core.wmem_max -# and when not setting it it is defined by net.ipv4.tcp_wmem -# internode_recv_buff_size_in_bytes: - -# Frame size for thrift (maximum message length). -thrift_framed_transport_size_in_mb: 15 - -# Set to true to have Cassandra create a hard link to each sstable -# flushed or streamed locally in a backups/ subdirectory of the -# keyspace data. Removing these links is the operator's -# responsibility. -incremental_backups: false - -# Whether or not to take a snapshot before each compaction. Be -# careful using this option, since Cassandra won't clean up the -# snapshots for you. Mostly useful if you're paranoid when there -# is a data format change. -snapshot_before_compaction: false - -# Whether or not a snapshot is taken of the data before keyspace truncation -# or dropping of column families. The STRONGLY advised default of true -# should be used to provide data safety. If you set this flag to false, you will -# lose data on truncation or drop. -auto_snapshot: true - -# Granularity of the collation index of rows within a partition. -# Increase if your rows are large, or if you have a very large -# number of rows per partition. 
The competing goals are these: -# -# - a smaller granularity means more index entries are generated -# and looking up rows withing the partition by collation column -# is faster -# - but, Cassandra will keep the collation index in memory for hot -# rows (as part of the key cache), so a larger granularity means -# you can cache more hot rows -column_index_size_in_kb: 64 - -# Per sstable indexed key cache entries (the collation index in memory -# mentioned above) exceeding this size will not be held on heap. -# This means that only partition information is held on heap and the -# index entries are read from disk. -# -# Note that this size refers to the size of the -# serialized index information and not the size of the partition. -column_index_cache_size_in_kb: 2 - -# Number of simultaneous compactions to allow, NOT including -# validation "compactions" for anti-entropy repair. Simultaneous -# compactions can help preserve read performance in a mixed read/write -# workload, by mitigating the tendency of small sstables to accumulate -# during a single long running compactions. The default is usually -# fine and if you experience problems with compaction running too -# slowly or too fast, you should look at -# compaction_throughput_mb_per_sec first. -# -# concurrent_compactors defaults to the smaller of (number of disks, -# number of cores), with a minimum of 2 and a maximum of 8. -# -# If your data directories are backed by SSD, you should increase this -# to the number of cores. -#concurrent_compactors: 1 - -# Throttles compaction to the given total throughput across the entire -# system. The faster you insert data, the faster you need to compact in -# order to keep the sstable count down, but in general, setting this to -# 16 to 32 times the rate you are inserting data is more than sufficient. -# Setting this to 0 disables throttling. Note that this account for all types -# of compaction, including validation compaction. -compaction_throughput_mb_per_sec: 16 - -# When compacting, the replacement sstable(s) can be opened before they -# are completely written, and used in place of the prior sstables for -# any range that has been written. This helps to smoothly transfer reads -# between the sstables, reducing page cache churn and keeping hot rows hot -sstable_preemptive_open_interval_in_mb: 50 - -# Throttles all outbound streaming file transfers on this node to the -# given total throughput in Mbps. This is necessary because Cassandra does -# mostly sequential IO when streaming data during bootstrap or repair, which -# can lead to saturating the network connection and degrading rpc performance. -# When unset, the default is 200 Mbps or 25 MB/s. 
-# stream_throughput_outbound_megabits_per_sec: 200 - -# Throttles all streaming file transfer between the datacenters, -# this setting allows users to throttle inter dc stream throughput in addition -# to throttling all network stream traffic as configured with -# stream_throughput_outbound_megabits_per_sec -# When unset, the default is 200 Mbps or 25 MB/s -# inter_dc_stream_throughput_outbound_megabits_per_sec: 200 - -# How long the coordinator should wait for read operations to complete -read_request_timeout_in_ms: 5000 -# How long the coordinator should wait for seq or index scans to complete -range_request_timeout_in_ms: 10000 -# How long the coordinator should wait for writes to complete -write_request_timeout_in_ms: 2000 -# How long the coordinator should wait for counter writes to complete -counter_write_request_timeout_in_ms: 5000 -# How long a coordinator should continue to retry a CAS operation -# that contends with other proposals for the same row -cas_contention_timeout_in_ms: 1000 -# How long the coordinator should wait for truncates to complete -# (This can be much longer, because unless auto_snapshot is disabled -# we need to flush first so we can snapshot before removing the data.) -truncate_request_timeout_in_ms: 60000 -# The default timeout for other, miscellaneous operations -request_timeout_in_ms: 10000 - -# How long before a node logs slow queries. Select queries that take longer than -# this timeout to execute, will generate an aggregated log message, so that slow queries -# can be identified. Set this value to zero to disable slow query logging. -slow_query_log_timeout_in_ms: 500 - -# Enable operation timeout information exchange between nodes to accurately -# measure request timeouts. If disabled, replicas will assume that requests -# were forwarded to them instantly by the coordinator, which means that -# under overload conditions we will waste that much extra time processing -# already-timed-out requests. -# -# Warning: before enabling this property make sure to ntp is installed -# and the times are synchronized between the nodes. -cross_node_timeout: false - -# Set keep-alive period for streaming -# This node will send a keep-alive message periodically with this period. -# If the node does not receive a keep-alive message from the peer for -# 2 keep-alive cycles the stream session times out and fail -# Default value is 300s (5 minutes), which means stalled stream -# times out in 10 minutes by default -# streaming_keep_alive_period_in_secs: 300 - -# phi value that must be reached for a host to be marked down. -# most users should never need to adjust this. -# phi_convict_threshold: 8 - -# endpoint_snitch -- Set this to a class that implements -# IEndpointSnitch. The snitch has two functions: -# -# - it teaches Cassandra enough about your network topology to route -# requests efficiently -# - it allows Cassandra to spread replicas around your cluster to avoid -# correlated failures. It does this by grouping machines into -# "datacenters" and "racks." Cassandra will do its best not to have -# more than one replica on the same "rack" (which may not actually -# be a physical location) -# -# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH -# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. -# This means that if you start with the default SimpleSnitch, which -# locates every node on "rack1" in "datacenter1", your only options -# if you need to add another datacenter are GossipingPropertyFileSnitch -# (and the older PFS). 
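Editor's note: one practical consequence of the coordinator timeouts above is that a client should allow at least as much time as the server before giving up, otherwise it reports a local timeout while the coordinator is still within budget. A hedged sketch with the DataStax Python driver; the ten-second value simply mirrors request_timeout_in_ms above:

    from cassandra.cluster import Cluster

    cluster = Cluster(['10.0.0.1'], port=9042)
    session = cluster.connect()
    # Give the client at least the server's request_timeout_in_ms (10000 ms)
    # so server-side timeouts surface as coordinator errors rather than as
    # client-side OperationTimedOut exceptions.
    session.default_timeout = 10.0
    session.execute('SELECT now() FROM system.local')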
From there, if you want to migrate to an -# incompatible snitch like Ec2Snitch you can do it by adding new nodes -# under Ec2Snitch (which will locate them in a new "datacenter") and -# decommissioning the old ones. -# -# Out of the box, Cassandra provides: -# -# SimpleSnitch: -# Treats Strategy order as proximity. This can improve cache -# locality when disabling read repair. Only appropriate for -# single-datacenter deployments. -# -# GossipingPropertyFileSnitch -# This should be your go-to snitch for production use. The rack -# and datacenter for the local node are defined in -# cassandra-rackdc.properties and propagated to other nodes via -# gossip. If cassandra-topology.properties exists, it is used as a -# fallback, allowing migration from the PropertyFileSnitch. -# -# PropertyFileSnitch: -# Proximity is determined by rack and data center, which are -# explicitly configured in cassandra-topology.properties. -# -# Ec2Snitch: -# Appropriate for EC2 deployments in a single Region. Loads Region -# and Availability Zone information from the EC2 API. The Region is -# treated as the datacenter, and the Availability Zone as the rack. -# Only private IPs are used, so this will not work across multiple -# Regions. -# -# Ec2MultiRegionSnitch: -# Uses public IPs as broadcast_address to allow cross-region -# connectivity. (Thus, you should set seed addresses to the public -# IP as well.) You will need to open the storage_port or -# ssl_storage_port on the public IP firewall. (For intra-Region -# traffic, Cassandra will switch to the private IP after -# establishing a connection.) -# -# RackInferringSnitch: -# Proximity is determined by rack and data center, which are -# assumed to correspond to the 3rd and 2nd octet of each node's IP -# address, respectively. Unless this happens to match your -# deployment conventions, this is best used as an example of -# writing a custom Snitch class and is provided in that spirit. -# -# You can use a custom Snitch by setting this to the full class name -# of the snitch, which will be assumed to be on your classpath. -endpoint_snitch: SimpleSnitch - -# controls how often to perform the more expensive part of host score -# calculation -dynamic_snitch_update_interval_in_ms: 100 -# controls how often to reset all host scores, allowing a bad host to -# possibly recover -dynamic_snitch_reset_interval_in_ms: 600000 -# if set greater than zero and read_repair_chance is < 1.0, this will allow -# 'pinning' of replicas to hosts in order to increase cache capacity. -# The badness threshold will control how much worse the pinned host has to be -# before the dynamic snitch will prefer other replicas over it. This is -# expressed as a double which represents a percentage. Thus, a value of -# 0.2 means Cassandra would continue to prefer the static snitch values -# until the pinned host was 20% worse than the fastest. -dynamic_snitch_badness_threshold: 0.1 - -# request_scheduler -- Set this to a class that implements -# RequestScheduler, which will schedule incoming client requests -# according to the specific policy. This is useful for multi-tenancy -# with a single Cassandra cluster. -# NOTE: This is specifically for requests from the client and does -# not affect inter node communication. -# org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place -# org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of -# client requests to a node with a separate queue for each -# request_scheduler_id. 
The scheduler is further customized by -# request_scheduler_options as described below. -request_scheduler: org.apache.cassandra.scheduler.NoScheduler - -# Scheduler Options vary based on the type of scheduler -# -# NoScheduler -# Has no options -# -# RoundRobin -# throttle_limit -# The throttle_limit is the number of in-flight -# requests per client. Requests beyond -# that limit are queued up until -# running requests can complete. -# The value of 80 here is twice the number of -# concurrent_reads + concurrent_writes. -# default_weight -# default_weight is optional and allows for -# overriding the default which is 1. -# weights -# Weights are optional and will default to 1 or the -# overridden default_weight. The weight translates into how -# many requests are handled during each turn of the -# RoundRobin, based on the scheduler id. -# -# request_scheduler_options: -# throttle_limit: 80 -# default_weight: 5 -# weights: -# Keyspace1: 1 -# Keyspace2: 5 - -# request_scheduler_id -- An identifier based on which to perform -# the request scheduling. Currently the only valid option is keyspace. -# request_scheduler_id: keyspace - -# Enable or disable inter-node encryption -# JVM defaults for supported SSL socket protocols and cipher suites can -# be replaced using custom encryption options. This is not recommended -# unless you have policies in place that dictate certain settings, or -# need to disable vulnerable ciphers or protocols in case the JVM cannot -# be updated. -# FIPS compliant settings can be configured at JVM level and should not -# involve changing encryption settings here: -# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html -# *NOTE* No custom encryption options are enabled at the moment -# The available internode options are : all, none, dc, rack -# -# If set to dc cassandra will encrypt the traffic between the DCs -# If set to rack cassandra will encrypt the traffic between the racks -# -# The passwords used in these options must match the passwords used when generating -# the keystore and truststore. For instructions on generating these files, see: -# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore -# -server_encryption_options: - internode_encryption: none - keystore: conf/.keystore - keystore_password: cassandra - truststore: conf/.truststore - truststore_password: cassandra - # More advanced defaults below: - # protocol: TLS - # algorithm: SunX509 - # store_type: JKS - # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] - # require_client_auth: false - # require_endpoint_verification: false - -# enable or disable client/server encryption. -client_encryption_options: - enabled: false - # If enabled and optional is set to true encrypted and unencrypted connections are handled. 
- optional: false - keystore: conf/.keystore - keystore_password: cassandra - # require_client_auth: false - # Set trustore and truststore_password if require_client_auth is true - # truststore: conf/.truststore - # truststore_password: cassandra - # More advanced defaults below: - # protocol: TLS - # algorithm: SunX509 - # store_type: JKS - # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] - -# internode_compression controls whether traffic between nodes is -# compressed. -# Can be: -# -# all -# all traffic is compressed -# -# dc -# traffic between different datacenters is compressed -# -# none -# nothing is compressed. -internode_compression: dc - -# Enable or disable tcp_nodelay for inter-dc communication. -# Disabling it will result in larger (but fewer) network packets being sent, -# reducing overhead from the TCP protocol itself, at the cost of increasing -# latency if you block for cross-datacenter responses. -inter_dc_tcp_nodelay: false - -# TTL for different trace types used during logging of the repair process. -tracetype_query_ttl: 86400 -tracetype_repair_ttl: 604800 - -# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level -# This threshold can be adjusted to minimize logging if necessary -# gc_log_threshold_in_ms: 200 - -# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at -# INFO level -# UDFs (user defined functions) are disabled by default. -# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. -enable_user_defined_functions: false - -# Enables scripted UDFs (JavaScript UDFs). -# Java UDFs are always enabled, if enable_user_defined_functions is true. -# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. -# This option has no effect, if enable_user_defined_functions is false. -enable_scripted_user_defined_functions: false - -# Enables materialized view creation on this node. -# Materialized views are considered experimental and are not recommended for production use. -enable_materialized_views: true - -# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. -# Lowering this value on Windows can provide much tighter latency and better throughput, however -# some virtualized environments may see a negative performance impact from changing this setting -# below their system default. The sysinternals 'clockres' tool can confirm your system's default -# setting. -windows_timer_interval: 1 - - -# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from -# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by -# the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys -# can still (and should!) be in the keystore and will be used on decrypt operations -# (to handle the case of key rotation). -# -# It is strongly recommended to download and install Java Cryptography Extension (JCE) -# Unlimited Strength Jurisdiction Policy Files for your version of the JDK. 
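Editor's note: when client_encryption_options above is switched on, CQL clients must negotiate TLS. A rough sketch with the DataStax Python driver — the certificate path is invented for illustration, and the ssl_options dict shown here is handed to ssl.wrap_socket by the driver:

    import ssl
    from cassandra.cluster import Cluster

    # Hypothetical CA path; only relevant when client encryption is enabled.
    ssl_opts = {
        'ca_certs': '/etc/appscale/certs/cassandra_ca.pem',
        'ssl_version': ssl.PROTOCOL_TLSv1_2,
        'cert_reqs': ssl.CERT_REQUIRED,
    }
    cluster = Cluster(['10.0.0.1'], port=9042, ssl_options=ssl_opts)
    session = cluster.connect()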
-# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) -# -# Currently, only the following file types are supported for transparent data encryption, although -# more are coming in future cassandra releases: commitlog, hints -transparent_data_encryption_options: - enabled: false - chunk_length_kb: 64 - cipher: AES/CBC/PKCS5Padding - key_alias: testing:1 - # CBC IV length for AES needs to be 16 bytes (which is also the default size) - # iv_length: 16 - key_provider: - - class_name: org.apache.cassandra.security.JKSKeyProvider - parameters: - - keystore: conf/.keystore - keystore_password: cassandra - store_type: JCEKS - key_password: cassandra - - -##################### -# SAFETY THRESHOLDS # -##################### - -# When executing a scan, within or across a partition, we need to keep the -# tombstones seen in memory so we can return them to the coordinator, which -# will use them to make sure other replicas also know about the deleted rows. -# With workloads that generate a lot of tombstones, this can cause performance -# problems and even exaust the server heap. -# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) -# Adjust the thresholds here if you understand the dangers and want to -# scan more tombstones anyway. These thresholds may also be adjusted at runtime -# using the StorageService mbean. -tombstone_warn_threshold: 1000 -tombstone_failure_threshold: 100000 - -# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. -# Caution should be taken on increasing the size of this threshold as it can lead to node instability. -batch_size_warn_threshold_in_kb: 5 - -# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. -batch_size_fail_threshold_in_kb: 50 - -# Log WARN on any batches not of type LOGGED than span across more partitions than this limit -unlogged_batch_across_partitions_warn_threshold: 10 - -# Log a warning when compacting partitions larger than this value -compaction_large_partition_warning_threshold_mb: 100 - -# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level -# Adjust the threshold based on your application throughput requirement -# By default, Cassandra logs GC Pauses greater than 200 ms at INFO level -gc_warn_threshold_in_ms: 1000 - -# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption -# early. Any value size larger than this threshold will result into marking an SSTable -# as corrupted. This should be positive and less than 2048. -# max_value_size_in_mb: 256 - -# Back-pressure settings # -# If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation -# sent to replicas, with the aim of reducing pressure on overloaded replicas. -back_pressure_enabled: false -# The back-pressure strategy applied. -# The default implementation, RateBasedBackPressure, takes three arguments: -# high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests. 
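Editor's note: the batch_size_warn_threshold_in_kb / batch_size_fail_threshold_in_kb pair above is worth guarding against on the client side as well, so oversized batches are caught before the server rejects them. A small illustration — the thresholds are copied from the values above, but the helper itself is not AppScale code:

    BATCH_WARN_KB = 5    # batch_size_warn_threshold_in_kb above
    BATCH_FAIL_KB = 50   # batch_size_fail_threshold_in_kb above

    def check_batch_size(serialized_mutations):
        """Estimate a multi-partition batch's size before sending it."""
        size_kb = sum(len(m) for m in serialized_mutations) / 1024.0
        if size_kb > BATCH_FAIL_KB:
            raise ValueError('batch of %.1f KB would be rejected' % size_kb)
        if size_kb > BATCH_WARN_KB:
            print('batch of %.1f KB will trigger a WARN in Cassandra' % size_kb)
        return size_kb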
-# If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor; -# if above high ratio, the rate limiting is increased by the given factor; -# such factor is usually best configured between 1 and 10, use larger values for a faster recovery -# at the expense of potentially more dropped mutations; -# the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica, -# if SLOW at the speed of the slowest one. -# New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and -# provide a public constructor accepting a Map. -back_pressure_strategy: - - class_name: org.apache.cassandra.net.RateBasedBackPressure - parameters: - - high_ratio: 0.90 - factor: 5 - flow: FAST - -# Coalescing Strategies # -# Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). -# On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in -# virtualized environments, the point at which an application can be bound by network packet processing can be -# surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal -# doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process -# is sufficient for many applications such that no load starvation is experienced even without coalescing. -# There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages -# per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one -# trip to read from a socket, and all the task submission work can be done at the same time reducing context switching -# and increasing cache friendliness of network message processing. -# See CASSANDRA-8692 for details. - -# Strategy to use for coalescing messages in OutboundTcpConnection. -# Can be fixed, movingaverage, timehorizon, disabled (default). -# You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name. -# otc_coalescing_strategy: DISABLED - -# How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first -# message is received before it will be sent with any accompanying messages. For moving average this is the -# maximum amount of time that will be waited as well as the interval at which messages must arrive on average -# for coalescing to be enabled. -# otc_coalescing_window_us: 200 - -# Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128. -# otc_coalescing_enough_coalesced_messages: 8 - -# How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection. -# Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory -# taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value -# will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU -# time and queue contention while iterating the backlog of messages. -# An interval of 0 disables any wait time, which is the behavior of former Cassandra versions. 
-# -# otc_backlog_expiration_interval_ms: 200 diff --git a/AppDB/appscale/datastore/cassandra_env/tornado_cassandra.py b/AppDB/appscale/datastore/cassandra_env/tornado_cassandra.py deleted file mode 100644 index 895ecbf724..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/tornado_cassandra.py +++ /dev/null @@ -1,85 +0,0 @@ -""" A wrapper that converts Cassandra futures to Tornado futures. """ -import logging -from tornado.concurrent import Future as TornadoFuture -from tornado.ioloop import IOLoop - -logger = logging.getLogger(__name__) - - -class TornadoCassandra(object): - """ A wrapper that converts Cassandra futures to Tornado futures. """ - - def __init__(self, session): - """ Create a new TornadoCassandra manager. - - Args: - session: A Cassandra driver session. - """ - self._session = session - - def execute(self, query, parameters=None, *args, **kwargs): - """ Runs a Cassandra query asynchronously. - - Returns: - A Tornado future. - """ - tornado_future = TornadoFuture() - io_loop = IOLoop.current() - cassandra_future = self._session.execute_async( - query, parameters, *args, **kwargs) - - # This list is passed around in order to collect each page of results. - results = [] - cassandra_future.add_callbacks( - self._handle_page, self._handle_failure, - callback_args=(io_loop, tornado_future, cassandra_future, results), - errback_args=(io_loop, tornado_future, query) - ) - return tornado_future - - @staticmethod - def _handle_page(page_results, io_loop, tornado_future, cassandra_future, - all_results): - """ Processes a page from a Cassandra statement and finalizes the Tornado - future upon statement completion. - - Args: - page_results: A list of the page's result rows - (limited version of ResultSet). - io_loop: An instance of tornado IOLoop where execute was initially called. - tornado_future: A Tornado future. - cassandra_future: A Cassandra future containing ResultSet. - all_results: The complete list of results collected so far. - """ - try: - all_results.extend(page_results) - except TypeError: - # page_results are not iterable for insert statements. - pass - - if cassandra_future.has_more_pages: - cassandra_future.start_fetching_next_page() - logger.debug("Fetching next page of cassandra response") - return - - # When possible, this should use the ResultSet object to preserve all the - # attributes. When the ResultSet does not contain all the results, use a - # bare list of results. - if page_results is not None and len(all_results) > len(page_results): - io_loop.add_callback(tornado_future.set_result, all_results) - else: - result = cassandra_future.result() - io_loop.add_callback(tornado_future.set_result, result) - - @staticmethod - def _handle_failure(error, io_loop, tornado_future, query): - """ Assigns the Cassandra exception to the Tornado future. - - Args: - error: A Python exception. - io_loop: An instance of tornado IOLoop where execute was initially called. - tornado_future: A Tornado future. - query: An instance of Cassandra query. - """ - logger.error(u"Failed to run query: {} ({})".format(query, error)) - io_loop.add_callback(tornado_future.set_exception, error) diff --git a/AppDB/appscale/datastore/cassandra_env/utils.py b/AppDB/appscale/datastore/cassandra_env/utils.py deleted file mode 100644 index 461591314c..0000000000 --- a/AppDB/appscale/datastore/cassandra_env/utils.py +++ /dev/null @@ -1,187 +0,0 @@ -""" Helper functions for the Cassandra datastore implementation. """ -from .. import dbconstants -from .. 
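Editor's note: for context on the wrapper being removed here, its whole purpose was to let callers yield Cassandra queries inside Tornado coroutines instead of blocking the IOLoop on the driver's result call. A usage sketch (the query and wiring are illustrative, not taken from this patch):

    from tornado import gen

    @gen.coroutine
    def fetch_version(wrapper):
        # `wrapper` is a TornadoCassandra instance built around a driver
        # Session, e.g. TornadoCassandra(Cluster(['10.0.0.1']).connect()).
        # execute() returns a Tornado future, so the coroutine can yield it
        # without blocking the IOLoop thread.
        rows = yield wrapper.execute('SELECT release_version FROM system.local')
        raise gen.Return(rows[0].release_version)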
import helper_functions -from ..dbconstants import Operations -from ..utils import ( - clean_app_id, - encode_index_pb, - get_composite_index_keys, - get_composite_indexes_rows, - get_entity_key, - get_entity_kind, - get_index_kv_from_tuple, - get_kind_key -) - - -def deletions_for_entity(entity, composite_indices=()): - """ Get a list of deletions needed across tables for deleting an entity. - - Args: - entity: An entity object. - composite_indices: A list or tuple of composite indices. - Returns: - A list of dictionaries representing mutation operations. - """ - deletions = [] - app_id = clean_app_id(entity.key().app()) - namespace = entity.key().name_space() - prefix = dbconstants.KEY_DELIMITER.join([app_id, namespace]) - - asc_rows = get_index_kv_from_tuple([(prefix, entity)]) - for entry in asc_rows: - deletions.append({'table': dbconstants.ASC_PROPERTY_TABLE, - 'key': entry[0], - 'operation': Operations.DELETE}) - - dsc_rows = get_index_kv_from_tuple( - [(prefix, entity)], reverse=True) - for entry in dsc_rows: - deletions.append({'table': dbconstants.DSC_PROPERTY_TABLE, - 'key': entry[0], - 'operation': Operations.DELETE}) - - for key in get_composite_indexes_rows([entity], composite_indices): - deletions.append({'table': dbconstants.COMPOSITE_TABLE, - 'key': key, - 'operation': Operations.DELETE}) - - entity_key = get_entity_key(prefix, entity.key().path()) - deletions.append({'table': dbconstants.APP_ENTITY_TABLE, - 'key': entity_key, - 'operation': Operations.DELETE}) - - kind_key = get_kind_key(prefix, entity.key().path()) - deletions.append({'table': dbconstants.APP_KIND_TABLE, - 'key': kind_key, - 'operation': Operations.DELETE}) - - return deletions - - -def index_deletions(old_entity, new_entity, composite_indices=()): - """ Get a list of index deletions needed for updating an entity. For changing - an existing entity, this involves examining the property list of both - entities to see which index entries need to be removed. - - Args: - old_entity: An entity object. - new_entity: An entity object. - composite_indices: A list or tuple of composite indices. - Returns: - A list of dictionaries representing mutation operations. 
- """ - deletions = [] - app_id = clean_app_id(old_entity.key().app()) - namespace = old_entity.key().name_space() - kind = get_entity_kind(old_entity.key()) - entity_key = str(encode_index_pb(old_entity.key().path())) - - new_props = {} - for prop in new_entity.property_list(): - if prop.name() not in new_props: - new_props[prop.name()] = [] - new_props[prop.name()].append(prop) - - changed_props = {} - for prop in old_entity.property_list(): - if prop.name() in new_props and prop in new_props[prop.name()]: - continue - - if prop.name() not in changed_props: - changed_props[prop.name()] = [] - changed_props[prop.name()].append(prop) - - value = str(encode_index_pb(prop.value())) - - key = dbconstants.KEY_DELIMITER.join( - [app_id, namespace, kind, prop.name(), value, entity_key]) - deletions.append({'table': dbconstants.ASC_PROPERTY_TABLE, - 'key': key, - 'operation': Operations.DELETE}) - - reverse_key = dbconstants.KEY_DELIMITER.join( - [app_id, namespace, kind, prop.name(), - helper_functions.reverse_lex(value), entity_key]) - deletions.append({'table': dbconstants.DSC_PROPERTY_TABLE, - 'key': reverse_key, - 'operation': Operations.DELETE}) - - changed_prop_names = set(changed_props.keys()) - for index in composite_indices: - if index.definition().entity_type() != kind: - continue - - index_props = set(prop.name() for prop - in index.definition().property_list()) - if index_props.isdisjoint(changed_prop_names): - continue - - old_entries = set(get_composite_index_keys(index, old_entity)) - new_entries = set(get_composite_index_keys(index, new_entity)) - for entry in (old_entries - new_entries): - deletions.append({'table': dbconstants.COMPOSITE_TABLE, - 'key': entry, - 'operation': Operations.DELETE}) - - return deletions - - -def mutations_for_entity(entity, txn, current_value=None, - composite_indices=()): - """ Get a list of mutations needed across tables for an entity change. - - Args: - entity: An entity object. - txn: A transaction ID handler. - current_value: The entity object currently stored. - composite_indices: A list of composite indices for the entity kind. - Returns: - A list of dictionaries representing mutations. 
- """ - mutations = [] - if current_value is not None: - mutations.extend( - index_deletions(current_value, entity, composite_indices)) - - app_id = clean_app_id(entity.key().app()) - namespace = entity.key().name_space() - encoded_path = str(encode_index_pb(entity.key().path())) - prefix = dbconstants.KEY_DELIMITER.join([app_id, namespace]) - entity_key = dbconstants.KEY_DELIMITER.join([prefix, encoded_path]) - entity_value = {dbconstants.APP_ENTITY_SCHEMA[0]: entity.Encode(), - dbconstants.APP_ENTITY_SCHEMA[1]: str(txn)} - mutations.append({'table': dbconstants.APP_ENTITY_TABLE, - 'key': entity_key, - 'operation': Operations.PUT, - 'values': entity_value}) - - reference_value = {'reference': entity_key} - - kind_key = get_kind_key(prefix, entity.key().path()) - mutations.append({'table': dbconstants.APP_KIND_TABLE, - 'key': kind_key, - 'operation': Operations.PUT, - 'values': reference_value}) - - asc_rows = get_index_kv_from_tuple([(prefix, entity)]) - for entry in asc_rows: - mutations.append({'table': dbconstants.ASC_PROPERTY_TABLE, - 'key': entry[0], - 'operation': Operations.PUT, - 'values': reference_value}) - - dsc_rows = get_index_kv_from_tuple([(prefix, entity)], reverse=True) - for entry in dsc_rows: - mutations.append({'table': dbconstants.DSC_PROPERTY_TABLE, - 'key': entry[0], - 'operation': Operations.PUT, - 'values': reference_value}) - - for key in get_composite_indexes_rows([entity], composite_indices): - mutations.append({'table': dbconstants.COMPOSITE_TABLE, - 'key': key, - 'operation': Operations.PUT, - 'values': reference_value}) - - return mutations diff --git a/AppDB/appscale/datastore/datastore_distributed.py b/AppDB/appscale/datastore/datastore_distributed.py deleted file mode 100644 index 012d888d6b..0000000000 --- a/AppDB/appscale/datastore/datastore_distributed.py +++ /dev/null @@ -1,3309 +0,0 @@ -import array -import datetime -import itertools -import logging -import md5 -import sys -import uuid - -from tornado import gen -from tornado.ioloop import IOLoop - -from appscale.datastore import dbconstants, helper_functions - -from appscale.common.datastore_index import DatastoreIndex, merge_indexes -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from kazoo.client import KazooState -from appscale.datastore.dbconstants import ( - APP_ENTITY_SCHEMA, BadRequest, ID_KEY_LENGTH, InternalError, MAX_TX_DURATION, - Timeout -) -from appscale.datastore.cassandra_env.cassandra_interface import ( - batch_size, LARGE_BATCH_THRESHOLD) -from appscale.datastore.cassandra_env.entity_id_allocator import EntityIDAllocator -from appscale.datastore.cassandra_env.entity_id_allocator import ScatteredAllocator -from appscale.datastore.cassandra_env.large_batch import BatchNotApplied -from appscale.datastore.cassandra_env.utils import deletions_for_entity -from appscale.datastore.cassandra_env.utils import mutations_for_entity -from appscale.datastore.index_manager import IndexInaccessible -from appscale.datastore.taskqueue_client import EnqueueError, TaskQueueClient -from appscale.datastore.utils import _FindIndexToUse -from appscale.datastore.utils import clean_app_id -from appscale.datastore.utils import decode_path -from appscale.datastore.utils import encode_entity_table_key -from appscale.datastore.utils import encode_index_pb -from appscale.datastore.utils import encode_path_from_filter -from appscale.datastore.utils import get_composite_index_keys -from appscale.datastore.utils import get_entity_key -from appscale.datastore.utils import get_entity_kind -from 
appscale.datastore.utils import get_index_key_from_params -from appscale.datastore.utils import get_kind_key -from appscale.datastore.utils import group_for_key -from appscale.datastore.utils import kind_from_encoded_key -from appscale.datastore.utils import reference_property_to_reference -from appscale.datastore.utils import UnprocessedQueryCursor -from appscale.datastore.range_iterator import RangeExhausted, RangeIterator -from appscale.datastore.zkappscale import entity_lock -from appscale.datastore.zkappscale import zktransaction - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.api import datastore_errors -from google.appengine.api.datastore_distributed import _MAX_ACTIONS_PER_TXN -from google.appengine.datastore import appscale_stub_util -from google.appengine.datastore import datastore_pb -from google.appengine.datastore import datastore_index -from google.appengine.datastore import entity_pb -from google.appengine.datastore import sortable_pb_encoder -from google.appengine.datastore.datastore_stub_util import IdToCounter -from google.appengine.datastore.datastore_stub_util import SEQUENTIAL -from google.appengine.runtime import apiproxy_errors -from google.appengine.ext import db -from google.appengine.ext.db.metadata import Namespace -from google.net.proto.ProtocolBuffer import ProtocolBufferDecodeError - -logger = logging.getLogger(__name__) - - -class DatastoreDistributed(): - """ AppScale persistent layer for the datastore API. It is the - replacement for the AppServers to persist their data into - a distributed datastore instead of a flat file. - """ - # Max number of results for a query - _MAXIMUM_RESULTS = 10000 - - # Maximum amount of filter and orderings allowed within a query - _MAX_QUERY_COMPONENTS = 63 - - # For enabling and disabling range inclusivity - _ENABLE_INCLUSIVITY = True - _DISABLE_INCLUSIVITY = False - - # Delimiter between app names and namespace and the rest of an entity key - _NAMESPACE_SEPARATOR = dbconstants.KEY_DELIMITER - - # Delimiter between parameters in index keys. - _SEPARATOR = dbconstants.KEY_DELIMITER - - # This is the terminating string for range queries - _TERM_STRING = dbconstants.TERMINATING_STRING - - # Smallest possible value that is considered non-null and indexable. - MIN_INDEX_VALUE = '\x01' - - # When assigning the first allocated ID, give this value - _FIRST_VALID_ALLOCATED_ID = 1 - - # The key we use to lock for allocating new IDs - _ALLOCATE_ROOT_KEY = "__allocate__" - - # Number of times to retry acquiring a lock for non transactions. - NON_TRANS_LOCK_RETRY_COUNT = 5 - - # How long to wait before retrying to grab a lock - LOCK_RETRY_TIME = .5 - - # Maximum number of allowed composite indexes any one application can - # register. - _MAX_NUM_INDEXES = dbconstants.MAX_NUMBER_OF_COMPOSITE_INDEXES - - # The position of the prop name when splitting an index entry by the - # delimiter. - PROP_NAME_IN_SINGLE_PROP_INDEX = 3 - - # The cassandra index column that stores the reference to the entity. - INDEX_REFERENCE_COLUMN = 'reference' - - # The number of entities to fetch at a time when updating indices. - BATCH_SIZE = 100 - - def __init__(self, datastore_batch, transaction_manager, zookeeper=None, - log_level=logging.INFO, taskqueue_locations=()): - """ - Constructor. - - Args: - datastore_batch: A reference to the batch datastore interface. - zookeeper: A reference to the zookeeper interface. 
- """ - class_name = self.__class__.__name__ - self.logger = logging.getLogger(class_name) - self.logger.setLevel(log_level) - - assert datastore_batch.valid_data_version_sync() - - self.logger.info('Starting {}'.format(class_name)) - - # datastore accessor used by this class to do datastore operations. - self.datastore_batch = datastore_batch - - # zookeeper instance for accesing ZK functionality. - self.zookeeper = zookeeper - - # Maintain a scattered allocator for each project. - self.scattered_allocators = {} - - # Maintain a sequential allocator for each project. - self.sequential_allocators = {} - - self.taskqueue_client = TaskQueueClient(taskqueue_locations) - self.transaction_manager = transaction_manager - self.index_manager = None - self.zookeeper.handle.add_listener(self._zk_state_listener) - - def get_limit(self, query): - """ Returns the limit that should be used for the given query. - - Args: - query: A datastore_pb.Query. - Returns: - A tuple containing the number of entities the datastore should retrieve - and a boolean indicating whether or not the datastore should check for - results beyond what it returns. - """ - # The datastore should check for more results beyond what it returns when - # the current request is not able to satisfy the full query. This can - # happen during batch queries if "count" is less than the full query's - # limit. It can also happen when the limit is greater than the maximum - # results that the datastore returns per request. - check_more_results = False - limit = None - if query.has_limit(): - limit = query.limit() - - if query.has_count() and (limit is None or limit > query.count()): - check_more_results = True - limit = query.count() - - if limit is None or limit > self._MAXIMUM_RESULTS: - check_more_results = True - limit = self._MAXIMUM_RESULTS - - if query.has_offset(): - limit += query.offset() - - # We can not scan with 0 or less, hence we set it to one. - if limit <= 0: - limit = 1 - - return limit, check_more_results - - @staticmethod - def __decode_index_str(value, prop_value): - """ Takes an encoded string and converts it to a PropertyValue. - - Args: - value: An encoded str. - prop_value: PropertyValue to fill in. - """ - value = str(value).replace('\x01\x01', '\x00').replace('\x01\x02', '\x01') - decoded_value = sortable_pb_encoder.Decoder( - array.array('B', str(value))) - prop_value.Merge(decoded_value) - - @staticmethod - def validate_app_id(app_id): - """ Verify that this is the stub for app_id. - - Args: - app_id: An application ID. - Raises: - AppScaleBadArg: If the application id is not set. - """ - if not app_id: - raise dbconstants.AppScaleBadArg("Application name must be set") - - @staticmethod - def validate_key(key): - """ Validate this key by checking to see if it has a name or id. - - Args: - key: entity_pb.Reference - Raises: - datastore_errors.BadRequestError: if the key is invalid - TypeError: if key is not of entity_pb.Reference - """ - - if not isinstance(key, entity_pb.Reference): - raise TypeError("Expected type Reference") - - DatastoreDistributed.validate_app_id(key.app()) - - for elem in key.path().element_list(): - if elem.has_id() and elem.has_name(): - raise datastore_errors.BadRequestError( - 'Each key path element should have id or name but not both: {0}' \ - .format(key)) - - def get_table_prefix(self, data): - """ Returns the namespace prefix for a query. - - Args: - data: An Entity, Key or Query PB, or an (app_id, ns) tuple. - Returns: - A valid table prefix. 
- """ - if isinstance(data, entity_pb.EntityProto): - app_id = clean_app_id(data.key().app()) - namespace = data.key().name_space() - elif isinstance(data, tuple): - app_id = data[0] - namespace = data[1] - else: - app_id = clean_app_id(data.app()) - namespace = data.name_space() - - return self._SEPARATOR.join([app_id, namespace]) - - @staticmethod - def get_ancestor_key_from_ent_key(ent_key): - """ Get the key string for the ancestor portion of a composite key. - - Args: - ent_key: A string of the entire path of an entity. - Returns: - A str of the path of the ancestor. - """ - ancestor = "" - tokens = str(ent_key).split(dbconstants.KIND_SEPARATOR) - # Strip off the empty placeholder and also do not include the last kind. - for token in tokens[:-2]: - ancestor += token + dbconstants.KIND_SEPARATOR - return ancestor - - @staticmethod - def get_composite_index_key(index, entity, position_list=None, - filters=None): - """ Creates a key to the composite index table for a given entity - for a composite cursor. - - Keys are built as such: - app_id/ns/composite_id/ancestor/valuevaluevalue..../entity_key - Components explained: - ns: The namespace of the entity. - composite_id: The composite ID assigned to this index upon creation. - ancestor: The root ancestor path (only if the query this index is for - has an ancestor) - value(s): The string representation of mulitiple properties. - entity_key: The entity key (full path) used as a means of having a unique - identifier. This prevents two entities with the same values from - colliding. - - Args: - index: A datstore_pb.CompositeIndex. - entity: A entity_pb.EntityProto. - position_list: A list of datastore_pb.CompiledCursor_Position items. - Contains values for property items from a cursor. - filters: A list of datastore_pb.Query_Filters, used to attain equality - values not present in position_list. - Returns: - A string representing a key to the composite table. - """ - composite_id = index.id() - definition = index.definition() - app_id = clean_app_id(entity.key().app()) - name_space = entity.key().name_space() - ent_key = encode_index_pb(entity.key().path()) - pre_comp_index_key = "{0}{1}{2}{4}{3}{4}".format(app_id, - DatastoreDistributed._NAMESPACE_SEPARATOR, name_space, composite_id, - DatastoreDistributed._SEPARATOR) - if definition.ancestor() == 1: - ancestor = DatastoreDistributed.get_ancestor_key_from_ent_key(ent_key) - pre_comp_index_key += "{0}{1}".format(ancestor, - DatastoreDistributed._SEPARATOR) - - value_dict = {} - for prop in entity.property_list(): - value_dict[prop.name()] = \ - str(encode_index_pb(prop.value())) - - # Position list and filters are used if we're creating a composite - # key for a cursor. 
- if position_list: - for indexvalue in position_list[0].indexvalue_list(): - value_dict[indexvalue.property()] = \ - str(encode_index_pb(indexvalue.value())) - if filters: - for filt in filters: - if filt.op() == datastore_pb.Query_Filter.EQUAL: - value_dict[filt.property(0).name()] = \ - str(encode_index_pb(filt.property(0).value())) - - index_value = "" - for prop in definition.property_list(): - name = prop.name() - value = '' - if name in value_dict: - value = value_dict[name] - elif name == "__key__": - value = ent_key - else: - logger.warning("Given entity {0} is missing a property value {1}.".\ - format(entity, prop.name())) - if prop.direction() == entity_pb.Index_Property.DESCENDING: - value = helper_functions.reverse_lex(value) - - index_value += str(value) + DatastoreDistributed._SEPARATOR - - # We append the ent key to have unique keys if entities happen - # to share the same index values (and ancestor). - composite_key = "{0}{1}{2}".format(pre_comp_index_key, index_value, - ent_key) - return composite_key - - @gen.coroutine - def insert_composite_indexes(self, entities, composite_indexes): - """ Creates composite indexes for a set of entities. - - Args: - entities: A list entities. - composite_indexes: A list of datastore_pb.CompositeIndex. - """ - if not composite_indexes: - return - row_keys = [] - row_values = {} - # Create default composite index for all entities. Here we take each - # of the properties in one - for ent in entities: - for index_def in composite_indexes: - # Skip any indexes if the kind does not match. - kind = get_entity_kind(ent.key()) - if index_def.definition().entity_type() != kind: - continue - - # Make sure the entity contains the required entities for the composite - # definition. - prop_name_def_list = [index_prop.name() for index_prop in \ - index_def.definition().property_list()] - all_prop_names_in_ent = [prop.name() for prop in \ - ent.property_list()] - has_values = True - for index_prop_name in prop_name_def_list: - if index_prop_name not in all_prop_names_in_ent: - has_values = False - # Special property name which does not show up in the list but - # is apart of the key of the entity. - if index_prop_name == "__key__": - has_values = True - if not has_values: - continue - - # Get the composite index key. - composite_index_keys = get_composite_index_keys(index_def, ent) - row_keys.extend(composite_index_keys) - - # Get the reference value for the composite table. - entity_key = str(encode_index_pb(ent.key().path())) - prefix = self.get_table_prefix(ent.key()) - reference = "{0}{1}{2}".format(prefix, self._SEPARATOR, entity_key) - for composite_key in composite_index_keys: - row_values[composite_key] = {'reference': reference} - - yield self.datastore_batch.batch_put_entity( - dbconstants.COMPOSITE_TABLE, row_keys, - dbconstants.COMPOSITE_SCHEMA, row_values) - - @gen.coroutine - def delete_composite_index_metadata(self, app_id, index): - """ Deletes a index for the given application identifier. - - Args: - app_id: A string representing the application identifier. - index: A entity_pb.CompositeIndex object. - """ - self.logger.info('Deleting composite index:\n{}'.format(index)) - try: - project_index_manager = self.index_manager.projects[app_id] - except KeyError: - raise BadRequest('project_id: {} not found'.format(app_id)) - - # TODO: Remove actual index entries. 
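Editor's note: the property check inside insert_composite_indexes above — an entity only receives rows for an index when it carries every property the index definition names, with __key__ always counting as present — boils down to a few lines. This is an illustration of the rule, not the patch's code:

    def entity_covers_index(entity_prop_names, index_prop_names):
        # __key__ is a pseudo-property taken from the entity's key, so it is
        # considered present even though it never appears in the entity's
        # property_list().
        return all(name in entity_prop_names or name == '__key__'
                   for name in index_prop_names)

    assert entity_covers_index({'author', 'created'},
                               ['author', 'created', '__key__'])
    assert not entity_covers_index({'author'}, ['author', 'created'])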
- project_index_manager.delete_index_definition(index.id()) - - @gen.coroutine - def create_composite_index(self, app_id, index): - """ Stores a new index for the given application identifier. - - Args: - app_id: A string representing the application identifier. - index: A entity_pb.CompositeIndex object. - Returns: - A unique number representing the composite index ID. - """ - new_index = DatastoreIndex.from_pb(index) - - # The ID must be a positive number that fits in a signed 64-bit int. - new_index.id = uuid.uuid1().int >> 65 - merge_indexes(self.zookeeper.handle, app_id, [new_index]) - raise gen.Return(new_index.id) - - @gen.coroutine - def update_composite_index(self, app_id, index): - """ Updates an index for a given app ID. - - Args: - app_id: A string containing the app ID. - index: An entity_pb.CompositeIndex object. - """ - self.logger.info('Updating index: {}'.format(index)) - entries_updated = 0 - entity_type = index.definition().entity_type() - - # TODO: Adjust prefix based on ancestor. - prefix = '{app}{delimiter}{entity_type}{kind_separator}'.format( - app=app_id, - delimiter=self._SEPARATOR * 2, - entity_type=entity_type, - kind_separator=dbconstants.KIND_SEPARATOR, - ) - start_row = prefix - end_row = prefix + self._TERM_STRING - start_inclusive = True - - while True: - # Fetch references from the kind table since entity keys can have a - # parent prefix. - references = yield self.datastore_batch.range_query( - table_name=dbconstants.APP_KIND_TABLE, - column_names=dbconstants.APP_KIND_SCHEMA, - start_key=start_row, - end_key=end_row, - limit=self.BATCH_SIZE, - offset=0, - start_inclusive=start_inclusive, - ) - - pb_entities = yield self.__fetch_entities(references) - entities = [entity_pb.EntityProto(entity) for entity in pb_entities] - - yield self.insert_composite_indexes(entities, [index]) - entries_updated += len(entities) - - # If we fetched fewer references than we asked for, we're done. - if len(references) < self.BATCH_SIZE: - break - - start_row = references[-1].keys()[0] - start_inclusive = self._DISABLE_INCLUSIVITY - - self.logger.info('Updated {} index entries.'.format(entries_updated)) - - @gen.coroutine - def allocate_size(self, project, namespace, path_prefix, size): - """ Allocates a block of IDs for a project. - - Args: - project: A string specifying the project ID. - namespace: A string specifying a namespace. - path_prefix: A tuple specifying the model key's path (omitting the final - ID). - size: An integer specifying the number of IDs to reserve. - Returns: - A tuple of integers specifying the start and end ID. - """ - # The Cassandra backend does not implement path-specific allocators. - del namespace, path_prefix - - if project not in self.sequential_allocators: - self.sequential_allocators[project] = EntityIDAllocator( - self.datastore_batch.session, project) - - allocator = self.sequential_allocators[project] - start_id, end_id = yield allocator.allocate_size(size) - raise gen.Return((start_id, end_id)) - - @gen.coroutine - def allocate_max(self, project, namespace, path_prefix, max_id): - """ Reserves all IDs up to the one given. - - Args: - project: A string specifying the project ID. - namespace: A string specifying the namespace. - path_prefix: A tuple specifying the model key's path (omitting the final - ID). - max_id: An integer specifying the maximum ID to allocated. - Returns: - A tuple of integers specifying the start and end ID. - """ - # The Cassandra backend does not implement path-specific allocators. 
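Editor's note: a quick check on the ID scheme used by create_composite_index above — a version-1 UUID is 128 bits, so shifting it right by 65 leaves at most 63 significant bits, which is exactly what a positive signed 64-bit integer can hold:

    import uuid

    # 128-bit UUID >> 65 keeps the top 63 bits, so the result always falls in
    # [0, 2**63), i.e. a non-negative signed 64-bit value.
    index_id = uuid.uuid1().int >> 65
    assert 0 <= index_id < 2 ** 63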
- del namespace, path_prefix - - if project not in self.sequential_allocators: - self.sequential_allocators[project] = EntityIDAllocator( - self.datastore_batch.session, project) - - allocator = self.sequential_allocators[project] - start_id, end_id = yield allocator.allocate_max(max_id) - raise gen.Return((start_id, end_id)) - - @gen.coroutine - def reserve_ids(self, project_id, ids): - """ Ensures the given IDs are not re-allocated. - - Args: - project_id: A string specifying the project ID. - ids: An iterable of integers specifying entity IDs. - """ - if project_id not in self.sequential_allocators: - self.sequential_allocators[project_id] = EntityIDAllocator( - self.datastore_batch.session, project_id) - - if project_id not in self.scattered_allocators: - self.scattered_allocators[project_id] = ScatteredAllocator( - self.datastore_batch.session, project_id) - - for id_ in ids: - counter, space = IdToCounter(id_) - if space == SEQUENTIAL: - allocator = self.sequential_allocators[project_id] - else: - allocator = self.scattered_allocators[project_id] - - yield allocator.set_min_counter(counter) - - @gen.coroutine - def put_entities(self, app, entities): - """ Updates indexes of existing entities, inserts new entities and - indexes for them. - - Args: - app: A string containing the application ID. - entities: List of entities. - """ - self.logger.debug('Inserting {} entities'.format(len(entities))) - - composite_indexes = yield self.get_indexes(app) - - by_group = {} - for entity in entities: - group_key = group_for_key(entity.key()).Encode() - if group_key not in by_group: - by_group[group_key] = [] - by_group[group_key].append(entity) - - for encoded_group_key, entity_list in by_group.iteritems(): - group_key = entity_pb.Reference(encoded_group_key) - - txid = self.transaction_manager.create_transaction_id(app, xg=False) - self.transaction_manager.set_groups(app, txid, [group_key]) - - # Allow the lock to stick around if there is an issue applying the batch. - lock = entity_lock.EntityLock(self.zookeeper.handle, [group_key], txid) - try: - yield lock.acquire() - except entity_lock.LockTimeout: - raise Timeout('Unable to acquire entity group lock') - - try: - entity_keys = [ - get_entity_key(self.get_table_prefix(entity), entity.key().path()) - for entity in entity_list] - try: - current_values = yield self.datastore_batch.batch_get_entity( - dbconstants.APP_ENTITY_TABLE, entity_keys, APP_ENTITY_SCHEMA) - except dbconstants.AppScaleDBConnectionError: - lock.release() - self.transaction_manager.delete_transaction_id(app, txid) - raise - - batch = [] - entity_changes = [] - for entity in entity_list: - prefix = self.get_table_prefix(entity) - entity_key = get_entity_key(prefix, entity.key().path()) - - current_value = None - if current_values[entity_key]: - current_value = entity_pb.EntityProto( - current_values[entity_key][APP_ENTITY_SCHEMA[0]]) - - batch.extend(mutations_for_entity(entity, txid, current_value, - composite_indexes)) - - batch.append({'table': 'group_updates', - 'key': bytearray(encoded_group_key), - 'last_update': txid}) - - entity_changes.append( - {'key': entity.key(), 'old': current_value, 'new': entity}) - - if batch_size(batch) > LARGE_BATCH_THRESHOLD: - try: - yield self.datastore_batch.large_batch(app, batch, entity_changes, - txid) - except BatchNotApplied as error: - # If the "applied" switch has not been flipped, the lock can be - # released. The transaction ID is kept so that the groomer can - # clean up the batch tables. 
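Editor's note: put_entities above hinges on grouping the incoming entities by their entity group before any locking happens, so that each root ancestor is locked and written once per call. Reduced to its essence (group_for_key is the helper imported from appscale.datastore.utils; the rest of this snippet is illustrative):

    from collections import defaultdict

    def group_by_entity_group(entities, group_for_key):
        # Entities sharing a root ancestor form one entity group; put_entities
        # gives each group its own transaction ID and EntityLock.
        by_group = defaultdict(list)
        for entity in entities:
            by_group[group_for_key(entity.key()).Encode()].append(entity)
        return by_group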
- lock.release() - raise dbconstants.AppScaleDBConnectionError(str(error)) - else: - try: - yield self.datastore_batch.normal_batch(batch, txid) - except dbconstants.AppScaleDBConnectionError: - # Since normal batches are guaranteed to be atomic, the lock can - # be released. - lock.release() - self.transaction_manager.delete_transaction_id(app, txid) - raise - - lock.release() - - finally: - # In case of failure entity group lock should stay acquired - # as transaction groomer will handle it later. - # But tornado lock must be released. - lock.ensure_release_tornado_lock() - - self.transaction_manager.delete_transaction_id(app, txid) - - @gen.coroutine - def delete_entities(self, group, txid, keys, composite_indexes=()): - """ Deletes the entities and the indexes associated with them. - - Args: - group: An entity group Reference object. - txid: An integer specifying a transaction ID. - keys: An interable containing entity Reference objects. - composite_indexes: A list or tuple of CompositeIndex objects. - """ - entity_keys = [] - for key in keys: - prefix = self.get_table_prefix(key) - entity_keys.append(get_entity_key(prefix, key.path())) - - # Must fetch the entities to get the keys of indexes before deleting. - current_values = yield self.datastore_batch.batch_get_entity( - dbconstants.APP_ENTITY_TABLE, entity_keys, APP_ENTITY_SCHEMA) - - for key in entity_keys: - if not current_values[key]: - continue - - current_value = entity_pb.EntityProto( - current_values[key][APP_ENTITY_SCHEMA[0]]) - batch = deletions_for_entity(current_value, composite_indexes) - - batch.append({'table': 'group_updates', - 'key': bytearray(group.Encode()), - 'last_update': txid}) - - yield self.datastore_batch.normal_batch(batch, txid) - - @gen.coroutine - def dynamic_put(self, app_id, put_request, put_response): - """ Stores and entity and its indexes in the datastore. - - Args: - app_id: Application ID. - put_request: Request with entities to store. - put_response: The response sent back to the app server. - Raises: - ZKTransactionException: If we are unable to acquire/release ZooKeeper locks. - """ - if app_id not in self.scattered_allocators: - self.scattered_allocators[app_id] = ScatteredAllocator( - self.datastore_batch.session, app_id) - allocator = self.scattered_allocators[app_id] - - entities = put_request.entity_list() - - for entity in entities: - self.validate_key(entity.key()) - - for prop in itertools.chain(entity.property_list(), - entity.raw_property_list()): - if prop.value().has_uservalue(): - uid = md5.new(prop.value().uservalue().email().lower()).digest() - uid = '1' + ''.join(['%02d' % ord(x) for x in uid])[:20] - prop.mutable_value().mutable_uservalue().set_obfuscated_gaiaid(uid) - - last_path = entity.key().path().element_list()[-1] - if last_path.id() == 0 and not last_path.has_name(): - allocated_id = yield allocator.next() - last_path.set_id(allocated_id) - group = entity.mutable_entity_group() - root = entity.key().path().element(0) - group.add_element().CopyFrom(root) - - if put_request.has_transaction(): - yield self.datastore_batch.put_entities_tx( - app_id, put_request.transaction().handle(), entities) - else: - yield self.put_entities(app_id, entities) - self.logger.debug('Updated {} entities'.format(len(entities))) - - put_response.key_list().extend([e.key() for e in entities]) - - def get_root_key_from_entity_key(self, entity_key): - """ Extract the root key from an entity key. We - remove any excess children from a string to get to - the root key. 
- - Args: - entity_key: A string or Key object representing a row key. - Returns: - The root key extracted from the row key. - Raises: - TypeError: If the type is not supported. - """ - if isinstance(entity_key, str): - tokens = entity_key.split(dbconstants.KIND_SEPARATOR) - return tokens[0] + dbconstants.KIND_SEPARATOR - elif isinstance(entity_key, entity_pb.Reference): - app_id = clean_app_id(entity_key.app()) - path = entity_key.path() - element_list = path.element_list() - return self.get_root_key(app_id, entity_key.name_space(), element_list) - else: - raise TypeError("Unable to get root key from given type of %s" % \ - entity_key.__class__) - - def get_root_key(self, app_id, ns, ancestor_list): - """ Gets the root key string from an ancestor listing. - - Args: - app_id: The app ID of the listing. - ns: The namespace of the entity. - ancestor_list: The ancestry of a given entity. - Returns: - A string representing the root key of an entity. - """ - prefix = self.get_table_prefix((app_id, ns)) - first_ent = ancestor_list[0] - if first_ent.has_name(): - key_id = first_ent.name() - elif first_ent.has_id(): - # Make sure ids are ordered lexigraphically by making sure they - # are of set size i.e. 2 > 0003 but 0002 < 0003. - key_id = str(first_ent.id()).zfill(ID_KEY_LENGTH) - return "{0}{1}{2}:{3}{4}".format(prefix, self._NAMESPACE_SEPARATOR, - first_ent.type(), key_id, dbconstants.KIND_SEPARATOR) - - def is_instance_wrapper(self, obj, expected_type): - """ A wrapper for isinstance for mocking purposes. - - Return whether an object is an instance of a class or of a subclass thereof. - With a type as second argument, return whether that is the object's type. - - Args: - obj: The object to check. - expected_type: A instance type we are comparing obj's type to. - Returns: - True if obj is of type expected_type, False otherwise. - """ - return isinstance(obj, expected_type) - - def acquire_locks_for_trans(self, entities, txnid): - """ Acquires locks for entities for one particular entity group. - - Args: - entities: A list of entities (entity_pb.EntityProto or entity_pb.Reference) - for which are are getting a lock for. - txnid: The transaction ID handler. - Returns: - A hash mapping root keys to transaction IDs. - Raises: - ZKTransactionException: If lock is not obtainable. - TypeError: If args are of incorrect types. - """ - # Key tuples are the prefix and the root key for which we're getting locks. - root_keys = [] - txn_hash = {} - if not self.is_instance_wrapper(entities, list): - raise TypeError("Expected a list and got {0}".format(entities.__class__)) - for ent in entities: - if self.is_instance_wrapper(ent, entity_pb.Reference): - root_keys.append(self.get_root_key_from_entity_key(ent)) - elif self.is_instance_wrapper(ent, entity_pb.EntityProto): - root_keys.append(self.get_root_key_from_entity_key(ent.key())) - else: - raise TypeError("Excepted either a reference or an EntityProto" - "got {0}".format(ent.__class__)) - - if entities == []: - return {} - - if self.is_instance_wrapper(entities[0], entity_pb.Reference): - app_id = entities[0].app() - else: - app_id = entities[0].key().app() - app_id = clean_app_id(app_id) - # Remove all duplicate root keys. 
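# get_root_key above zero-pads numeric IDs so that lexicographic ordering of
# row keys matches numeric ordering of the IDs. A tiny illustration; the
# width of 10 is an assumption standing in for the module's ID_KEY_LENGTH
# constant:

    def pad_id(numeric_id, width=10):
        """Zero-pad an ID so string comparison agrees with numeric order."""
        return str(numeric_id).zfill(width)

    assert '2' > '0003'            # unpadded strings sort in the wrong order
    assert pad_id(2) < pad_id(3)   # '0000000002' < '0000000003'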
- root_keys = list(set(root_keys)) - try: - for root_key in root_keys: - txn_hash[root_key] = txnid - self.zookeeper.acquire_lock(app_id, txnid, root_key) - except zktransaction.ZKTransactionException as zkte: - self.logger.warning('Concurrent transaction: {}'.format(txnid)) - for root_key in txn_hash: - self.zookeeper.notify_failed_transaction(app_id, txn_hash[root_key]) - raise zkte - - return txn_hash - - def release_locks_for_nontrans(self, app_id, entities, txn_hash): - """ Releases locks for non-transactional puts. - - Args: - entities: List of entities for which we are releasing locks. Can - be either entity_pb.EntityProto or entity_pb.Reference. - txn_hash: A hash mapping root keys to transaction IDs. - Raises: - ZKTransactionException: If we are unable to release locks. - """ - root_keys = [] - for ent in entities: - if isinstance(ent, entity_pb.EntityProto): - ent = ent.key() - root_keys.append(self.get_root_key_from_entity_key(ent)) - - # Remove all duplicate root keys - root_keys = list(set(root_keys)) - for root_key in root_keys: - txnid = txn_hash[root_key] - self.zookeeper.release_lock(app_id, txnid) - - @gen.coroutine - def fetch_keys(self, key_list): - """ Given a list of keys fetch the entities. - - Args: - key_list: A list of keys to fetch. - Returns: - A tuple of entities from the datastore and key list. - """ - row_keys = [] - for key in key_list: - self.validate_app_id(key.app()) - index_key = str(encode_index_pb(key.path())) - prefix = self.get_table_prefix(key) - row_keys.append(self._SEPARATOR.join([prefix, index_key])) - result = yield self.datastore_batch.batch_get_entity( - dbconstants.APP_ENTITY_TABLE, row_keys, APP_ENTITY_SCHEMA) - raise gen.Return((result, row_keys)) - - @gen.coroutine - def dynamic_get(self, app_id, get_request, get_response): - """ Fetch keys from the datastore. - - Args: - app_id: The application ID. - get_request: Request with list of keys. - get_response: Response to application server. - Raises: - ZKTransactionException: If a lock was unable to get acquired. - """ - keys = get_request.key_list() - if len(keys) < 5: - self.logger.debug('Get:\n{}'.format(get_request)) - else: - self.logger.debug('Get: {} keys'.format(len(keys))) - - if get_request.has_transaction(): - results, row_keys = yield self.fetch_keys(keys) - fetched_groups = {group_for_key(key).Encode() for key in keys} - yield self.datastore_batch.record_reads( - app_id, get_request.transaction().handle(), fetched_groups) - else: - results, row_keys = yield self.fetch_keys(keys) - - result_count = 0 - for r in row_keys: - group = get_response.add_entity() - if r in results and APP_ENTITY_SCHEMA[0] in results[r]: - result_count += 1 - group.mutable_entity().CopyFrom( - entity_pb.EntityProto(results[r][APP_ENTITY_SCHEMA[0]])) - self.logger.debug('Returning {} results'.format(result_count)) - - @gen.coroutine - def dynamic_delete(self, app_id, delete_request): - """ Deletes a set of rows. - - Args: - app_id: The application ID. - delete_request: Request with a list of keys. 
- """ - keys = delete_request.key_list() - if not keys: - return - - ent_kinds = [] - for key in delete_request.key_list(): - last_path = key.path().element_list()[-1] - if last_path.type() not in ent_kinds: - ent_kinds.append(last_path.type()) - - composite_indexes = yield self.get_indexes(app_id) - filtered_indexes = [index for index in composite_indexes - if index.definition().entity_type() in ent_kinds] - - if delete_request.has_transaction(): - txid = delete_request.transaction().handle() - yield self.datastore_batch.delete_entities_tx(app_id, txid, keys) - else: - by_group = {} - for key in keys: - group_key = group_for_key(key).Encode() - if group_key not in by_group: - by_group[group_key] = [] - by_group[group_key].append(key) - - for encoded_group_key, key_list in by_group.iteritems(): - group_key = entity_pb.Reference(encoded_group_key) - - txid = self.transaction_manager.create_transaction_id(app_id, xg=False) - self.transaction_manager.set_groups(app_id, txid, [group_key]) - - # Allow the lock to stick around if there is an issue applying the batch. - lock = entity_lock.EntityLock(self.zookeeper.handle, [group_key], txid) - try: - yield lock.acquire() - except entity_lock.LockTimeout: - raise Timeout('Unable to acquire entity group lock') - - try: - yield self.delete_entities( - group_key, - txid, - key_list, - composite_indexes=filtered_indexes - ) - lock.release() - finally: - # In case of failure entity group lock should stay acquired - # as transaction groomer will handle it later. - # But tornado lock must be released. - lock.ensure_release_tornado_lock() - - self.logger.debug('Removed {} entities'.format(len(key_list))) - self.transaction_manager.delete_transaction_id(app_id, txid) - - def generate_filter_info(self, filters): - """Transform a list of filters into a more usable form. - - Args: - filters: A list of filter PBs. - Returns: - A dict mapping property names to lists of (op, value) tuples. - """ - filter_info = {} - for filt in filters: - prop = filt.property(0) - value = prop.value() - if prop.name() == '__key__': - value = reference_property_to_reference(value.referencevalue()) - value = value.path() - filter_info.setdefault(prop.name(), []).\ - append((filt.op(), encode_index_pb(value))) - return filter_info - - def generate_order_info(self, orders): - """Transform a list of orders into a more usable form which - is a tuple of properties and ordering directions. - - Args: - orders: A list of order PBs. - Returns: - A list of (property, direction) tuples. - """ - orders = [(order.property(), order.direction()) for order in orders] - if orders and orders[-1] == ('__key__', datastore_pb.Query_Order.ASCENDING): - orders.pop() - return orders - - @gen.coroutine - def __get_start_key(self, prefix, prop_name, order, last_result, query=None): - """ Builds the start key for cursor query. - - Args: - prefix: The start key prefix (app id and namespace). - prop_name: Property name of the filter. - order: Sort order the query requires. - last_result: Last result encoded in cursor. - query: A datastore_pb.Query object. - Raises: - AppScaleDBError if unable to retrieve original entity or original entity - no longer has the requested property. - """ - e = last_result - path = str(encode_index_pb(e.key().path())) - last_result_key = self._SEPARATOR.join([prefix, path]) - if not prop_name and not order: - raise gen.Return(last_result_key) - if e.property_list(): - plist = e.property_list() - else: - # Fetch the entity from the datastore in order to get the property - # values. 
- ret = yield self.datastore_batch.batch_get_entity( - dbconstants.APP_ENTITY_TABLE, [last_result_key], APP_ENTITY_SCHEMA) - - if APP_ENTITY_SCHEMA[0] not in ret[last_result_key]: - message = '{} not found in {}'.format( - last_result_key, dbconstants.APP_ENTITY_TABLE) - raise dbconstants.AppScaleDBError(message) - - ent = entity_pb.EntityProto(ret[last_result_key][APP_ENTITY_SCHEMA[0]]) - plist = ent.property_list() - - val = None - - # Use the value from the query if possible. This reduces ambiguity when - # the entity provided by the cursor has multiple values for the property. - if (query is not None and query.filter_size() == 1 and - query.filter(0).op() == datastore_pb.Query_Filter.EQUAL): - query_prop = query.filter(0).property(0) - val = str(encode_index_pb(query_prop.value())) - - if val is None: - for p in plist: - if p.name() == prop_name: - val = str(encode_index_pb(p.value())) - break - - if val is None: - raise dbconstants.AppScaleDBError('{} not in entity'.format(prop_name)) - - if order == datastore_pb.Query_Order.DESCENDING: - val = helper_functions.reverse_lex(val) - params = [prefix, get_entity_kind(e), prop_name, val, path] - raise gen.Return(get_index_key_from_params(params)) - - def is_zigzag_merge_join(self, query, filter_info, order_info): - """ Checks to see if the current query can be executed as a zigzag - merge join. - - Args: - query: A datastore_pb.Query. - filter_info: dict of property names mapping to tuples of filter - operators and values. - order_info: tuple with property name and the sort order. - Returns: - True if it qualifies as a zigzag merge join, and false otherwise. - """ - filter_info = self.remove_exists_filters(filter_info) - - order_properties = [] - for order in order_info: - order_properties.append(order[0]) - - property_names = [] - for property_name in filter_info: - filt = filter_info[property_name] - property_names.append(property_name) - # We only handle equality filters for zigzag merge join queries. - if filt[0][0] != datastore_pb.Query_Filter.EQUAL: - return False - - if len(filter_info) < 2: - return False - - for order_property_name in order_properties: - if order_property_name not in property_names: - return False - - return True - - @gen.coroutine - def __fetch_entities_from_row_list(self, rowkeys): - """ Given a list of keys fetch the entities from the entity table. - - Args: - rowkeys: A list of strings which are keys to the entitiy table. - Returns: - A list of entities. - """ - result = yield self.datastore_batch.batch_get_entity( - dbconstants.APP_ENTITY_TABLE, rowkeys, APP_ENTITY_SCHEMA) - entities = [] - for key in rowkeys: - if key in result and APP_ENTITY_SCHEMA[0] in result[key]: - entities.append(result[key][APP_ENTITY_SCHEMA[0]]) - raise gen.Return(entities) - - def __extract_rowkeys_from_refs(self, refs): - """ Extract the rowkeys to fetch from a list of references. - - Args: - refs: key/value pairs where the values contain a reference to the - entitiy table. - Returns: - A list of rowkeys. - """ - if len(refs) == 0: - return [] - keys = [item.keys()[0] for item in refs] - rowkeys = [] - for index, ent in enumerate(refs): - key = keys[index] - ent = ent[key]['reference'] - # Make sure not to fetch the same entity more than once. - if ent not in rowkeys: - rowkeys.append(ent) - return rowkeys - - @gen.coroutine - def __fetch_entities(self, refs): - """ Given a list of references, get the entities. - - Args: - refs: key/value pairs where the values contain a reference to - the entitiy table. 
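# is_zigzag_merge_join above reduces to three checks once EXISTS filters are
# stripped: at least two filtered properties, every remaining filter an
# equality, and every ordered property also present among the filters.
# Restated over the simplified filter_info shape from the sketch above
# (hypothetical helper, not the module's code):

    def qualifies_for_zigzag(filter_info, order_info, equal_op='EQUAL'):
        """filter_info: {prop: [(op, value), ...]}; order_info: [(prop, dir)]."""
        if len(filter_info) < 2:
            return False
        if any(ops[0][0] != equal_op for ops in filter_info.values()):
            return False
        return all(prop in filter_info for prop, _ in order_info)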
- Returns: - A list of validated entities. - """ - rowkeys = self.__extract_rowkeys_from_refs(refs) - result = yield self.__fetch_entities_from_row_list(rowkeys) - raise gen.Return(result) - - @gen.coroutine - def __fetch_entities_dict(self, refs): - """ Given a list of references, return the entities as a dictionary. - - Args: - refs: key/value pairs where the values contain a reference to - the entitiy table. - Returns: - A dictionary of validated entities. - """ - rowkeys = self.__extract_rowkeys_from_refs(refs) - result = yield self.__fetch_entities_dict_from_row_list(rowkeys) - raise gen.Return(result) - - @gen.coroutine - def __fetch_entities_dict_from_row_list(self, rowkeys): - """ Given a list of rowkeys, return the entities as a dictionary. - - Args: - rowkeys: A list of strings which are keys to the entitiy table. - Returns: - A dictionary of validated entities. - """ - results = yield self.datastore_batch.batch_get_entity( - dbconstants.APP_ENTITY_TABLE, rowkeys, APP_ENTITY_SCHEMA) - - clean_results = {} - for key in rowkeys: - if key in results and APP_ENTITY_SCHEMA[0] in results[key]: - clean_results[key] = results[key][APP_ENTITY_SCHEMA[0]] - - raise gen.Return(clean_results) - - @gen.coroutine - def __fetch_and_validate_entity_set(self, index_dict, limit, app_id, - direction): - """ Fetch all the valid entities as needed from references. - - Args: - index_dict: A dictionary containing a list of index entries for each - reference. - limit: An integer specifying the max number of entities needed. - app_id: A string, the application identifier. - direction: The direction of the index. - Returns: - A list of valid entities. - """ - references = index_dict.keys() - # Prevent duplicate entities across queries with a cursor. - references.sort() - offset = 0 - results = [] - to_fetch = limit - added_padding = False - while True: - refs_to_fetch = references[offset:offset + to_fetch] - - # If we've exhausted the list of references, we can return. - if len(refs_to_fetch) == 0: - raise gen.Return(results[:limit]) - - entities = yield self.__fetch_entities_dict_from_row_list(refs_to_fetch) - - # Prevent duplicate entities across queries with a cursor. - entity_keys = entities.keys() - entity_keys.sort() - - for reference in entity_keys: - use_result = False - indexes_to_check = index_dict[reference] - for index_info in indexes_to_check: - index = index_info['index'] - prop_name = index_info['prop_name'] - entry = {index: {'reference': reference}} - if self.__valid_index_entry(entry, entities, direction, prop_name): - use_result = True - else: - use_result = False - break - - if use_result: - results.append(entities[reference]) - if len(results) >= limit: - raise gen.Return(results[:limit]) - - offset = offset + to_fetch - - to_fetch -= len(results) - - # Pad the number of references to fetch to increase the likelihood of - # getting all the valid references that we need. - if not added_padding: - to_fetch += dbconstants.MAX_GROUPS_FOR_XG - added_padding = True - - def __extract_entities(self, kv): - """ Given a result from a range query on the Entity table return a - list of encoded entities. - - Args: - kv: Key and values from a range query on the entity table. - Returns: - The extracted entities. 
- """ - keys = [item.keys()[0] for item in kv] - results = [] - for index, entity in enumerate(kv): - key = keys[index] - entity = entity[key][APP_ENTITY_SCHEMA[0]] - results.append(entity) - - return results - - @gen.coroutine - def ancestor_query(self, query, filter_info): - """ Performs ancestor queries which is where you select - entities based on a particular root entity. - - Args: - query: The query to run. - filter_info: Tuple with filter operators and values. - Returns: - A tuple containing a list of entities and a boolean indicating if there - are more results for the query. - Raises: - ZKTransactionException: If a lock could not be acquired. - """ - ancestor = query.ancestor() - prefix = self.get_table_prefix(query) - path = buffer(prefix + self._SEPARATOR) + encode_index_pb(ancestor.path()) - txn_id = 0 - if query.has_transaction(): - txn_id = query.transaction().handle() - - startrow = path - endrow = path + self._TERM_STRING - - end_inclusive = self._ENABLE_INCLUSIVITY - start_inclusive = self._ENABLE_INCLUSIVITY - - if '__key__' in filter_info: - op = filter_info['__key__'][0][0] - __key__ = str(filter_info['__key__'][0][1]) - if op and op == datastore_pb.Query_Filter.EQUAL: - startrow = prefix + self._SEPARATOR + __key__ - endrow = prefix + self._SEPARATOR + __key__ - elif op and op == datastore_pb.Query_Filter.GREATER_THAN: - start_inclusive = self._DISABLE_INCLUSIVITY - startrow = prefix + self._SEPARATOR + __key__ - elif op and op == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - startrow = prefix + self._SEPARATOR + __key__ - elif op and op == datastore_pb.Query_Filter.LESS_THAN: - endrow = prefix + self._SEPARATOR + __key__ - end_inclusive = self._DISABLE_INCLUSIVITY - elif op and op == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - endrow = prefix + self._SEPARATOR + __key__ - - if query.has_compiled_cursor() and query.compiled_cursor().position_size(): - cursor = appscale_stub_util.ListCursor(query) - last_result = cursor._GetLastResult() - startrow = yield self.__get_start_key(prefix, None, None, last_result) - start_inclusive = self._DISABLE_INCLUSIVITY - if query.compiled_cursor().position_list()[0].start_inclusive() == 1: - start_inclusive = self._ENABLE_INCLUSIVITY - - more_results = False - - if startrow > endrow: - raise gen.Return(([], more_results)) - - request_limit, check_more_results = self.get_limit(query) - fetch_count = request_limit - if check_more_results: - fetch_count += 1 - - entities = [] - while True: - results = yield self.datastore_batch.range_query( - dbconstants.APP_ENTITY_TABLE, - APP_ENTITY_SCHEMA, - startrow, - endrow, - fetch_count, - start_inclusive=start_inclusive, - end_inclusive=end_inclusive) - - if query.has_kind(): - entities.extend([ - result.values()[0]['entity'] for result in results - if kind_from_encoded_key(result.keys()[0]) == query.kind()]) - else: - entities.extend([result.values()[0]['entity'] for result in results]) - - if len(results) < fetch_count: - break - - if len(entities) >= fetch_count: - break - - # TODO: This can be made more efficient by skipping ahead to the next - # possible match. 
- startrow = results[-1].keys()[0] - start_inclusive = False - - if query.has_transaction(): - yield self.datastore_batch.record_reads( - query.app(), query.transaction().handle(), [group_for_key(ancestor)]) - - if check_more_results and len(entities) > request_limit: - more_results = True - - raise gen.Return((entities[:request_limit], more_results)) - - @gen.coroutine - def fetch_from_entity_table(self, - startrow, - endrow, - limit, - offset, - start_inclusive, - end_inclusive, - query, - txn_id): - """ - Fetches entities from the entity table given a query and a set of parameters. - It will validate the results and remove tombstoned items. - - Args: - startrow: The key from which we start a range query. - endrow: The end key that terminates a range query. - limit: The maximum number of items to return from a query. - offset: The number of entities we want removed from the front of the result. - start_inclusive: Boolean if we should include the start key in the result. - end_inclusive: Boolean if we should include the end key in the result. - query: The query we are currently running. - txn_id: The current transaction ID if there is one, it is 0 if there is not. - Returns: - A validated database result. - """ - final_result = [] - while 1: - result = yield self.datastore_batch.range_query( - dbconstants.APP_ENTITY_TABLE, - APP_ENTITY_SCHEMA, - startrow, - endrow, - limit, - offset=0, - start_inclusive=start_inclusive, - end_inclusive=end_inclusive) - - prev_len = len(result) - last_result = None - if result: - last_result = result[-1].keys()[0] - else: - break - - final_result += result - - if len(result) != prev_len: - startrow = last_result - start_inclusive = self._DISABLE_INCLUSIVITY - limit = limit - len(result) - continue - else: - break - - raise gen.Return(self.__extract_entities(final_result)) - - @gen.coroutine - def kindless_query(self, query, filter_info): - """ Performs kindless queries where queries are performed - on the entity table and go across kinds. - - Args: - query: The query to run. - filter_info: Tuple with filter operators and values. - Returns: - A tuple containing entities that match the query and a boolean indicating - if there are more results for the query. 
- """ - prefix = self.get_table_prefix(query) - - filters = [] - if '__key__' in filter_info: - for filter in filter_info['__key__']: - filters.append({'key': str(filter[1]), 'op': filter[0]}) - - order = None - prop_name = None - - startrow = prefix + self._SEPARATOR - endrow = prefix + self._SEPARATOR + self._TERM_STRING - start_inclusive = self._ENABLE_INCLUSIVITY - end_inclusive = self._ENABLE_INCLUSIVITY - for filter in filters: - if filter['op'] == datastore_pb.Query_Filter.EQUAL: - startrow = prefix + self._SEPARATOR + filter['key'] - endrow = startrow - if filter['op'] == datastore_pb.Query_Filter.GREATER_THAN: - startrow = prefix + self._SEPARATOR + filters[0]['key'] - start_inclusive = self._DISABLE_INCLUSIVITY - if filter['op'] == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - startrow = prefix + self._SEPARATOR + filters[0]['key'] - if filter['op'] == datastore_pb.Query_Filter.LESS_THAN: - endrow = prefix + self._SEPARATOR + filter['key'] - end_inclusive = self._DISABLE_INCLUSIVITY - if filter['op'] == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - endrow = prefix + self._SEPARATOR + filter['key'] - - if query.has_compiled_cursor() and query.compiled_cursor().position_size(): - cursor = appscale_stub_util.ListCursor(query) - last_result = cursor._GetLastResult() - startrow = yield self.__get_start_key( - prefix, prop_name, order, last_result) - start_inclusive = self._DISABLE_INCLUSIVITY - if query.compiled_cursor().position_list()[0].start_inclusive() == 1: - start_inclusive = self._ENABLE_INCLUSIVITY - - more_results = False - request_limit, check_more_results = self.get_limit(query) - fetch_count = request_limit - if check_more_results: - fetch_count += 1 - - result = yield self.fetch_from_entity_table( - startrow, endrow, fetch_count, offset=0, start_inclusive=start_inclusive, - end_inclusive=end_inclusive, query=query, txn_id=0) - - if check_more_results and len(result) > request_limit: - more_results = True - - raise gen.Return((result[:request_limit], more_results)) - - def reverse_path(self, key): - """ Use this function for reversing the key ancestry order. - Needed for kind queries. - - Args: - key: A string key which needs reversing. - Returns: - A string key which can be used on the kind table. - """ - tokens = key.split(dbconstants.KIND_SEPARATOR) - tokens.reverse() - key = dbconstants.KIND_SEPARATOR.join(tokens)[1:] + \ - dbconstants.KIND_SEPARATOR - return key - - def kind_query_range(self, query, filter_info, order_info): - """ Gets start and end keys for kind queries, along with - inclusivity of those keys. - - Args: - query: The query to run. - filter_info: __key__ filter. - order_info: ordering for __key__. 
- Returns: - A tuple of the start row, end row, if its start inclusive, - and if its end inclusive - """ - ancestor_filter = "" - if query.has_ancestor(): - ancestor = query.ancestor() - ancestor_filter = encode_index_pb(ancestor.path()) - end_inclusive = self._ENABLE_INCLUSIVITY - start_inclusive = self._ENABLE_INCLUSIVITY - prefix = self.get_table_prefix(query) - startrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + \ - str(ancestor_filter) - endrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + \ - str(ancestor_filter) + \ - self._TERM_STRING - if '__key__' not in filter_info: - return startrow, endrow, start_inclusive, end_inclusive - - for key_filter in filter_info['__key__']: - op = key_filter[0] - __key__ = str(key_filter[1]) - if op and op == datastore_pb.Query_Filter.EQUAL: - startrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + __key__ - endrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + __key__ - elif op and op == datastore_pb.Query_Filter.GREATER_THAN: - start_inclusive = self._DISABLE_INCLUSIVITY - startrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + __key__ - elif op and op == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - startrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + __key__ - elif op and op == datastore_pb.Query_Filter.LESS_THAN: - endrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + __key__ - end_inclusive = self._DISABLE_INCLUSIVITY - elif op and op == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - endrow = prefix + self._SEPARATOR + query.kind() + \ - dbconstants.KIND_SEPARATOR + __key__ - return startrow, endrow, start_inclusive, end_inclusive - - def default_namespace(self): - """ Returns the default namespace entry because the groomer does not - generate it for each application. - - Returns: - A entity proto of the default metadata.Namespace. - """ - default_namespace = Namespace(id=1) - protobuf = db.model_to_protobuf(default_namespace) - last_path = protobuf.key().path().element_list()[-1] - last_path.set_id(1) - return protobuf.Encode() - - @gen.coroutine - def __kind_query(self, query, filter_info, order_info): - """ Performs kind only queries, kind and ancestor, and ancestor queries - https://developers.google.com/appengine/docs/python/datastore/queries. - - Args: - query: The query to run. - filter_info: tuple with filter operators and values. - order_info: tuple with property name and the sort order. - Returns: - A tuple containing an ordered list of entities matching the query and a - boolean indicating if there are more results for the query. - Raises: - AppScaleDBError: An infinite loop is detected when fetching references. - """ - self.logger.debug('Kind Query:\n{}'.format(query)) - - more_results = False - - filter_info = self.remove_exists_filters(filter_info) - # Detect quickly if this is a kind query or not. - for fi in filter_info: - if fi != "__key__": - raise gen.Return((None, more_results)) - - if query.has_ancestor(): - if len(order_info) > 0: - # Ordered ancestor queries require an index. 
- raise gen.Return((None, more_results)) - - result, more_results = yield self.ancestor_query(query, filter_info) - raise gen.Return((result, more_results)) - - if not query.has_kind(): - result, more_results = yield self.kindless_query(query, filter_info) - raise gen.Return((result, more_results)) - - if query.kind().startswith("__") and query.kind().endswith("__"): - # Use the default namespace for metadata queries. - query.set_name_space("") - - startrow, endrow, start_inclusive, end_inclusive = \ - self.kind_query_range(query, filter_info, order_info) - if startrow is None or endrow is None: - raise gen.Return((None, more_results)) - - if query.has_compiled_cursor() and query.compiled_cursor().position_size(): - cursor = appscale_stub_util.ListCursor(query) - last_result = cursor._GetLastResult() - prefix = self.get_table_prefix(query) - startrow = get_kind_key(prefix, last_result.key().path()) - start_inclusive = self._DISABLE_INCLUSIVITY - if query.compiled_cursor().position_list()[0].start_inclusive() == 1: - start_inclusive = self._ENABLE_INCLUSIVITY - - request_limit, check_more_results = self.get_limit(query) - fetch_count = request_limit - if check_more_results: - fetch_count += 1 - - if startrow > endrow: - raise gen.Return(([], more_results)) - - # Since the validity of each reference is not checked until after the - # range query has been performed, we may need to fetch additional - # references in order to satisfy the query. - entities = [] - current_limit = fetch_count - while True: - references = yield self.datastore_batch.range_query( - dbconstants.APP_KIND_TABLE, - dbconstants.APP_KIND_SCHEMA, - startrow, - endrow, - current_limit, - offset=0, - start_inclusive=start_inclusive, - end_inclusive=end_inclusive - ) - - new_entities = yield self.__fetch_entities(references) - entities.extend(new_entities) - - # If we have enough valid entities to satisfy the query, we're done. - if len(entities) >= fetch_count: - break - - # If we received fewer references than we asked for, they are exhausted. - if len(references) < current_limit: - break - - # If all of the references that we fetched were valid, we're done. - if len(new_entities) == len(references): - break - - invalid_refs = len(references) - len(new_entities) - - # Pad the limit to increase the likelihood of fetching all the valid - # references that we need. - current_limit = invalid_refs + dbconstants.MAX_GROUPS_FOR_XG - - self.logger.debug('{} references invalid. Fetching {} more references.' - .format(invalid_refs, current_limit)) - - # Start from the last reference fetched. - last_startrow = startrow - startrow = references[-1].keys()[0] - start_inclusive = self._DISABLE_INCLUSIVITY - - if startrow == last_startrow: - raise dbconstants.AppScaleDBError( - 'An infinite loop was detected while fetching references.') - - if query.kind() == "__namespace__": - entities = [self.default_namespace()] + entities - - if check_more_results and len(entities) > request_limit: - more_results = True - - results = entities[:request_limit] - - # Handle projection queries. - if query.property_name_size() > 0: - results = self.remove_extra_props(query, results) - - self.logger.debug('Returning {} entities'.format(len(results))) - raise gen.Return((results, more_results)) - - def remove_exists_filters(self, filter_info): - """ Remove any filters that have EXISTS filters. - - Args: - filter_info: dict of property names mapping to tuples of filter - operators and values. - Returns: - A filter info dictionary without any EXIST filters. 
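# The reference-resolution loop in __kind_query above (and its variants in the
# single-property and composite paths below) follows one pattern: accumulate
# valid entities, and whenever some references turn out to be stale, re-issue
# the range query from the last reference seen with a padded limit, bailing
# out if no forward progress is made. A condensed synchronous sketch; the
# fetch_refs/resolve callables and the padding default (standing in for
# dbconstants.MAX_GROUPS_FOR_XG) are assumptions:

    def fetch_valid_entities(fetch_refs, resolve, limit, padding=25):
        """fetch_refs(start_after, count) -> sorted list of reference keys;
        resolve(refs) -> list of valid entities for those references."""
        entities = []
        start_after = None
        count = limit
        while True:
            refs = fetch_refs(start_after, count)
            valid = resolve(refs)
            entities.extend(valid)

            if len(entities) >= limit:     # enough valid results
                break
            if len(refs) < count:          # references are exhausted
                break
            if len(valid) == len(refs):    # nothing was invalid
                break

            # Some references were stale: pad the next fetch and resume after
            # the last reference seen, guarding against an infinite loop.
            count = (len(refs) - len(valid)) + padding
            previous = start_after
            start_after = refs[-1]
            if start_after == previous:
                raise RuntimeError('No progress while fetching references.')

        return entities[:limit]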
- """ - filtered = {} - for key in filter_info.keys(): - if filter_info[key][0][0] == datastore_pb.Query_Filter.EXISTS: - continue - else: - filtered[key] = filter_info[key] - return filtered - - def remove_extra_equality_filters(self, potential_filter_ops): - """ Keep only the first equality filter for a given property. - - Args: - potential_filter_ops: A list of tuples in the form (operation, value). - Returns: - A filter_ops list with only one equality filter. - """ - filter_ops = [] - saw_equality_filter = False - for operation, value in potential_filter_ops: - if operation == datastore_pb.Query_Filter.EQUAL and saw_equality_filter: - continue - - if operation == datastore_pb.Query_Filter.EQUAL: - saw_equality_filter = True - - filter_ops.append((operation, value)) - - return filter_ops - - @gen.coroutine - def __single_property_query(self, query, filter_info, order_info): - """Performs queries satisfiable by the Single_Property tables. - - Args: - query: The query to run. - filter_info: tuple with filter operators and values. - order_info: tuple with property name and the sort order. - Returns: - A tuple containing a list of entities retrieved from the given query and - a boolean indicating if there are more results for the query. - """ - self.logger.debug('Single Property Query:\n{}'.format(query)) - if query.kind().startswith("__") and \ - query.kind().endswith("__"): - # Use the default namespace for metadata queries. - query.set_name_space("") - - more_results = False - - filter_info = self.remove_exists_filters(filter_info) - ancestor = None - property_names = set(filter_info.keys()) - property_names.update(x[0] for x in order_info) - property_names.discard('__key__') - if len(property_names) != 1: - raise gen.Return((None, more_results)) - - property_name = property_names.pop() - potential_filter_ops = filter_info.get(property_name, []) - - # We will apply the other equality filters after fetching the entities. - filter_ops = self.remove_extra_equality_filters(potential_filter_ops) - - multiple_equality_filters = self.__get_multiple_equality_filters( - query.filter_list()) - - if len(order_info) > 1 or (order_info and order_info[0][0] == '__key__'): - raise gen.Return((None, more_results)) - - # If there is an ancestor in the query, any filtering must be within a - # single value when using a single-prop index. - spans_multiple_values = order_info or ( - filter_ops and filter_ops[0][0] != datastore_pb.Query_Filter.EQUAL) - if query.has_ancestor() and spans_multiple_values: - raise gen.Return((None, more_results)) - - if query.has_ancestor(): - ancestor = query.ancestor() - - if not query.has_kind(): - raise gen.Return((None, more_results)) - - if order_info and order_info[0][0] == property_name: - direction = order_info[0][1] - else: - direction = datastore_pb.Query_Order.ASCENDING - - prefix = self.get_table_prefix(query) - - request_limit, check_more_results = self.get_limit(query) - - if query.has_compiled_cursor() and query.compiled_cursor().position_size(): - cursor = appscale_stub_util.ListCursor(query) - last_result = cursor._GetLastResult() - startrow = yield self.__get_start_key( - prefix, property_name, direction, last_result, query=query) - else: - startrow = None - - end_compiled_cursor = None - if query.has_end_compiled_cursor(): - end_compiled_cursor = query.end_compiled_cursor() - - # Since the validity of each reference is not checked until after the - # range query has been performed, we may need to fetch additional - # references in order to satisfy the query. 
- entities = [] - fetch_count = request_limit - if check_more_results: - fetch_count += 1 - - current_limit = fetch_count - while True: - references = yield self.__apply_filters( - filter_ops, order_info, property_name, query.kind(), prefix, - current_limit, startrow, ancestor=ancestor, query=query, - end_compiled_cursor=end_compiled_cursor) - - potential_entities = yield self.__fetch_entities_dict(references) - - # Since the entities may be out of order due to invalid references, - # we construct a new list in order of valid references. - new_entities = [] - for reference in references: - if self.__valid_index_entry(reference, potential_entities, direction, - property_name): - entity_key = reference[reference.keys()[0]]['reference'] - valid_entity = potential_entities[entity_key] - new_entities.append(valid_entity) - - if len(multiple_equality_filters) > 0: - self.logger.debug('Detected multiple equality filters on a repeated' - 'property. Removing results that do not match query.') - new_entities = self.__apply_multiple_equality_filters( - new_entities, multiple_equality_filters) - - entities.extend(new_entities) - - # If we have enough valid entities to satisfy the query, we're done. - if len(entities) >= fetch_count: - break - - # If we received fewer references than we asked for, they are exhausted. - if len(references) < current_limit: - break - - # If all of the references that we fetched were valid, we're done. - if len(new_entities) == len(references): - break - - invalid_refs = len(references) - len(new_entities) - - # Pad the limit to increase the likelihood of fetching all the valid - # references that we need. - current_limit = invalid_refs + dbconstants.MAX_GROUPS_FOR_XG - - self.logger.debug('{} references invalid. Fetching {} more references.' - .format(invalid_refs, current_limit)) - - last_startrow = startrow - # Start from the last reference fetched. - startrow = references[-1].keys()[0] - - if startrow == last_startrow: - raise dbconstants.AppScaleDBError( - 'An infinite loop was detected while fetching references.') - - if check_more_results and len(entities) > request_limit: - more_results = True - - results = entities[:request_limit] - - # Handle projection queries. - # TODO: When the index has been confirmed clean, use those values directly. - if query.property_name_size() > 0: - results = self.remove_extra_props(query, results) - - self.logger.debug('Returning {} results'.format(len(results))) - raise gen.Return((results, more_results)) - - @gen.coroutine - def __apply_filters(self, - filter_ops, - order_info, - property_name, - kind, - prefix, - limit, - startrow, - force_start_key_exclusive=False, - ancestor=None, - query=None, - end_compiled_cursor=None): - """ Applies property filters in the query. - - Args: - filter_ops: Tuple with property filter operator and value. - order_info: Tuple with property name and sort order. - kind: Kind of the entity. - prefix: Prefix for the table. - limit: Number of results. - startrow: Start key for the range scan. - force_start_key_exclusive: Do not include the start key. - ancestor: Optional query ancestor. - query: Query object for debugging. - end_compiled_cursor: A compiled cursor to resume a query. - Results: - Returns a list of entity keys. - Raises: - NotImplementedError: For unsupported queries. - AppScaleMisconfiguredQuery: Bad filters or orderings. 
- """ - ancestor_filter = None - if ancestor: - ancestor_filter = str(encode_index_pb(ancestor.path())) - - end_inclusive = True - start_inclusive = True - - endrow = None - column_names = dbconstants.PROPERTY_SCHEMA - - if order_info and order_info[0][0] == property_name: - direction = order_info[0][1] - else: - direction = datastore_pb.Query_Order.ASCENDING - - if direction == datastore_pb.Query_Order.ASCENDING: - table_name = dbconstants.ASC_PROPERTY_TABLE - else: - table_name = dbconstants.DSC_PROPERTY_TABLE - - if startrow: - start_inclusive = False - - if end_compiled_cursor: - list_cursor = appscale_stub_util.ListCursor(query) - last_result, _ = list_cursor._DecodeCompiledCursor(end_compiled_cursor) - endrow = yield self.__get_start_key( - prefix, property_name, direction, last_result) - - # This query is returning based on order on a specfic property name - # The start key (if not already supplied) depends on the property - # name and does not take into consideration its value. The end key - # is based on the terminating string. - if len(filter_ops) == 0 and (order_info and len(order_info) == 1): - if not startrow: - params = [prefix, kind, property_name, None] - startrow = get_index_key_from_params(params) - if not endrow: - params = [prefix, kind, property_name, self._TERM_STRING, None] - endrow = get_index_key_from_params(params) - if force_start_key_exclusive: - start_inclusive = False - result = yield self.datastore_batch.range_query( - table_name, column_names, startrow, endrow, limit, - offset=0, start_inclusive=start_inclusive, end_inclusive=end_inclusive) - raise gen.Return(result) - - # This query has a value it bases the query on for a property name - # The difference between operators is what the end and start key are. - if len(filter_ops) == 1: - key_comparison = False - oper = filter_ops[0][0] - value = str(filter_ops[0][1]) - - if direction == datastore_pb.Query_Order.DESCENDING: - value = helper_functions.reverse_lex(value) - if oper == datastore_pb.Query_Filter.EQUAL: - if ancestor: # Keep range within ancestor key. - start_value = ''.join([value, self._SEPARATOR, ancestor_filter]) - else: # Keep range within property value. - start_value = ''.join([value, self._SEPARATOR]) - end_value = ''.join([start_value, self._TERM_STRING]) - - # Single prop indexes can handle key inequality within a given value - # (eg. color='blue', __key__ endrow: - self.logger.error('Start row {} > end row {}'. - format([startrow], [endrow])) - raise gen.Return([]) - - ret = yield self.datastore_batch.range_query( - table_name, column_names, startrow, endrow, limit, - offset=0, start_inclusive=start_inclusive, end_inclusive=end_inclusive) - raise gen.Return(ret) - - # Here we have two filters and so we set the start and end key to - # get the given value within those ranges. - if len(filter_ops) > 1: - if filter_ops[0][0] == datastore_pb.Query_Filter.EQUAL or \ - filter_ops[1][0] == datastore_pb.Query_Filter.EQUAL: - # If one of the filters is EQUAL, set start and end key - # to the same value. - if filter_ops[0][0] == datastore_pb.Query_Filter.EQUAL: - value1 = filter_ops[0][1] - value2 = filter_ops[1][1] - oper2 = filter_ops[1][0] - else: - value1 = filter_ops[1][1] - value2 = filter_ops[0][1] - oper2 = filter_ops[0][0] - # Checking to see if filters/values are correct bounds. - # value1 and oper1 are the EQUALS filter values. 
- if oper2 == datastore_pb.Query_Filter.LESS_THAN: - if value2 > value1 == False: - raise gen.Return([]) - elif oper2 == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - if value2 >= value1 == False: - raise gen.Return([]) - elif oper2 == datastore_pb.Query_Filter.GREATER_THAN: - if value2 < value1 == False: - raise gen.Return([]) - elif oper2 == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - if value2 <= value1 == False: - raise gen.Return([]) - start_inclusive = self._ENABLE_INCLUSIVITY - end_inclusive = self._DISABLE_INCLUSIVITY - params = [prefix, kind, property_name, value1 + self._SEPARATOR] - if not startrow: - startrow = get_index_key_from_params(params) - else: - start_inclusive = self._DISABLE_INCLUSIVITY - if not endrow: - params = [prefix, kind, property_name, value1 + \ - self._SEPARATOR + self._TERM_STRING] - endrow = get_index_key_from_params(params) - - ret = yield self.datastore_batch.range_query( - table_name, column_names, startrow, endrow, limit, - offset=0, start_inclusive=start_inclusive, - end_inclusive=end_inclusive) - raise gen.Return(ret) - if filter_ops[0][0] == datastore_pb.Query_Filter.GREATER_THAN or \ - filter_ops[0][0] == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - oper1 = filter_ops[0][0] - oper2 = filter_ops[1][0] - value1 = str(filter_ops[0][1]) - value2 = str(filter_ops[1][1]) - else: - oper1 = filter_ops[1][0] - oper2 = filter_ops[0][0] - value1 = str(filter_ops[1][1]) - value2 = str(filter_ops[0][1]) - - if direction == datastore_pb.Query_Order.ASCENDING: - table_name = dbconstants.ASC_PROPERTY_TABLE - # The first operator will always be either > or >=. - if startrow: - start_inclusive = self._DISABLE_INCLUSIVITY - elif oper1 == datastore_pb.Query_Filter.GREATER_THAN: - params = [prefix, kind, property_name, value1 + self._SEPARATOR + \ - self._TERM_STRING] - startrow = get_index_key_from_params(params) - elif oper1 == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - params = [prefix, kind, property_name, value1 ] - startrow = get_index_key_from_params(params) - else: - raise dbconstants.AppScaleMisconfiguredQuery("Bad filter ordering") - - # The second operator will be either < or <=. 
- if endrow: - end_inclusive = self._ENABLE_INCLUSIVITY - elif oper2 == datastore_pb.Query_Filter.LESS_THAN: - params = [prefix, kind, property_name, value2] - endrow = get_index_key_from_params(params) - end_inclusive = self._DISABLE_INCLUSIVITY - elif oper2 == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - params = [prefix, kind, property_name, value2 + self._SEPARATOR + \ - self._TERM_STRING] - endrow = get_index_key_from_params(params) - end_inclusive = self._ENABLE_INCLUSIVITY - else: - raise dbconstants.AppScaleMisconfiguredQuery("Bad filter ordering") - - if direction == datastore_pb.Query_Order.DESCENDING: - table_name = dbconstants.DSC_PROPERTY_TABLE - value1 = helper_functions.reverse_lex(value1) - value2 = helper_functions.reverse_lex(value2) - - if endrow: - end_inclusive = self._ENABLE_INCLUSIVITY - elif oper1 == datastore_pb.Query_Filter.GREATER_THAN: - params = [prefix, kind, property_name, value1] - endrow = get_index_key_from_params(params) - end_inclusive = self._DISABLE_INCLUSIVITY - elif oper1 == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - params = [prefix, kind, property_name, value1 + self._SEPARATOR + \ - self._TERM_STRING] - endrow = get_index_key_from_params(params) - end_inclusive = self._ENABLE_INCLUSIVITY - - if startrow: - start_inclusive = self._DISABLE_INCLUSIVITY - elif oper2 == datastore_pb.Query_Filter.LESS_THAN: - params = [prefix, kind, property_name, value2 + self._SEPARATOR + \ - self._TERM_STRING] - startrow = get_index_key_from_params(params) - elif oper2 == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - params = [prefix, kind, property_name, value2] - startrow = get_index_key_from_params(params) - - if force_start_key_exclusive: - start_inclusive = False - if startrow > endrow: - raise gen.Return([]) - - result = yield self.datastore_batch.range_query( - table_name, column_names, startrow, endrow, limit, - offset=0, start_inclusive=start_inclusive, end_inclusive=end_inclusive) - raise gen.Return(result) - - raise gen.Return([]) - - @staticmethod - @gen.coroutine - def _common_refs_from_ranges(ranges, limit, path=None): - """ Find common entries across multiple index ranges. - - Args: - ranges: A list of RangeIterator objects. - limit: An integer specifying the maximum number of references to find. - path: An entity_pb.Path object that restricts the query to a single key. - Returns: - A dictionary mapping entity references to index entries. - """ - if path is not None: - limit = 1 - for range_ in ranges: - range_.set_cursor(path, inclusive=True) - - reference_hash = {} - min_common_path = ranges[0].get_cursor() - entries_exhausted = False - while True: - common_keys = [] - entry = None - for range_ in ranges: - try: - entry = yield range_.async_next() - except RangeExhausted: - # If any ranges have been exhausted, there are no more matches. - entries_exhausted = True - break - - # If this entry's path is ahead of the others, consider it the new - # minimum acceptable path and adjust the other ranges. - if entry.encoded_path > str(encode_index_pb(min_common_path)): - min_common_path = entry.path - break - - common_keys.append({'index': entry.key, 'prop_name': range_.prop_name}) - - if entries_exhausted: - break - - # If not all paths are common, adjust earlier ranges. - if len(common_keys) < len(ranges): - for range_ in ranges: - range_.set_cursor(min_common_path, inclusive=True) - - continue - - reference_hash[entry.entity_reference] = common_keys - - # Ensure the chosen reference is excluded. 
- for range_ in ranges: - range_.set_cursor(min_common_path, inclusive=False) - - # If there are enough references to satisfy the query, stop fetching - # entries. - if len(reference_hash) == limit: - break - - raise gen.Return(reference_hash) - - @gen.coroutine - def zigzag_merge_join(self, query, filter_info, order_info): - """ Performs a composite query for queries which have multiple - equality filters. Uses a varient of the zigzag join merge algorithm. - - This method is used if there are only equality filters present. - If there are inequality filters, orders on properties which are not also - apart of a filter, or ancestors, this method does - not apply. Existing single property indexes are used and it does not - require the user to establish composite indexes ahead of time. - See http://www.youtube.com/watch?v=AgaL6NGpkB8 for Google's - implementation. - - Args: - query: A datastore_pb.Query. - filter_info: dict of property names mapping to tuples of filter - operators and values. - order_info: tuple with property name and the sort order. - Returns: - A tuple containing a list of entities retrieved from the given query and - a boolean indicating whether there are more results for the query. - """ - self.logger.debug('ZigZag Merge Join Query:\n{}'.format(query)) - more_results = False - - if not self.is_zigzag_merge_join(query, filter_info, order_info): - raise gen.Return((None, more_results)) - - kind = query.kind() - request_limit, check_more_results = self.get_limit(query) - fetch_count = request_limit - if check_more_results: - fetch_count += 1 - - app_id = clean_app_id(query.app()) - - # We only use references from the ascending property table. - direction = datastore_pb.Query_Order.ASCENDING - - prop_filters = [filter_ for filter_ in query.filter_list() - if filter_.property(0).name() != '__key__'] - ranges = [RangeIterator.from_filter(self.datastore_batch, app_id, - query.name_space(), kind, filter_) - for filter_ in prop_filters] - - # Check if the query is restricted to a single key. - path = None - key_filters = [filter_ for filter_ in query.filter_list() - if filter_.property(0).name() == '__key__'] - if len(key_filters) > 1: - raise BadRequest('Queries can only specify one key') - - if key_filters: - path = entity_pb.Path() - ref_val = key_filters[0].property(0).value().referencevalue() - for element in ref_val.pathelement_list(): - path.add_element().MergeFrom(element) - - if query.has_ancestor(): - for range_ in ranges: - range_.restrict_to_path(query.ancestor().path()) - - if query.has_compiled_cursor() and query.compiled_cursor().position_size(): - cursor = appscale_stub_util.ListCursor(query) - cursor_path = cursor._GetLastResult().key().path() - for range_ in ranges: - range_.set_cursor(cursor_path, inclusive=False) - - entities = [] - while True: - reference_hash = yield self._common_refs_from_ranges( - ranges, fetch_count, path) - new_entities = yield self.__fetch_and_validate_entity_set( - reference_hash, fetch_count, app_id, direction) - entities.extend(new_entities) - - # If there are enough entities to satisfy the query, stop fetching. - if len(entities) >= fetch_count: - break - - # If there weren't enough common references to fulfill the limit, the - # references are exhausted. 
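# _common_refs_from_ranges above is the core of the zigzag join: each
# single-property index range yields entity paths in ascending order, and the
# join advances whichever ranges are behind until all of them agree on a path.
# A compact synchronous illustration over plain sorted lists (hypothetical
# data; the real code operates on index entries, cursors and coroutines):

    def zigzag_intersection(sorted_ranges, limit):
        """Return up to `limit` values present in every ascending range."""
        iters = [iter(r) for r in sorted_ranges]
        try:
            current = [next(it) for it in iters]
        except StopIteration:
            return []

        matches = []
        while len(matches) < limit:
            low, high = min(current), max(current)
            if low == high:
                matches.append(low)
                try:
                    # Advance every range past the common value.
                    current = [next(it) for it in iters]
                except StopIteration:
                    break
            else:
                try:
                    # Advance only the ranges lagging behind the largest value.
                    current = [c if c >= high else next(it)
                               for c, it in zip(current, iters)]
                except StopIteration:
                    break
        return matches

    # zigzag_intersection([[1, 3, 5, 7], [2, 3, 5, 8], [3, 5, 9]], 10) -> [3, 5]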
- if len(reference_hash) < fetch_count: - break - - if check_more_results and len(entities) > request_limit: - more_results = True - - results = entities[:request_limit] - self.logger.debug('Returning {} results'.format(len(results))) - raise gen.Return((results, more_results)) - - def get_range_composite_query(self, query, filter_info, composite_index): - """ Gets the start and end key of a composite query. - - Args: - query: A datastore_pb.Query object. - filter_info: A dictionary mapping property names to tuples of filter - operators and values. - composite_index: An entity_pb.CompositeIndex object. - Returns: - A tuple of strings, the start and end key for the composite table. - """ - index_id = composite_index.id() - definition = composite_index.definition() - app_id = clean_app_id(query.app()) - name_space = '' - if query.has_name_space(): - name_space = query.name_space() - # Calculate the prekey for both the start and end key. - pre_comp_index_key = "{0}{1}{2}{4}{3}{4}".format(app_id, - self._NAMESPACE_SEPARATOR, name_space, index_id, self._SEPARATOR) - - if definition.ancestor() == 1: - ancestor_str = encode_index_pb(query.ancestor().path()) - pre_comp_index_key += "{0}{1}".format(ancestor_str, self._SEPARATOR) - - value = '' - index_value = "" - equality_value = "" - direction = datastore_pb.Query_Order.ASCENDING - for prop in definition.property_list(): - # Choose the least restrictive operation by default. - oper = datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL - - # The last property dictates the direction. - if prop.has_direction(): - direction = prop.direction() - # We loop through the definition list and remove the filters we've seen - # before if they are equality or exists. - all_filter_ops = [ii[0] for ii in filter_info.get(prop.name(), [])] - if not all_filter_ops: - continue - - if datastore_pb.Query_Filter.EQUAL in all_filter_ops: - filters = filter_info.get(prop.name()) - index_used = 0 - for index, filt in enumerate(filters): - if filt[0] == datastore_pb.Query_Filter.EQUAL: - index_used = index - break - - filter_to_use = filters.pop(index_used) - - value = str(filter_to_use[1]) - if prop.direction() == entity_pb.Index_Property.DESCENDING: - value = helper_functions.reverse_lex(value) - equality_value += str(value) + self._SEPARATOR - oper = filter_to_use[0] - index_value += str(value) + self._SEPARATOR - - elif datastore_pb.Query_Filter.EXISTS in all_filter_ops: - # Exists filters do not add to the index value. They are just - # placeholders. 
- filters = filter_info.get(prop.name()) - index_used = 0 - for index, filt in enumerate(filters): - if filt[0] == datastore_pb.Query_Filter.EXISTS: - index_used = index - break - - filters.pop(index_used) - else: - filters = filter_info.get(prop.name()) - if len(filters) > 1: - return self.composite_multiple_filter_prop( - filter_info[prop.name()], equality_value, pre_comp_index_key, - prop.direction()) - else: - value = str(filters[0][1]) - oper = filters[0][0] - if prop.direction() == entity_pb.Index_Property.DESCENDING: - value = helper_functions.reverse_lex(value) - index_value += str(value) + self._SEPARATOR - - start_value = '' - end_value = '' - if oper == datastore_pb.Query_Filter.LESS_THAN: - start_value = equality_value - end_value = index_value - if direction == datastore_pb.Query_Order.DESCENDING: - start_value = index_value + self._TERM_STRING - end_value = equality_value + self._TERM_STRING - elif oper == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - start_value = equality_value - end_value = index_value + self._TERM_STRING - if direction == datastore_pb.Query_Order.DESCENDING: - start_value = index_value - end_value = equality_value + self._TERM_STRING - elif oper == datastore_pb.Query_Filter.GREATER_THAN: - start_value = index_value + self._TERM_STRING - end_value = equality_value + self._TERM_STRING - if direction == datastore_pb.Query_Order.DESCENDING: - start_value = equality_value + self.MIN_INDEX_VALUE - end_value = index_value + self._SEPARATOR - elif oper == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - start_value = index_value - end_value = equality_value + self._TERM_STRING - if direction == datastore_pb.Query_Order.DESCENDING: - start_value = equality_value - end_value = index_value + self._TERM_STRING - elif oper == datastore_pb.Query_Filter.EQUAL: - if value == "": - start_value = index_value - end_value = index_value + self.MIN_INDEX_VALUE + self._TERM_STRING - else: - start_value = index_value - end_value = index_value + self._TERM_STRING - else: - raise ValueError("Unsuported operator {0} for composite query".\ - format(oper)) - start_key = "{0}{1}".format(pre_comp_index_key, start_value) - end_key = "{0}{1}".format(pre_comp_index_key, end_value) - - return start_key, end_key - - def composite_multiple_filter_prop(self, filter_ops, equality_value, - pre_comp_index_key, direction): - """Returns the start and end keys for a composite query which has multiple - filters for a single property, and potentially multiple equality - filters. - - Args: - filter_ops: dictionary mapping the inequality filter to operators and - values. - equality_value: A string used for the start and end key which is derived - from equality filter values. - pre_comp_index_key: A string, contains pre-values for start and end keys. - direction: datastore_pb.Query_Order telling the direction of the scan. - Returns: - The end and start key for doing a composite query. - """ - oper1 = None - oper2 = None - value1 = None - value2 = None - start_key = "" - end_key = "" - if filter_ops[0][0] == datastore_pb.Query_Filter.GREATER_THAN or \ - filter_ops[0][0] == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - oper1 = filter_ops[0][0] - oper2 = filter_ops[1][0] - value1 = str(filter_ops[0][1]) - value2 = str(filter_ops[1][1]) - else: - oper1 = filter_ops[1][0] - oper2 = filter_ops[0][0] - value1 = str(filter_ops[1][1]) - value2 = str(filter_ops[0][1]) - - if direction == datastore_pb.Query_Order.ASCENDING: - # The first operator will always be either > or >=. 
- if oper1 == datastore_pb.Query_Filter.GREATER_THAN: - start_value = equality_value + value1 + self._SEPARATOR + \ - self._TERM_STRING - elif oper1 == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - start_value = equality_value + value1 - else: - raise dbconstants.AppScaleMisconfiguredQuery("Bad filter ordering") - - # The second operator will be either < or <=. - if oper2 == datastore_pb.Query_Filter.LESS_THAN: - end_value = equality_value + value2 - elif oper2 == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - end_value = equality_value + value2 + self._SEPARATOR + \ - self._TERM_STRING - else: - raise dbconstants.AppScaleMisconfiguredQuery("Bad filter ordering") - - if direction == datastore_pb.Query_Order.DESCENDING: - value1 = helper_functions.reverse_lex(value1) - value2 = helper_functions.reverse_lex(value2) - if oper1 == datastore_pb.Query_Filter.GREATER_THAN: - end_value = equality_value + value1 - elif oper1 == datastore_pb.Query_Filter.GREATER_THAN_OR_EQUAL: - end_value = equality_value + value1 + self._SEPARATOR + \ - self._TERM_STRING - else: - raise dbconstants.AppScaleMisconfiguredQuery("Bad filter ordering") - - if oper2 == datastore_pb.Query_Filter.LESS_THAN: - start_value = equality_value + value2 + self._SEPARATOR + \ - self._TERM_STRING - elif oper2 == datastore_pb.Query_Filter.LESS_THAN_OR_EQUAL: - start_value = equality_value + value2 - else: - raise dbconstants.AppScaleMisconfiguredQuery("Bad filter ordering") - - start_key = "{0}{1}".format(pre_comp_index_key, start_value) - end_key = "{0}{1}".format(pre_comp_index_key, end_value) - - return start_key, end_key - - @gen.coroutine - def composite_v2(self, query, filter_info, composite_index): - """Performs composite queries using a range query against - the composite table. Faster than in-memory filters, but requires - indexes to be built upon each put. - - Args: - query: The query to run. - filter_info: dictionary mapping property names to tuples of - filter operators and values. - composite_index: An entity_pb.CompositeIndex object to use for the query. - Returns: - A tuple containing a list of entities retrieved from the given query and - a boolean indicating if there are more results for the query. - """ - self.logger.debug('Composite Query:\n{}'.format(query)) - - if composite_index.state() != entity_pb.CompositeIndex.READ_WRITE: - raise dbconstants.NeedsIndex( - 'The relevant composite index has not finished building') - - start_inclusive = True - startrow, endrow = self.get_range_composite_query(query, filter_info, - composite_index) - # Override the start_key with a cursor if given. 
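# --- Editor's illustrative sketch (not part of this patch) -----------------
# The next block resumes the scan from a compiled cursor.  Stripped down to
# its shape: when a cursor is present, the scan restarts at the row the
# cursor points to, and start_inclusive decides whether that row is returned
# again.  The Cursor tuple and row-key strings here are assumptions; the
# real code rebuilds the key with get_composite_index_key().
from collections import namedtuple

Cursor = namedtuple('Cursor', ['last_row_key', 'start_inclusive'])

def resume_range(default_start, end, cursor=None):
    # Without a cursor the scan covers [default_start, end] inclusively.
    if cursor is None:
        return default_start, end, True
    # With a cursor the scan restarts at the last returned row; an exclusive
    # cursor skips that row so it is not handed back twice.
    return cursor.last_row_key, end, cursor.start_inclusive

startrow, endrow, inclusive = resume_range(
    'app\x00ns\x00idx\x00', 'app\x00ns\x00idx\x00\xff',
    cursor=Cursor('app\x00ns\x00idx\x00row7', start_inclusive=False))
assert startrow.endswith('row7') and not inclusive
# ---------------------------------------------------------------------------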
- if query.has_compiled_cursor() and query.compiled_cursor().position_size(): - cursor = appscale_stub_util.ListCursor(query) - last_result = cursor._GetLastResult() - - startrow = self.get_composite_index_key(composite_index, last_result, - position_list=query.compiled_cursor().position_list(), - filters=query.filter_list()) - start_inclusive = False - if query.compiled_cursor().position_list()[0].start_inclusive() == 1: - start_inclusive = True - - if query.has_end_compiled_cursor(): - end_compiled_cursor = query.end_compiled_cursor() - list_cursor = appscale_stub_util.ListCursor(query) - last_result, _ = list_cursor._DecodeCompiledCursor(end_compiled_cursor) - endrow = self.get_composite_index_key(composite_index, last_result, - position_list=end_compiled_cursor.position_list(), - filters=query.filter_list()) - - table_name = dbconstants.COMPOSITE_TABLE - column_names = dbconstants.COMPOSITE_SCHEMA - - more_results = False - request_limit, check_more_results = self.get_limit(query) - fetch_count = request_limit - if check_more_results: - fetch_count += 1 - - if startrow > endrow: - raise gen.Return(([], more_results)) - - # TODO: Check if we should do this for other comparisons. - multiple_equality_filters = self.__get_multiple_equality_filters( - query.filter_list()) - - entities = [] - current_limit = fetch_count - while True: - references = yield self.datastore_batch.range_query( - table_name, column_names, startrow, endrow, current_limit, - offset=0, start_inclusive=start_inclusive, end_inclusive=True) - - # This is a projection query. - if query.property_name_size() > 0: - potential_entities = self._extract_entities_from_composite_indexes( - query, references, composite_index) - else: - potential_entities = yield self.__fetch_entities(references) - - if len(multiple_equality_filters) > 0: - self.logger.debug('Detected multiple equality filters on a repeated ' - 'property. Removing results that do not match query.') - potential_entities = self.__apply_multiple_equality_filters( - potential_entities, multiple_equality_filters) - - entities.extend(potential_entities) - - # If we have enough valid entities to satisfy the query, we're done. - if len(entities) >= fetch_count: - break - - # If we received fewer references than we asked for, they are exhausted. - if len(references) < current_limit: - break - - # If all of the references that we fetched were valid, we're done. - if len(potential_entities) == len(references): - break - - invalid_refs = len(references) - len(potential_entities) - - # Pad the limit to increase the likelihood of fetching all the valid - # references that we need. - current_limit = invalid_refs + dbconstants.MAX_GROUPS_FOR_XG - - self.logger.debug('{} entities do not match query. ' - 'Fetching {} more references.'.format(invalid_refs, current_limit)) - - last_startrow = startrow - # Start from the last reference fetched. - startrow = references[-1].keys()[0] - - if startrow == last_startrow: - raise dbconstants.AppScaleDBError( - 'An infinite loop was detected while fetching references.') - - if check_more_results and len(entities) > request_limit: - more_results = True - - results = entities[:request_limit] - self.logger.debug('Returning {} results'.format(len(results))) - raise gen.Return((results, more_results)) - - def __get_multiple_equality_filters(self, filter_list): - """ Returns filters from the query that contain multiple equality - comparisons on repeated properties. - - Args: - filter_list: A list of filters from the query. 
- Returns: - A dictionary that contains properties with multiple equality filters. - """ - equality_filters = {} - for query_filter in filter_list: - if query_filter.op() != datastore_pb.Query_Filter.EQUAL: - continue - - for prop in query_filter.property_list(): - if prop.name() not in equality_filters: - equality_filters[prop.name()] = [] - - equality_filters[prop.name()].append(prop) - - single_eq_filters = [] - for prop in equality_filters: - if len(equality_filters[prop]) < 2: - single_eq_filters.append(prop) - for prop in single_eq_filters: - del equality_filters[prop] - - return equality_filters - - def __apply_multiple_equality_filters(self, entities, filter_dict): - """ Removes entities that do not meet the criteria defined by multiple - equality filters. - - Args: - entities: A list of entities that need filtering. - filter_dict: A dictionary containing the relevant filters. - Returns: - A list of filtered entities. - """ - filtered_entities = [] - for entity in entities: - entity_proto = entity_pb.EntityProto(entity) - - relevant_props_in_entity = {} - for entity_prop in entity_proto.property_list(): - if entity_prop.name() not in filter_dict: - continue - - if entity_prop.name() not in relevant_props_in_entity: - relevant_props_in_entity[entity_prop.name()] = [] - - relevant_props_in_entity[entity_prop.name()].append(entity_prop) - - passes_all_filters = True - for filter_prop_name in filter_dict: - if filter_prop_name not in relevant_props_in_entity: - raise dbconstants.AppScaleDBError( - 'Property name not found in entity.') - - filter_props = filter_dict[filter_prop_name] - entity_props = relevant_props_in_entity[filter_prop_name] - - for filter_prop in filter_props: - # Check if filter value is in repeated property. - passes_filter = False - for entity_prop in entity_props: - if entity_prop.value().Equals(filter_prop.value()): - passes_filter = True - break - - if not passes_filter: - passes_all_filters = False - break - - if not passes_all_filters: - break - - if passes_all_filters: - filtered_entities.append(entity) - - return filtered_entities - - def __extract_value_from_index(self, index_entry, direction): - """ Takes an index entry and returns the value of the property. - - This function is for single property indexes only. - - Args: - index_entry: A dictionary containing an index entry. - direction: The direction of the index. - Returns: - A property value. - """ - reference_key = index_entry.keys()[0] - tokens = reference_key.split(self._SEPARATOR) - - # Sometimes the value can contain the separator. - value = self._SEPARATOR.join(tokens[4:-1]) - - if direction == datastore_pb.Query_Order.DESCENDING: - value = helper_functions.reverse_lex(value) - - entity = entity_pb.EntityProto() - prop = entity.add_property() - prop_value = prop.mutable_value() - self.__decode_index_str(value, prop_value) - - return prop_value - - def __valid_index_entry(self, entry, entities, direction, prop_name): - """ Checks if an index entry is valid. - - Args: - entry: A dictionary containing an index entry. - entities: A dictionary of available valid entities. - direction: The direction of the index. - prop_name: A string containing the property name. - Returns: - A boolean indicating whether or not the entry is valid. - Raises: - AppScaleDBError: The given property name is not in the matching entity. - """ - # Skip validating reserved properties. 
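# --- Editor's illustrative sketch (not part of this patch) -----------------
# The validation that follows treats an index entry as valid only if the
# referenced entity still exists and still carries the indexed value.  A toy
# version with plain dicts (the entry/entity layout is an assumption; the
# real code compares entity_pb property values):
def valid_index_entry(entry, entities):
    # Stale reference: the entity was deleted or its write was rolled back.
    if entry['reference'] not in entities:
        return False
    # Stale value: the property was changed or emptied since indexing.
    current_values = entities[entry['reference']].get(entry['prop_name'], [])
    return entry['value'] in current_values

entities = {'app\x00ns\x00Greeting:5': {'tag': ['news', 'tech']}}
assert valid_index_entry(
    {'reference': 'app\x00ns\x00Greeting:5', 'prop_name': 'tag',
     'value': 'tech'}, entities)
assert not valid_index_entry(
    {'reference': 'app\x00ns\x00Greeting:5', 'prop_name': 'tag',
     'value': 'sports'}, entities)
# ---------------------------------------------------------------------------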
- if dbconstants.RESERVED_PROPERTY_NAME.match(prop_name): - return True - - reference = entry[entry.keys()[0]]['reference'] - - # Reference may be absent from entities if the entity was deleted or part - # of an invalid transaction. - if reference not in entities: - return False - - index_value = self.__extract_value_from_index(entry, direction) - - entity = entities[reference] - entity_proto = entity_pb.EntityProto(entity) - - # TODO: Return faster if not a repeated property. - prop_found = False - for prop in entity_proto.property_list(): - if prop.name() != prop_name: - continue - prop_found = True - - if index_value.has_uservalue() and prop.value().has_uservalue(): - if index_value.uservalue().email() == prop.value().uservalue().email(): - return True - - if index_value.Equals(prop.value()): - return True - - if not prop_found: - # Most likely, a repeated property was populated and then emptied. - self.logger.debug('Property name {} not found in entity.'. - format(prop_name)) - - return False - - def remove_extra_props(self, query, results): - """ Decodes entities, strips extra properties, and re-encodes them. - - Args: - query: A datastore_pb.Query object. - results: A list of encoded entities. - Returns: - A list of encoded entities. - """ - projected_props = query.property_name_list() - - cleaned_results = [] - for result in results: - entity = entity_pb.EntityProto(result) - props_to_keep = [prop for prop in entity.property_list() - if prop.name() in projected_props] - - # If the entity does not have the property, do not include it in the - # results. Raw (unindexed) properties should not be projected. - if not props_to_keep: - continue - - entity.clear_property() - for prop in props_to_keep: - # Projected properties should have a meaning set to INDEX_VALUE. - prop.set_meaning(entity_pb.Property.INDEX_VALUE) - new_prop = entity.add_property() - new_prop.MergeFrom(prop) - - cleaned_results.append(entity.Encode()) - - return cleaned_results - - def _extract_entities_from_composite_indexes(self, query, index_result, - composite_index): - """ Takes index values and creates partial entities out of them. - - This is required for projection queries where the query specifies certain - properties which should be returned. Distinct queries are also handled here. - A distinct query removes entities with duplicate index values. This will - only return the first result for entities which have the same values for - the properties that are being projected. - - Args: - query: A datastore_pb.Query object. - index_result: A list of index strings. - composite_index: An entity_pb.CompositeIndex object. - Returns: - A list of EntityProtos. - """ - definition = composite_index.definition() - prop_name_list = query.property_name_list() - - distinct_checker = [] - entities = [] - for index in index_result: - entity = entity_pb.EntityProto() - tokens = index.keys()[0].split(self._SEPARATOR) - app_id = tokens.pop(0) - namespace = tokens.pop(0) - comp_definition_id = tokens.pop(0) - if definition.ancestor() == 1: - ancestor = tokens.pop(0)[:-1] - distinct_str = "" - value_index = 0 - for def_prop in definition.property_list(): - # If the value contained the separator, try to recover the value. 
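# --- Editor's illustrative sketch (not part of this patch) -----------------
# The loop below widens the token slice until the joined string decodes,
# because an encoded property value may itself contain the separator.  A toy
# version where "decoding" is an injected predicate (standing in for
# __decode_index_str raising ProtocolBufferDecodeError on bad input):
SEPARATOR = '\x00'

def recover_value(tokens, start, decodes):
    # Try progressively wider slices tokens[start:end] until one decodes.
    end = start + 1
    while end <= len(tokens):
        candidate = SEPARATOR.join(tokens[start:end])
        if decodes(candidate):
            return candidate, end
        end += 1
    raise ValueError('no decodable value found')

tokens = ['a', 'b', 'tail']  # the stored value 'a\x00b' arrived as two tokens
value, next_index = recover_value(tokens, 0, decodes=lambda v: v == 'a\x00b')
assert value == 'a\x00b' and next_index == 2
# ---------------------------------------------------------------------------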
- if len(tokens[:-1]) > len(definition.property_list()): - end_slice = value_index + 1 - while end_slice <= len(tokens[:-1]): - value = self._SEPARATOR.join(tokens[value_index:end_slice]) - if def_prop.direction() == entity_pb.Index_Property.DESCENDING: - value = helper_functions.reverse_lex(value) - prop_value = entity_pb.PropertyValue() - try: - self.__decode_index_str(value, prop_value) - value_index = end_slice - break - except ProtocolBufferDecodeError: - end_slice += 1 - else: - value = tokens[value_index] - if def_prop.direction() == entity_pb.Index_Property.DESCENDING: - value = helper_functions.reverse_lex(value) - value_index += 1 - - if def_prop.name() not in prop_name_list: - self.logger.debug('Skipping prop {} in projection'. - format(def_prop.name())) - continue - - prop = entity.add_property() - prop.set_name(def_prop.name()) - prop.set_meaning(entity_pb.Property.INDEX_VALUE) - prop.set_multiple(False) - - distinct_str += value - prop_value = prop.mutable_value() - self.__decode_index_str(value, prop_value) - - key_string = tokens[-1] - path = decode_path(key_string) - - # Set the entity group. - ent_group = entity.mutable_entity_group() - new_element = ent_group.add_element() - new_element.MergeFrom(path.element(0)) - - # Set the key path. - key = entity.mutable_key() - key.set_app(clean_app_id(app_id)) - if namespace: - key.set_name_space(namespace) - - key.mutable_path().MergeFrom(path) - - # Filter entities if this is a distinct query. - if query.group_by_property_name_size() == 0: - entities.append(entity.Encode()) - elif distinct_str not in distinct_checker: - entities.append(entity.Encode()) - - distinct_checker.append(distinct_str) - return entities - - # These are the three different types of queries attempted. Queries - # can be identified by their filters and orderings. - # TODO: Queries have hints which help in picking which strategy to do first. - _QUERY_STRATEGIES = [ - __single_property_query, - __kind_query, - zigzag_merge_join, - ] - - @gen.coroutine - def __get_query_results(self, query): - """Applies the strategy for the provided query. - - Args: - query: A datastore_pb.Query protocol buffer. - Returns: - Result set. - """ - if query.has_transaction() and not query.has_ancestor(): - raise apiproxy_errors.ApplicationError( - datastore_pb.Error.BAD_REQUEST, - 'Only ancestor queries are allowed inside transactions.') - - num_components = len(query.filter_list()) + len(query.order_list()) - if query.has_ancestor(): - num_components += 1 - if num_components > self._MAX_QUERY_COMPONENTS: - raise apiproxy_errors.ApplicationError( - datastore_pb.Error.BAD_REQUEST, - ('query is too large. 
may not have more than {0} filters' - ' + sort orders ancestor total'.format(self._MAX_QUERY_COMPONENTS))) - - for prop_name in query.property_name_list(): - if dbconstants.RESERVED_PROPERTY_NAME.match(prop_name): - raise dbconstants.BadRequest('projections are not supported for the ' - 'property: {}'.format(prop_name)) - - app_id = clean_app_id(query.app()) - - self.validate_app_id(app_id) - filters, orders = datastore_index.Normalize(query.filter_list(), - query.order_list(), []) - filter_info = self.generate_filter_info(filters) - order_info = self.generate_order_info(orders) - - composite_indexes = yield self.get_indexes(app_id) - index_to_use = _FindIndexToUse(query, composite_indexes) - if index_to_use is not None: - result, more_results = yield self.composite_v2(query, filter_info, - index_to_use) - raise gen.Return((result, more_results)) - - for strategy in DatastoreDistributed._QUERY_STRATEGIES: - results, more_results = yield strategy(self, query, filter_info, - order_info) - if results or results == []: - raise gen.Return((results, more_results)) - - raise dbconstants.NeedsIndex( - 'An additional index is required to satisfy the query') - - @gen.coroutine - def _dynamic_run_query(self, query, query_result): - """Populates the query result and use that query result to - encode a cursor. - - Args: - query: The query to run. - query_result: The response given to the application server. - """ - result, more_results = yield self.__get_query_results(query) - last_entity = None - count = 0 - offset = query.offset() - if result: - query_result.set_skipped_results(len(result) - offset) - # Last entity is used for the cursor. It needs to be set before - # applying the offset. - last_entity = result[-1] - count = len(result) - result = result[offset:] - if query.has_limit(): - result = result[:query.limit()] - - cur = UnprocessedQueryCursor(query, result, last_entity) - cur.PopulateQueryResult(count, query.offset(), query_result) - - query_result.set_more_results(more_results) - - # If there were no results then we copy the last cursor so future queries - # can start off from the same place. - if query.has_compiled_cursor() and not query_result.has_compiled_cursor(): - query_result.mutable_compiled_cursor().CopyFrom(query.compiled_cursor()) - elif query.has_compile() and not query_result.has_compiled_cursor(): - query_result.mutable_compiled_cursor().\ - CopyFrom(datastore_pb.CompiledCursor()) - - @gen.coroutine - def dynamic_add_actions(self, app_id, request, service_id, version_id): - """ Adds tasks to enqueue upon committing the transaction. - - Args: - app_id: A string specifying the application ID. - request: A protocol buffer AddActions request. - service_id: A string specifying the client's service ID. - version_id: A string specifying the client's version ID. - """ - txid = request.add_request(0).transaction().handle() - - # Check if the tasks will exceed the limit. Though this method shouldn't - # be called concurrently for a given transaction under normal - # circumstances, this CAS should eventually be done under a lock. 
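# --- Editor's illustrative sketch (not part of this patch) -----------------
# The check that follows is a read-then-verify cap on the number of tasks a
# single transaction may enqueue.  A synchronous toy version (the in-memory
# store and the limit of 5 are assumptions; the real limit is
# _MAX_ACTIONS_PER_TXN and the count lives in the datastore):
MAX_ACTIONS_PER_TXN = 5

class TooManyTasks(Exception):
    pass

def add_transactional_tasks(task_store, txid, new_tasks):
    existing = task_store.setdefault(txid, [])
    # Without a lock this count can race with a concurrent caller, which is
    # exactly the caveat the comment above points out.
    if len(existing) + len(new_tasks) > MAX_ACTIONS_PER_TXN:
        raise TooManyTasks('Only {} tasks can be added to a transaction'
                           .format(MAX_ACTIONS_PER_TXN))
    existing.extend(new_tasks)

store = {}
add_transactional_tasks(store, 42, ['task-a', 'task-b'])
try:
    add_transactional_tasks(store, 42, ['t1', 't2', 't3', 't4'])
except TooManyTasks:
    pass  # expected: 2 existing + 4 new exceeds the cap of 5
# ---------------------------------------------------------------------------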
- existing_tasks = yield self.datastore_batch.transactional_tasks_count( - app_id, txid) - if existing_tasks > _MAX_ACTIONS_PER_TXN: - message = 'Only {} tasks can be added to a transaction'.\ - format(_MAX_ACTIONS_PER_TXN) - raise dbconstants.ExcessiveTasks(message) - - yield self.datastore_batch.add_transactional_tasks( - app_id, txid, request.add_request_list(), service_id, version_id) - - @gen.coroutine - def setup_transaction(self, app_id, is_xg): - """ Gets a transaction ID for a new transaction. - - Args: - app_id: The application for which we are getting a new transaction ID. - is_xg: A bool that indicates if this transaction operates over multiple - entity groups. - Returns: - A long representing a unique transaction ID. - """ - txid = self.transaction_manager.create_transaction_id(app_id, xg=is_xg) - in_progress = self.transaction_manager.get_open_transactions(app_id) - yield self.datastore_batch.start_transaction( - app_id, txid, is_xg, in_progress) - raise gen.Return(txid) - - @gen.coroutine - def enqueue_transactional_tasks(self, app, tasks): - """ Send a BulkAdd request to the taskqueue service. - - Args: - app: A string specifying an application ID. - task_ops: A list of tasks. - """ - # Assume all tasks have the same client version. - service_id = tasks[0]['service_id'] - version_id = tasks[0]['version_id'] - - add_requests = [task['task'] for task in tasks] - self.logger.debug('Enqueuing {} tasks'.format(len(add_requests))) - - # The transaction has already been committed, but enqueuing the tasks may - # fail. We need a way to enqueue the task with the condition that it - # executes only upon successful commit. For now, we just log the error. - try: - yield self.taskqueue_client.add_tasks(app, service_id, version_id, - add_requests) - except EnqueueError as error: - self.logger.error('Unable to enqueue tasks: {}'.format(error)) - - @gen.coroutine - def apply_txn_changes(self, app, txn): - """ Apply all operations in transaction table in a single batch. - - Args: - app: A string containing an application ID. - txn: An integer specifying a transaction ID. - """ - metadata = yield self.datastore_batch.get_transaction_metadata(app, txn) - - # If too much time has passed, the transaction cannot be committed. - if 'start' not in metadata: - raise dbconstants.BadRequest('Unable to find transaction') - - tx_duration = datetime.datetime.utcnow() - metadata['start'] - if (tx_duration > datetime.timedelta(seconds=MAX_TX_DURATION)): - raise dbconstants.BadRequest('The referenced transaction has expired') - - # If there were no changes, the transaction is complete. - if (len(metadata['puts']) + len(metadata['deletes']) + - len(metadata['tasks']) == 0): - return - - # Fail if there are too many groups involved in the transaction. - groups_put = {group_for_key(key).Encode() for key in metadata['puts']} - groups_deleted = {group_for_key(key).Encode() - for key in metadata['deletes']} - groups_mutated = groups_put | groups_deleted - tx_groups = groups_mutated | metadata['reads'] - if len(tx_groups) > dbconstants.MAX_GROUPS_FOR_XG: - raise dbconstants.TooManyGroupsException( - 'Too many groups in transaction') - - composite_indices = yield self.get_indexes(app) - decoded_groups = [entity_pb.Reference(group) for group in tx_groups] - self.transaction_manager.set_groups(app, txn, decoded_groups) - - # Allow the lock to stick around if there is an issue applying the batch. 
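# --- Editor's illustrative sketch (not part of this patch) -----------------
# The block below takes one lock covering every entity group touched by the
# transaction and releases it only on paths where the batch is known not to
# be half-applied.  A toy version with a plain threading lock; leaving the
# lock held on failure stands in for handing the cleanup to the transaction
# groomer in the real system.
import threading

def apply_with_group_lock(lock, apply_batch):
    if not lock.acquire(timeout=5):
        raise TimeoutError('Unable to acquire entity group locks')
    try:
        apply_batch()
    except Exception:
        # Keep the lock: a repair process is expected to finish or undo the
        # partially applied batch before anyone else touches these groups.
        raise
    else:
        lock.release()

group_lock = threading.Lock()
apply_with_group_lock(group_lock, lambda: None)
assert group_lock.acquire(blocking=False)  # released after a clean apply
group_lock.release()
# ---------------------------------------------------------------------------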
- lock = entity_lock.EntityLock(self.zookeeper.handle, decoded_groups, txn) - try: - yield lock.acquire() - except entity_lock.LockTimeout: - raise Timeout('Unable to acquire entity group locks') - - try: - try: - group_txids = yield self.datastore_batch.group_updates( - metadata['reads']) - except dbconstants.TRANSIENT_CASSANDRA_ERRORS: - lock.release() - self.transaction_manager.delete_transaction_id(app, txn) - raise dbconstants.AppScaleDBConnectionError( - 'Unable to fetch group updates') - - for group_txid in group_txids: - if group_txid in metadata['in_progress'] or group_txid > txn: - lock.release() - self.transaction_manager.delete_transaction_id(app, txn) - raise dbconstants.ConcurrentModificationException( - 'A group was modified after this transaction was started.') - - # Fetch current values so we can remove old indices. - entity_table_keys = [encode_entity_table_key(key) - for key, _ in metadata['puts'].iteritems()] - entity_table_keys.extend([encode_entity_table_key(key) - for key in metadata['deletes']]) - try: - current_values = yield self.datastore_batch.batch_get_entity( - dbconstants.APP_ENTITY_TABLE, entity_table_keys, APP_ENTITY_SCHEMA) - except dbconstants.AppScaleDBConnectionError: - lock.release() - self.transaction_manager.delete_transaction_id(app, txn) - raise - - batch = [] - entity_changes = [] - for encoded_key, encoded_entity in metadata['puts'].iteritems(): - key = entity_pb.Reference(encoded_key) - entity_table_key = encode_entity_table_key(key) - current_value = None - if current_values[entity_table_key]: - current_value = entity_pb.EntityProto( - current_values[entity_table_key][APP_ENTITY_SCHEMA[0]]) - - entity = entity_pb.EntityProto(encoded_entity) - mutations = mutations_for_entity(entity, txn, current_value, - composite_indices) - batch.extend(mutations) - - entity_changes.append({'key': key, 'old': current_value, - 'new': entity}) - - for key in metadata['deletes']: - entity_table_key = encode_entity_table_key(key) - if not current_values[entity_table_key]: - continue - - current_value = entity_pb.EntityProto( - current_values[entity_table_key][APP_ENTITY_SCHEMA[0]]) - - deletions = deletions_for_entity(current_value, composite_indices) - batch.extend(deletions) - - entity_changes.append({'key': key, 'old': current_value, 'new': None}) - - for group in groups_mutated: - batch.append( - {'table': 'group_updates', 'key': bytearray(group), - 'last_update': txn}) - - if batch_size(batch) > LARGE_BATCH_THRESHOLD: - try: - yield self.datastore_batch.large_batch(app, batch, entity_changes, - txn) - except BatchNotApplied as error: - # If the "applied" switch has not been flipped, the lock can be - # released. The transaction ID is kept so that the groomer can - # clean up the batch tables. - lock.release() - raise dbconstants.AppScaleDBConnectionError(str(error)) - else: - try: - yield self.datastore_batch.normal_batch(batch, txn) - except dbconstants.AppScaleDBConnectionError: - # Since normal batches are guaranteed to be atomic, the lock can - # be released. - lock.release() - self.transaction_manager.delete_transaction_id(app, txn) - raise - - lock.release() - - finally: - # In case of failure entity group lock should stay acquired - # as transaction groomer will handle it later. - # But tornado lock must be released. - lock.ensure_release_tornado_lock() - - self.transaction_manager.delete_transaction_id(app, txn) - - # Process transactional tasks. 
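# --- Editor's illustrative sketch (not part of this patch) -----------------
# The commit path below hands task enqueueing to the event loop instead of
# awaiting it, so a slow or failing taskqueue cannot undo a transaction that
# has already been applied.  An asyncio rendition of that shape (the real
# code uses tornado's IOLoop.spawn_callback; the sleep is a stand-in for the
# taskqueue BulkAdd call):
import asyncio
import logging

logger = logging.getLogger(__name__)

async def enqueue_tasks(tasks):
    # Enqueue failures are logged, never raised: the commit already happened.
    try:
        await asyncio.sleep(0)  # stand-in for the remote BulkAdd request
        logger.debug('Enqueued %d tasks', len(tasks))
    except Exception as error:
        logger.error('Unable to enqueue tasks: %s', error)

async def commit(tasks):
    # ... batch applied and locks released above ...
    if tasks:
        asyncio.get_running_loop().create_task(enqueue_tasks(tasks))
    # In this demo we yield briefly so the background task can finish before
    # the loop shuts down; the real commit path simply returns.
    await asyncio.sleep(0.01)

asyncio.run(commit([{'task': 'send-email'}]))
# ---------------------------------------------------------------------------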
- if metadata['tasks']: - IOLoop.current().spawn_callback(self.enqueue_transactional_tasks, app, - metadata['tasks']) - - def rollback_transaction(self, app_id, txid): - """ Handles the rollback phase of a transaction. - - Args: - app_id: The application ID requesting the rollback. - txid: An integer specifying a transaction ID. - Raises: - InternalError if unable to roll back transaction. - """ - self.logger.info( - 'Doing a rollback on transaction {} for {}'.format(txid, app_id)) - try: - self.zookeeper.notify_failed_transaction(app_id, txid) - except zktransaction.ZKTransactionException as error: - raise InternalError(str(error)) - - @gen.coroutine - def get_indexes(self, project_id): - """ Retrieves list of indexes for a project. - - Args: - project_id: A string specifying a project ID. - Returns: - A list of entity_pb.CompositeIndex objects. - Raises: - BadRequest if project_id is not found. - InternalError if ZooKeeper is not accessible. - """ - try: - project_index_manager = self.index_manager.projects[project_id] - except KeyError: - raise BadRequest('project_id: {} not found'.format(project_id)) - - try: - indexes = project_index_manager.indexes_pb - except IndexInaccessible: - raise InternalError('ZooKeeper is not accessible') - - raise gen.Return(indexes) - - @gen.coroutine - def add_indexes(self, project_id, indexes): - """ Adds composite index definitions to a project. - - Only indexes that do not already exist will be created. - Args: - project_id: A string specifying a project ID. - indexes: An iterable containing index definitions. - """ - merge_indexes(self.zookeeper.handle, project_id, indexes) - - def _zk_state_listener(self, state): - """ Handles changes to the ZooKeeper connection state. - - Args: - state: A string specifying the new connection state. - """ - # Discard any allocated blocks if disconnected from ZooKeeper. - if state in [KazooState.LOST, KazooState.SUSPENDED]: - self.scattered_allocators.clear() diff --git a/AppDB/appscale/datastore/dbconstants.py b/AppDB/appscale/datastore/dbconstants.py index 574ca00793..9db0ec86b1 100644 --- a/AppDB/appscale/datastore/dbconstants.py +++ b/AppDB/appscale/datastore/dbconstants.py @@ -1,7 +1,6 @@ """ Datastore Constants """ -import cassandra.cluster import re SECRET_LOCATION = "/etc/appscale/secret.key" @@ -53,12 +52,8 @@ # Tombstone value for soft deletes. TOMBSTONE = "APPSCALE_SOFT_DELETE" -TRANSIENT_CASSANDRA_ERRORS = ( - cassandra.Unavailable, cassandra.Timeout, cassandra.CoordinationFailure, - cassandra.OperationTimedOut, cassandra.cluster.NoHostAvailable) - # The database backends supported by the AppScale datastore. -VALID_DATASTORES = ['cassandra', 'fdb'] +VALID_DATASTORES = ['fdb'] # Table names USERS_TABLE = "USERS__" diff --git a/AppDB/appscale/datastore/dbinterface.py b/AppDB/appscale/datastore/dbinterface.py deleted file mode 100644 index 360c597f3f..0000000000 --- a/AppDB/appscale/datastore/dbinterface.py +++ /dev/null @@ -1,37 +0,0 @@ -# -# AppScale Datastore Interface -# -__author__="Soo Hwan Park" -__date__="$2009.5.5 18:27:00$" - -import os - - -class AppDBInterface: - def get_entity(self, table_name, row_key, column_names, txnid = 0): - raise NotImplementedError("get_entity is not implemented in %s." % self.__class__) - - def put_entity(self, table_name, row_key, column_names, cell_values, txnid = 0): - raise NotImplementedError("put_entity is not implemented in %s." 
% self.__class__) - - def put_entity_dict(self, table_name, row_key, value_dict): - raise NotImplementedError("put_entity_dict is not implemented in %s." % self.__class__) - - def get_table(self, table_name, column_names, txnid = 0): - raise NotImplementedError("get_table is not implemented in %s." % self.__class__) - - def delete_row(self, table_name, row_id, txnid = 0): - raise NotImplementedError("delete_row is not implemented in %s." % self.__class__) - - def get_schema(self, table_name): - raise NotImplementedError("get_schema is not implemented in %s." % self.__class__) - - def delete_table(self, table_name): - raise NotImplementedError("delete_table is not implemented in %s." % self.__class__) - - def commit(self, txnid): - raise NotImplementedError("commit is not implemented in %s." % self.__class__) - def rollback(self, txnid): - raise NotImplementedError("rollback is not implemented in %s." % self.__class__) - def setup_transaction(self, txnid): - raise NotImplementedError("rollback is not implemented in %s." % self.__class__) diff --git a/AppDB/appscale/datastore/entity_utils.py b/AppDB/appscale/datastore/entity_utils.py deleted file mode 100644 index 6deddc7381..0000000000 --- a/AppDB/appscale/datastore/entity_utils.py +++ /dev/null @@ -1,58 +0,0 @@ -""" Utilities for parsing datastore entities. """ - -from appscale.datastore import dbconstants -from appscale.datastore.dbconstants import JOURNAL_SCHEMA -from appscale.datastore.dbconstants import JOURNAL_TABLE -from appscale.datastore.dbconstants import ID_SEPARATOR -from appscale.datastore.dbconstants import KEY_DELIMITER -from appscale.datastore.dbconstants import KIND_SEPARATOR -from google.appengine.datastore import entity_pb - - -def get_root_key_from_entity_key(key): - """ Extracts the root key from an entity key. We - remove any excess children from a string to get to - the root key. - - Args: - entity_key: A string representing a row key. - Returns: - The root key extracted from the row key. - """ - tokens = key.split(KIND_SEPARATOR) - return tokens[0] + KIND_SEPARATOR - - -def get_kind_from_entity_key(entity_key): - """ Extracts the kind from a key to the entity table. - - Args: - entity_key: A str representing a row key to the entity table. - Returns: - A str representing the kind. - """ - tokens = entity_key.split(KEY_DELIMITER) - return tokens[2].split(ID_SEPARATOR, 1)[0] - - -def fetch_journal_entry(db_access, key): - """ Fetches the given key from the journal. - - Args: - db_access: A datastore accessor. - keys: A str, the key to fetch. - Returns: - The entity fetched from the datastore, or None if it was deleted. 
- """ - result = db_access.batch_get_entity_sync( - JOURNAL_TABLE, [key], JOURNAL_SCHEMA) - if len(result.keys()) == 0: - return None - - if JOURNAL_SCHEMA[0] in result.keys()[0]: - ent_string = result[0][JOURNAL_SCHEMA[0]] - if ent_string == dbconstants.TOMBSTONE: - return None - return entity_pb.EntityProto().ParseFromString(ent_string) - else: - return None diff --git a/AppDB/appscale/datastore/fdb/fdb_datastore.py b/AppDB/appscale/datastore/fdb/fdb_datastore.py index eabfaeb3ca..0639e3f990 100644 --- a/AppDB/appscale/datastore/fdb/fdb_datastore.py +++ b/AppDB/appscale/datastore/fdb/fdb_datastore.py @@ -34,7 +34,6 @@ from appscale.datastore.fdb.utils import ( _MAX_SEQUENTIAL_ID, ABSENT_VERSION, DS_ROOT, fdb, FDBErrorCodes, next_entity_version, ScatteredAllocator, TornadoFDB) -from appscale.datastore.index_manager import IndexInaccessible sys.path.append(APPSCALE_PYTHON_APPSERVER) from google.appengine.datastore import entity_pb diff --git a/AppDB/appscale/datastore/groomer.py b/AppDB/appscale/datastore/groomer.py deleted file mode 100644 index b661a3635b..0000000000 --- a/AppDB/appscale/datastore/groomer.py +++ /dev/null @@ -1,1212 +0,0 @@ -import datetime -import logging -import os -import random -import re -import sys -import threading -import time - -from kazoo.client import KazooClient, KazooRetry -from tornado import gen - -from appscale.datastore.utils import tornado_synchronous - -import appscale_datastore_batch -import dbconstants -import utils - -from appscale.common import appscale_info -from appscale.common import constants -from appscale.common.constants import ZK_PERSISTENT_RECONNECTS -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.common.unpackaged import DASHBOARD_DIR -from . import helper_functions -from .cassandra_env import cassandra_interface -from .datastore_distributed import DatastoreDistributed -from .index_manager import IndexManager -from .utils import get_composite_indexes_rows -from .zkappscale import zktransaction as zk -from .zkappscale.entity_lock import EntityLock -from .zkappscale.transaction_manager import TransactionManager - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.api import apiproxy_stub_map -from google.appengine.api import datastore_distributed -from google.appengine.api.memcache import memcache_distributed -from google.appengine.datastore import datastore_pb -from google.appengine.datastore import entity_pb -from google.appengine.datastore.datastore_query import Cursor -from google.appengine.ext import db -from google.appengine.ext.db import stats -from google.appengine.ext.db import metadata -from google.appengine.api import datastore_errors - -sys.path.append(os.path.join(DASHBOARD_DIR, 'lib')) -from dashboard_logs import RequestLogLine - -logger = logging.getLogger(__name__) - - -class TaskName(db.Model): - """ A datastore model for tracking task names in order to prevent - tasks with the same name from being enqueued repeatedly. - - Attributes: - timestamp: The time the task was enqueued. - """ - STORED_KIND_NAME = "__task_name__" - timestamp = db.DateTimeProperty(auto_now_add=True) - queue = db.StringProperty(required=True) - state = db.StringProperty(required=True) - endtime = db.DateTimeProperty() - app_id = db.StringProperty(required=True) - - @classmethod - def kind(cls): - """ Kind name override. """ - return cls.STORED_KIND_NAME - - -class DatastoreGroomer(threading.Thread): - """ Scans the entire database for each application. 
""" - - # The amount of seconds between polling to get the groomer lock. - # Each datastore server does this poll, so it happens the number - # of datastore servers within this lock period. - LOCK_POLL_PERIOD = 4 * 60 * 60 # <- 4 hours - - # Retry sleep on datastore error in seconds. - DB_ERROR_PERIOD = 30 - - # The number of entities retrieved in a datastore request. - BATCH_SIZE = 100 - - # Any kind that is of __*__ is private and should not have stats. - PRIVATE_KINDS = '__(.*)__' - - # Any kind that is of _*_ is protected and should not have stats. - PROTECTED_KINDS = '_(.*)_' - - # The amount of time in seconds before we want to clean up task name holders. - TASK_NAME_TIMEOUT = 24 * 60 * 60 - - # The amount of time before logs are considered too old. - LOG_STORAGE_TIMEOUT = 24 * 60 * 60 * 7 - - # Do not generate stats for AppScale internal apps. - APPSCALE_APPLICATIONS = ['apichecker', 'appscaledashboard'] - - # A sentinel value to signify that this app does not have composite indexes. - NO_COMPOSITES = "NO_COMPS_INDEXES_HERE" - - # The path in ZooKeeper where the groomer state is stored. - GROOMER_STATE_PATH = '/appscale/groomer_state' - - # The characters used to separate values when storing the groomer state. - GROOMER_STATE_DELIMITER = '||' - - # The ID for the task to clean up entities. - CLEAN_ENTITIES_TASK = 'entities' - - # The ID for the task to clean up ascending indices. - CLEAN_ASC_INDICES_TASK = 'asc-indices' - - # The ID for the task to clean up descending indices. - CLEAN_DSC_INDICES_TASK = 'dsc-indices' - - # The ID for the task to clean up kind indices. - CLEAN_KIND_INDICES_TASK = 'kind-indices' - - # The ID for the task to clean up old logs. - CLEAN_LOGS_TASK = 'logs' - - # The ID for the task to clean up old tasks. - CLEAN_TASKS_TASK = 'tasks' - - # The task ID for populating indexes with the scatter property. - POPULATE_SCATTER = 'populate-scatter' - - # Log progress every time this many seconds have passed. - LOG_PROGRESS_FREQUENCY = 60 * 5 - - def __init__(self, zoo_keeper, table_name, ds_path): - """ Constructor. - - Args: - zk: ZooKeeper client. - table_name: The database used (ie, cassandra) - ds_path: The connection path to the datastore_server. - """ - logger.info("Logging started") - - threading.Thread.__init__(self) - self.zoo_keeper = zoo_keeper - self.table_name = table_name - self.db_access = None - self.ds_access = None - self.datastore_path = ds_path - self.stats = {} - self.namespace_info = {} - self.num_deletes = 0 - self.entities_checked = 0 - self.journal_entries_cleaned = 0 - self.index_entries_checked = 0 - self.index_entries_delete_failures = 0 - self.index_entries_cleaned = 0 - self.scatter_prop_vals_populated = 0 - self.last_logged = time.time() - self.groomer_state = [] - - def stop(self): - """ Stops the groomer thread. """ - self.zoo_keeper.close() - - def run(self): - """ Starts the main loop of the groomer thread. 
""" - while True: - - logger.debug("Trying to get groomer lock.") - if self.get_groomer_lock(): - logger.info("Got the groomer lock.") - self.run_groomer() - try: - self.zoo_keeper.release_lock_with_path(zk.DS_GROOM_LOCK_PATH) - except zk.ZKTransactionException, zk_exception: - logger.error("Unable to release zk lock {0}.".\ - format(str(zk_exception))) - except zk.ZKInternalException, zk_exception: - logger.error("Unable to release zk lock {0}.".\ - format(str(zk_exception))) - else: - logger.info("Did not get the groomer lock.") - sleep_time = random.randint(1, self.LOCK_POLL_PERIOD) - logger.info('Sleeping for {:.1f} minutes.'.format(sleep_time/60.0)) - time.sleep(sleep_time) - - def get_groomer_lock(self): - """ Tries to acquire the lock to the datastore groomer. - - Returns: - True on success, False otherwise. - """ - return self.zoo_keeper.get_lock_with_path(zk.DS_GROOM_LOCK_PATH) - - def get_entity_batch(self, last_key): - """ Gets a batch of entites to operate on. - - Args: - last_key: The last key from a previous query. - Returns: - A list of entities. - """ - return self.db_access.range_query_sync( - dbconstants.APP_ENTITY_TABLE, dbconstants.APP_ENTITY_SCHEMA, - last_key, "", self.BATCH_SIZE, start_inclusive=False) - - def reset_statistics(self): - """ Reinitializes statistics. """ - self.stats = {} - self.namespace_info = {} - self.num_deletes = 0 - self.journal_entries_cleaned = 0 - - def hard_delete_row(self, row_key): - """ Does a hard delete on a given row key to the entity - table. - - Args: - row_key: A str representing the row key to delete. - Returns: - True on success, False otherwise. - """ - try: - self.db_access.batch_delete_sync(dbconstants.APP_ENTITY_TABLE, [row_key]) - except dbconstants.AppScaleDBConnectionError, db_error: - logger.error("Error hard deleting key {0}-->{1}".format( - row_key, db_error)) - return False - except Exception, exception: - logger.error("Caught unexcepted exception {0}".format(exception)) - return False - - return True - - def fetch_entity_dict_for_references(self, references): - """ Fetches a dictionary of valid entities for a list of references. - - Args: - references: A list of index references to entities. - Returns: - A dictionary of validated entities. - """ - keys = [] - for item in references: - keys.append(item.values()[0][self.ds_access.INDEX_REFERENCE_COLUMN]) - keys = list(set(keys)) - entities = self.db_access.batch_get_entity_sync( - dbconstants.APP_ENTITY_TABLE, keys, dbconstants.APP_ENTITY_SCHEMA) - - # The datastore needs to know the app ID. The indices could be scattered - # across apps. - entities_by_app = {} - for key in entities: - app = key.split(self.ds_access._SEPARATOR)[0] - if app not in entities_by_app: - entities_by_app[app] = {} - entities_by_app[app][key] = entities[key] - - entities = {} - for app in entities_by_app: - app_entities = entities_by_app[app] - for key in keys: - if key not in app_entities: - continue - if dbconstants.APP_ENTITY_SCHEMA[0] not in app_entities[key]: - continue - entities[key] = app_entities[key][dbconstants.APP_ENTITY_SCHEMA[0]] - return entities - - def guess_group_from_table_key(self, entity_key): - """ Construct a group reference based on an entity key. - - Args: - entity_key: A string specifying an entity table key. - Returns: - An entity_pb.Reference object specifying the entity group. 
- """ - project_id, namespace, path = entity_key.split(dbconstants.KEY_DELIMITER) - - group = entity_pb.Reference() - group.set_app(project_id) - if namespace: - group.set_name_space(namespace) - - mutable_path = group.mutable_path() - first_element = mutable_path.add_element() - encoded_first_element = path.split(dbconstants.KIND_SEPARATOR)[0] - kind, id_ = encoded_first_element.split(dbconstants.ID_SEPARATOR, 1) - first_element.set_type(kind) - - # At this point, there's no way to tell if the ID was originally a name, - # so this is a guess. - try: - first_element.set_id(int(id_)) - except ValueError: - first_element.set_name(id_) - - return group - - @tornado_synchronous - @gen.coroutine - def lock_and_delete_indexes(self, references, direction, entity_key): - """ For a list of index entries that have the same entity, lock the entity - and delete the indexes. - - Since another process can update an entity after we've determined that - an index entry is invalid, we need to re-check the index entries after - locking their entity key. - - Args: - references: A list of references to an entity. - direction: The direction of the index. - entity_key: A string containing the entity key. - """ - if direction == datastore_pb.Query_Order.ASCENDING: - table_name = dbconstants.ASC_PROPERTY_TABLE - else: - table_name = dbconstants.DSC_PROPERTY_TABLE - - group_key = self.guess_group_from_table_key(entity_key) - entity_lock = EntityLock(self.zoo_keeper.handle, [group_key]) - with entity_lock: - entities = self.fetch_entity_dict_for_references(references) - - refs_to_delete = [] - for reference in references: - index_elements = reference.keys()[0].split(self.ds_access._SEPARATOR) - prop = index_elements[self.ds_access.PROP_NAME_IN_SINGLE_PROP_INDEX] - if not self.ds_access._DatastoreDistributed__valid_index_entry( - reference, entities, direction, prop): - refs_to_delete.append(reference.keys()[0]) - - logger.debug('Removing {} indexes starting with {}'. - format(len(refs_to_delete), [refs_to_delete[0]])) - try: - self.db_access.batch_delete_sync( - table_name, refs_to_delete, column_names=dbconstants.PROPERTY_SCHEMA) - self.index_entries_cleaned += len(refs_to_delete) - except Exception: - logger.exception('Unable to delete indexes') - self.index_entries_delete_failures += 1 - - @tornado_synchronous - @gen.coroutine - def lock_and_delete_kind_index(self, reference): - """ For a list of index entries that have the same entity, lock the entity - and delete the indexes. - - Since another process can update an entity after we've determined that - an index entry is invalid, we need to re-check the index entries after - locking their entity key. - - Args: - reference: A dictionary containing a kind reference. 
- """ - table_name = dbconstants.APP_KIND_TABLE - entity_key = reference.values()[0].values()[0] - - group_key = self.guess_group_from_table_key(entity_key) - entity_lock = EntityLock(self.zoo_keeper.handle, [group_key]) - with entity_lock: - entities = self.fetch_entity_dict_for_references([reference]) - if entity_key not in entities: - index_to_delete = reference.keys()[0] - logger.debug('Removing {}'.format([index_to_delete])) - try: - self.db_access.batch_delete_sync( - table_name, [index_to_delete], - column_names=dbconstants.APP_KIND_SCHEMA) - self.index_entries_cleaned += 1 - except dbconstants.AppScaleDBConnectionError: - logger.exception('Unable to delete index.') - self.index_entries_delete_failures += 1 - - def insert_scatter_indexes(self, entity_key, path, scatter_prop): - """ Writes scatter property references to the index tables. - - Args: - entity_key: A string specifying the entity key. - path: A list of strings representing path elements. - scatter_prop: An entity_pb.Property object. - """ - app_id, namespace, encoded_path = entity_key.split( - dbconstants.KEY_DELIMITER) - kind = path[-1].split(dbconstants.ID_SEPARATOR)[0] - asc_val = str(utils.encode_index_pb(scatter_prop.value())) - dsc_val = helper_functions.reverse_lex(asc_val) - prefix = dbconstants.KEY_DELIMITER.join([app_id, namespace]) - prop_name = '__scatter__' - rows = [{'table': dbconstants.ASC_PROPERTY_TABLE, 'val': asc_val}, - {'table': dbconstants.DSC_PROPERTY_TABLE, 'val': dsc_val}] - - for row in rows: - index_key = utils.get_index_key_from_params( - [prefix, kind, prop_name, row['val'], encoded_path]) - # There's no need to insert with a particular timestamp because - # datastore writes and deletes to this key should take precedence. - statement = """ - INSERT INTO "{table}" ({key}, {column}, {value}) - VALUES (%s, %s, %s) - """.format(table=row['table'], - key=cassandra_interface.ThriftColumn.KEY, - column=cassandra_interface.ThriftColumn.COLUMN_NAME, - value=cassandra_interface.ThriftColumn.VALUE) - params = (bytearray(index_key), 'reference', bytearray(entity_key)) - self.db_access.session.execute(statement, params) - - def populate_scatter_prop(self): - """ Populates the scatter property for existing entities. """ - task_id = self.POPULATE_SCATTER - - # If we have state information beyond what function to use, load the last - # seen start key. - start_key = '' - if len(self.groomer_state) > 1 and self.groomer_state[0] == task_id: - start_key = self.groomer_state[1] - - # Indicate that this job has started after the scatter property was added. - if not start_key: - index_state = self.db_access.get_metadata( - cassandra_interface.SCATTER_PROP_KEY) - if index_state is None: - self.db_access.set_metadata( - cassandra_interface.SCATTER_PROP_KEY, - cassandra_interface.ScatterPropStates.POPULATION_IN_PROGRESS) - - while True: - statement = """ - SELECT DISTINCT key FROM "{table}" - WHERE token(key) > %s - LIMIT {limit} - """.format(table=dbconstants.APP_ENTITY_TABLE, limit=self.BATCH_SIZE) - parameters = (bytearray(start_key),) - keys = self.db_access.session.execute(statement, parameters) - - if not keys: - break - - def create_path_element(encoded_element): - element = entity_pb.Path_Element() - # IDs are treated as names here. This avoids having to fetch the entity - # to tell the difference. 
- key_name = encoded_element.split(dbconstants.ID_SEPARATOR, 1)[-1] - element.set_name(key_name) - return element - - key = None - for row in keys: - key = row.key - encoded_path = key.split(dbconstants.KEY_DELIMITER)[2] - path = [element for element - in encoded_path.split(dbconstants.KIND_SEPARATOR) if element] - element_list = [create_path_element(element) for element in path] - scatter_prop = utils.get_scatter_prop(element_list) - - if scatter_prop is not None: - self.insert_scatter_indexes(key, path, scatter_prop) - self.scatter_prop_vals_populated += 1 - - start_key = key - - if time.time() > self.last_logged + self.LOG_PROGRESS_FREQUENCY: - logger.info('Populated {} scatter property index entries' - .format(self.scatter_prop_vals_populated)) - self.last_logged = time.time() - - self.update_groomer_state([task_id, start_key]) - - self.db_access.set_metadata( - cassandra_interface.SCATTER_PROP_KEY, - cassandra_interface.ScatterPropStates.POPULATED) - - def clean_up_indexes(self, direction): - """ Deletes invalid single property index entries. - - This is needed because we do not delete index entries when updating or - deleting entities. With time, this results in queries taking an increasing - amount of time. - - Args: - direction: The direction of the index. - """ - if direction == datastore_pb.Query_Order.ASCENDING: - table_name = dbconstants.ASC_PROPERTY_TABLE - task_id = self.CLEAN_ASC_INDICES_TASK - else: - table_name = dbconstants.DSC_PROPERTY_TABLE - task_id = self.CLEAN_DSC_INDICES_TASK - - # If we have state information beyond what function to use, - # load the last seen start key. - if len(self.groomer_state) > 1 and self.groomer_state[0] == task_id: - start_key = self.groomer_state[1] - else: - start_key = '' - end_key = dbconstants.TERMINATING_STRING - - # Indicate that an index scrub has started. - if direction == datastore_pb.Query_Order.ASCENDING and not start_key: - self.db_access.set_metadata_sync( - cassandra_interface.INDEX_STATE_KEY, - cassandra_interface.IndexStates.SCRUB_IN_PROGRESS) - - while True: - references = self.db_access.range_query_sync( - table_name=table_name, - column_names=dbconstants.PROPERTY_SCHEMA, - start_key=start_key, - end_key=end_key, - limit=self.BATCH_SIZE, - start_inclusive=False, - ) - if len(references) == 0: - break - - self.index_entries_checked += len(references) - if time.time() > self.last_logged + self.LOG_PROGRESS_FREQUENCY: - logger.info('Checked {} index entries' - .format(self.index_entries_checked)) - self.last_logged = time.time() - first_ref = references[0].keys()[0] - logger.debug('Fetched {} total refs, starting with {}, direction: {}' - .format(self.index_entries_checked, [first_ref], direction)) - - last_start_key = start_key - start_key = references[-1].keys()[0] - if start_key == last_start_key: - raise dbconstants.AppScaleDBError( - 'An infinite loop was detected while fetching references.') - - entities = self.fetch_entity_dict_for_references(references) - - # Group invalid references by entity key so we can minimize locks. 
- invalid_refs = {} - for reference in references: - prop_name = reference.keys()[0].split(self.ds_access._SEPARATOR)[3] - if not self.ds_access._DatastoreDistributed__valid_index_entry( - reference, entities, direction, prop_name): - entity_key = reference.values()[0][self.ds_access.INDEX_REFERENCE_COLUMN] - if entity_key not in invalid_refs: - invalid_refs[entity_key] = [] - invalid_refs[entity_key].append(reference) - - for entity_key in invalid_refs: - self.lock_and_delete_indexes(invalid_refs[entity_key], direction, entity_key) - self.update_groomer_state([task_id, start_key]) - - def clean_up_kind_indices(self): - """ Deletes invalid kind index entries. - - This is needed because the datastore does not delete kind index entries - when deleting entities. - """ - table_name = dbconstants.APP_KIND_TABLE - task_id = self.CLEAN_KIND_INDICES_TASK - - start_key = '' - end_key = dbconstants.TERMINATING_STRING - if len(self.groomer_state) > 1: - start_key = self.groomer_state[1] - - while True: - references = self.db_access.range_query_sync( - table_name=table_name, - column_names=dbconstants.APP_KIND_SCHEMA, - start_key=start_key, - end_key=end_key, - limit=self.BATCH_SIZE, - start_inclusive=False, - ) - if len(references) == 0: - break - - self.index_entries_checked += len(references) - if time.time() > self.last_logged + self.LOG_PROGRESS_FREQUENCY: - logger.info('Checked {} index entries'. - format(self.index_entries_checked)) - self.last_logged = time.time() - first_ref = references[0].keys()[0] - logger.debug('Fetched {} kind indices, starting with {}'. - format(len(references), [first_ref])) - - last_start_key = start_key - start_key = references[-1].keys()[0] - if start_key == last_start_key: - raise dbconstants.AppScaleDBError( - 'An infinite loop was detected while fetching references.') - - entities = self.fetch_entity_dict_for_references(references) - - for reference in references: - entity_key = reference.values()[0].values()[0] - if entity_key not in entities: - self.lock_and_delete_kind_index(reference) - - self.update_groomer_state([task_id, start_key]) - - # Indicate that the index has been scrubbed after the journal was removed. - index_state = self.db_access.get_metadata_sync( - cassandra_interface.INDEX_STATE_KEY) - if index_state == cassandra_interface.IndexStates.SCRUB_IN_PROGRESS: - self.db_access.set_metadata_sync(cassandra_interface.INDEX_STATE_KEY, - cassandra_interface.IndexStates.CLEAN) - - def clean_up_composite_indexes(self): - """ Deletes old composite indexes and bad references. - - Returns: - True on success, False otherwise. - """ - return True - - def get_composite_indexes(self, app_id, kind): - """ Fetches the composite indexes for a kind. - - Args: - app_id: The application ID. - kind: A string, the kind for which we need composite indexes. - Returns: - A list of composite indexes. - """ - if not kind: - return [] - - try: - project_index_manager = self.ds_access.index_manager.projects[app_id] - except KeyError: - return [] - - return [index for index in project_index_manager.indexes_pb - if index.definition().entity_type() == kind] - - def delete_indexes(self, entity): - """ Deletes indexes for a given entity. - - Args: - entity: An EntityProto. - """ - return - - def delete_composite_indexes(self, entity, composites): - """ Deletes composite indexes for an entity. - - Args: - entity: An EntityProto. - composites: A list of datastore_pb.CompositeIndexes composite indexes. 
- """ - row_keys = get_composite_indexes_rows([entity], composites) - self.db_access.batch_delete_sync( - dbconstants.COMPOSITE_TABLE, row_keys, - column_names=dbconstants.COMPOSITE_SCHEMA) - - def initialize_kind(self, app_id, kind): - """ Puts a kind into the statistics object if - it does not already exist. - Args: - app_id: The application ID. - kind: A string representing an entity kind. - """ - if app_id not in self.stats: - self.stats[app_id] = {kind: {'size': 0, 'number': 0}} - if kind not in self.stats[app_id]: - self.stats[app_id][kind] = {'size': 0, 'number': 0} - - def initialize_namespace(self, app_id, namespace): - """ Puts a namespace into the namespace object if - it does not already exist. - Args: - app_id: The application ID. - namespace: A string representing a namespace. - """ - if app_id not in self.namespace_info: - self.namespace_info[app_id] = {namespace: {'size': 0, 'number': 0}} - - if namespace not in self.namespace_info[app_id]: - self.namespace_info[app_id] = {namespace: {'size': 0, 'number': 0}} - if namespace not in self.namespace_info[app_id]: - self.stats[app_id][namespace] = {'size': 0, 'number': 0} - - def process_statistics(self, key, entity, size): - """ Processes an entity and adds to the global statistics. - - Args: - key: The key to the entity table. - entity: EntityProto entity. - size: A int of the size of the entity. - Returns: - True on success, False otherwise. - """ - kind = utils.get_entity_kind(entity.key()) - namespace = entity.key().name_space() - - if not kind: - logger.warning("Entity did not have a kind {0}"\ - .format(entity)) - return False - - if re.match(self.PROTECTED_KINDS, kind): - return True - - if re.match(self.PRIVATE_KINDS, kind): - return True - - app_id = entity.key().app() - if not app_id: - logger.warning("Entity of kind {0} did not have an app id"\ - .format(kind)) - return False - - # Do not generate statistics for applications which are internal to - # AppScale. - if app_id in self.APPSCALE_APPLICATIONS: - return True - - self.initialize_kind(app_id, kind) - self.initialize_namespace(app_id, namespace) - self.namespace_info[app_id][namespace]['size'] += size - self.namespace_info[app_id][namespace]['number'] += 1 - self.stats[app_id][kind]['size'] += size - self.stats[app_id][kind]['number'] += 1 - return True - - def txn_blacklist_cleanup(self): - """ Clean up old transactions and removed unused references - to reap storage. - - Returns: - True on success, False otherwise. - """ - #TODO implement - return True - - def process_entity(self, entity): - """ Processes an entity by updating statistics, indexes, and removes - tombstones. - - Args: - entity: The entity to operate on. - Returns: - True on success, False otherwise. - """ - logger.debug("Process entity {0}".format(str(entity))) - key = entity.keys()[0] - one_entity = entity[key][dbconstants.APP_ENTITY_SCHEMA[0]] - - logger.debug("Entity value: {0}".format(entity)) - - ent_proto = entity_pb.EntityProto() - ent_proto.ParseFromString(one_entity) - self.process_statistics(key, ent_proto, len(one_entity)) - - return True - - def create_namespace_entry(self, namespace, size, number, timestamp): - """ Puts a namespace into the datastore. - - Args: - namespace: A string, the namespace. - size: An int representing the number of bytes taken by a namespace. - number: The total number of entities in a namespace. - timestamp: A datetime.datetime object. 
- """ - entities_to_write = [] - namespace_stat = stats.NamespaceStat(subject_namespace=namespace, - bytes=size, - count=number, - timestamp=timestamp) - entities_to_write.append(namespace_stat) - - # All application are assumed to have the default namespace. - if namespace != "": - namespace_entry = metadata.Namespace(key_name=namespace) - entities_to_write.append(namespace_entry) - - db.put(entities_to_write) - logger.debug("Done creating namespace stats") - - def create_kind_stat_entry(self, kind, size, number, timestamp): - """ Puts a kind statistic into the datastore. - - Args: - kind: The entity kind. - size: An int representing the number of bytes taken by entity kind. - number: The total number of entities. - timestamp: A datetime.datetime object. - """ - kind_stat = stats.KindStat(kind_name=kind, - bytes=size, - count=number, - timestamp=timestamp) - kind_entry = metadata.Kind(key_name=kind) - entities_to_write = [kind_stat, kind_entry] - db.put(entities_to_write) - logger.debug("Done creating kind stat") - - def create_global_stat_entry(self, app_id, size, number, timestamp): - """ Puts a global statistic into the datastore. - - Args: - app_id: The application identifier. - size: The number of bytes of all entities. - number: The total number of entities of an application. - timestamp: A datetime.datetime object. - """ - global_stat = stats.GlobalStat(key_name=app_id, - bytes=size, - count=number, - timestamp=timestamp) - db.put(global_stat) - logger.debug("Done creating global stat") - - def remove_old_tasks_entities(self): - """ Queries for old tasks and removes the entity which tells - use whether a named task was enqueued. - - Returns: - True on success. - """ - # If we have state information beyond what function to use, - # load the last seen cursor. - if (len(self.groomer_state) > 1 and - self.groomer_state[0] == self.CLEAN_TASKS_TASK): - last_cursor = Cursor(self.groomer_state[1]) - else: - last_cursor = None - self.register_db_accessor(constants.DASHBOARD_APP_ID) - timeout = datetime.datetime.utcnow() - \ - datetime.timedelta(seconds=self.TASK_NAME_TIMEOUT) - - counter = 0 - logger.debug("The current time is {0}".format(datetime.datetime.utcnow())) - logger.debug("The timeout time is {0}".format(timeout)) - while True: - query = TaskName.all() - if last_cursor: - query.with_cursor(last_cursor) - query.filter("timestamp <", timeout) - entities = query.fetch(self.BATCH_SIZE) - if len(entities) == 0: - break - last_cursor = query.cursor() - for entity in entities: - logger.debug("Removing task name {0}".format(entity.timestamp)) - entity.delete() - counter += 1 - if time.time() > self.last_logged + self.LOG_PROGRESS_FREQUENCY: - logger.info('Removed {} task entities.'.format(counter)) - self.last_logged = self.LOG_PROGRESS_FREQUENCY - self.update_groomer_state([self.CLEAN_TASKS_TASK, last_cursor]) - - logger.info("Removed {0} task name entities".format(counter)) - return True - - def clean_up_entities(self): - # If we have state information beyond what function to use, - # load the last seen key. 
- if (len(self.groomer_state) > 1 and - self.groomer_state[0] == self.CLEAN_ENTITIES_TASK): - last_key = self.groomer_state[1] - else: - last_key = "" - while True: - try: - logger.debug('Fetching {} entities'.format(self.BATCH_SIZE)) - entities = self.get_entity_batch(last_key) - - if not entities: - break - - for entity in entities: - self.process_entity(entity) - - last_key = entities[-1].keys()[0] - self.entities_checked += len(entities) - if time.time() > self.last_logged + self.LOG_PROGRESS_FREQUENCY: - logger.info('Checked {} entities'.format(self.entities_checked)) - self.last_logged = time.time() - self.update_groomer_state([self.CLEAN_ENTITIES_TASK, last_key]) - except datastore_errors.Error, error: - logger.error("Error getting a batch: {0}".format(error)) - time.sleep(self.DB_ERROR_PERIOD) - except dbconstants.AppScaleDBConnectionError, connection_error: - logger.error("Error getting a batch: {0}".format(connection_error)) - time.sleep(self.DB_ERROR_PERIOD) - - def register_db_accessor(self, app_id): - """ Gets a distributed datastore object to interact with - the datastore for a certain application. - - Args: - app_id: The application ID. - Returns: - A distributed_datastore.DatastoreDistributed object. - """ - ds_distributed = datastore_distributed.DatastoreDistributed( - app_id, self.datastore_path) - apiproxy_stub_map.apiproxy.RegisterStub('datastore_v3', ds_distributed) - apiproxy_stub_map.apiproxy.RegisterStub('memcache', - memcache_distributed.MemcacheService(app_id)) - os.environ['APPLICATION_ID'] = app_id - os.environ['APPNAME'] = app_id - os.environ['AUTH_DOMAIN'] = "appscale.com" - return ds_distributed - - def remove_old_logs(self, log_timeout): - """ Removes old logs. - - Args: - log_timeout: The timeout value in seconds. - - Returns: - True on success, False otherwise. - """ - # If we have state information beyond what function to use, - # load the last seen cursor. - if (len(self.groomer_state) > 1 and - self.groomer_state[0] == self.CLEAN_LOGS_TASK): - last_cursor = Cursor(self.groomer_state[1]) - else: - last_cursor = None - - self.register_db_accessor(constants.DASHBOARD_APP_ID) - if log_timeout: - timeout = (datetime.datetime.utcnow() - - datetime.timedelta(seconds=log_timeout)) - query = RequestLogLine.query(RequestLogLine.timestamp < timeout) - logger.debug("The timeout time is {0}".format(timeout)) - else: - query = RequestLogLine.query() - counter = 0 - logger.debug("The current time is {0}".format(datetime.datetime.utcnow())) - - while True: - entities, next_cursor, more = query.fetch_page(self.BATCH_SIZE, - start_cursor=last_cursor) - for entity in entities: - logger.debug("Removing {0}".format(entity)) - entity.key.delete() - counter += 1 - if time.time() > self.last_logged + self.LOG_PROGRESS_FREQUENCY: - logger.info('Removed {} log entries.'.format(counter)) - self.last_logged = time.time() - if more: - last_cursor = next_cursor - self.update_groomer_state([self.CLEAN_LOGS_TASK, - last_cursor.urlsafe()]) - else: - break - logger.info("Removed {0} log entries.".format(counter)) - return True - - def remove_old_statistics(self): - """ Does a range query on the current batch of statistics and - deletes them. - """ - #TODO only remove statistics older than 30 days. 
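Editor's note: the TODO above ("only remove statistics older than 30 days") was still open when this module was deleted. For reference only, a minimal sketch of what the age cutoff could look like, reusing the timestamp-filter pattern that remove_old_tasks_entities and remove_old_logs already use in this file. The retention constant, the helper name, and the stats import path are illustrative assumptions, not part of the patch.

import datetime

from google.appengine.ext.db import stats


# Assumed retention window; the TODO only says "older than 30 days".
STATS_RETENTION = datetime.timedelta(days=30)


def fetch_stale_kind_stats(batch_size):
  """ Fetches a batch of KindStat entities older than the retention window.

  Args:
    batch_size: An int, the maximum number of entities to return.
  Returns:
    A list of stats.KindStat entities that are old enough to delete.
  """
  cutoff = datetime.datetime.utcnow() - STATS_RETENTION
  query = stats.KindStat.all()
  query.filter('timestamp <', cutoff)
  return query.fetch(batch_size)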
- for app_id in self.stats.keys(): - self.register_db_accessor(app_id) - query = stats.KindStat.all() - entities = query.run() - logger.debug("Result from kind stat query: {0}".format(str(entities))) - for entity in entities: - logger.debug("Removing kind {0}".format(entity)) - entity.delete() - - query = stats.GlobalStat.all() - entities = query.run() - logger.debug("Result from global stat query: {0}".format(str(entities))) - for entity in entities: - logger.debug("Removing global {0}".format(entity)) - entity.delete() - logger.debug("Done removing old stats for app {0}".format(app_id)) - - def update_namespaces(self, timestamp): - """ Puts the namespace information into the datastore for applications to - access. - - Args: - timestamp: A datetime time stamp to know which stat items belong - together. - """ - for app_id in self.namespace_info.keys(): - ds_distributed = self.register_db_accessor(app_id) - namespaces = self.namespace_info[app_id].keys() - for namespace in namespaces: - size = self.namespace_info[app_id][namespace]['size'] - number = self.namespace_info[app_id][namespace]['number'] - try: - self.create_namespace_entry(namespace, size, number, timestamp) - except (datastore_errors.BadRequestError, - datastore_errors.InternalError) as error: - logger.error('Unable to insert namespace info: {}'.format(error)) - - logger.info("Namespace for {0} are {1}"\ - .format(app_id, self.namespace_info[app_id])) - del ds_distributed - - def update_statistics(self, timestamp): - """ Puts the statistics into the datastore for applications - to access. - - Args: - timestamp: A datetime time stamp to know which stat items belong - together. - """ - for app_id in self.stats.keys(): - ds_distributed = self.register_db_accessor(app_id) - total_size = 0 - total_number = 0 - kinds = self.stats[app_id].keys() - for kind in kinds: - size = self.stats[app_id][kind]['size'] - number = self.stats[app_id][kind]['number'] - total_size += size - total_number += number - try: - self.create_kind_stat_entry(kind, size, number, timestamp) - except (datastore_errors.BadRequestError, - datastore_errors.InternalError) as error: - logger.error('Unable to insert kind stat: {}'.format(error)) - - try: - self.create_global_stat_entry(app_id, total_size, total_number, - timestamp) - except (datastore_errors.BadRequestError, - datastore_errors.InternalError) as error: - logger.error('Unable to insert global stat: {}'.format(error)) - - logger.info("Kind stats for {0} are {1}"\ - .format(app_id, self.stats[app_id])) - logger.info("Global stats for {0} are total size of {1} with " \ - "{2} entities".format(app_id, total_size, total_number)) - logger.info("Number of hard deletes: {0}".format(self.num_deletes)) - del ds_distributed - - def update_groomer_state(self, state): - """ Updates the groomer's internal state and persists the state to - ZooKeeper. - - Args: - state: A list of strings representing the ID of the task to resume along - with any additional data about the task. - """ - zk_data = self.GROOMER_STATE_DELIMITER.join(state) - - # We don't want to crash the groomer if we can't update the state. - try: - self.zoo_keeper.update_node(self.GROOMER_STATE_PATH, zk_data) - except zk.ZKInternalException as zkie: - logger.exception(zkie) - self.groomer_state = state - - def run_groomer(self): - """ Runs the grooming process. Loops on the entire dataset sequentially - and updates stats, indexes, and transactions. 
- """ - self.db_access = appscale_datastore_batch.DatastoreFactory.getDatastore( - self.table_name) - transaction_manager = TransactionManager(self.zoo_keeper.handle) - self.ds_access = DatastoreDistributed( - self.db_access, transaction_manager, zookeeper=self.zoo_keeper) - index_manager = IndexManager(self.zoo_keeper.handle, self.ds_access) - self.ds_access.index_manager = index_manager - - logger.info("Groomer started") - start = time.time() - - self.reset_statistics() - - clean_indexes = [ - { - 'id': self.CLEAN_ASC_INDICES_TASK, - 'description': 'clean up ascending indices', - 'function': self.clean_up_indexes, - 'args': [datastore_pb.Query_Order.ASCENDING] - }, - { - 'id': self.CLEAN_DSC_INDICES_TASK, - 'description': 'clean up descending indices', - 'function': self.clean_up_indexes, - 'args': [datastore_pb.Query_Order.DESCENDING] - }, - { - 'id': self.CLEAN_KIND_INDICES_TASK, - 'description': 'clean up kind indices', - 'function': self.clean_up_kind_indices, - 'args': [] - } - ] - - populate_scatter_prop = [ - {'id': self.POPULATE_SCATTER, - 'description': 'populate indexes with scatter property', - 'function': self.populate_scatter_prop, - 'args': []} - ] - - tasks = [ - { - 'id': self.CLEAN_ENTITIES_TASK, - 'description': 'clean up entities', - 'function': self.clean_up_entities, - 'args': [] - }, - { - 'id': self.CLEAN_LOGS_TASK, - 'description': 'clean up old logs', - 'function': self.remove_old_logs, - 'args': [self.LOG_STORAGE_TIMEOUT] - }, - { - 'id': self.CLEAN_TASKS_TASK, - 'description': 'clean up old tasks', - 'function': self.remove_old_tasks_entities, - 'args': [] - } - ] - - index_state = self.db_access.get_metadata_sync( - cassandra_interface.INDEX_STATE_KEY) - if index_state != cassandra_interface.IndexStates.CLEAN: - tasks.extend(clean_indexes) - - scatter_prop_state = self.db_access.get_metadata( - cassandra_interface.SCATTER_PROP_KEY) - if scatter_prop_state != cassandra_interface.ScatterPropStates.POPULATED: - tasks.extend(populate_scatter_prop) - - groomer_state = self.zoo_keeper.get_node(self.GROOMER_STATE_PATH) - logger.info('groomer_state: {}'.format(groomer_state)) - if groomer_state: - self.update_groomer_state( - groomer_state[0].split(self.GROOMER_STATE_DELIMITER)) - - for task_number in range(len(tasks)): - task = tasks[task_number] - if (len(self.groomer_state) > 0 and self.groomer_state[0] != '' and - self.groomer_state[0] != task['id']): - continue - logger.info('Starting to {}'.format(task['description'])) - try: - task['function'](*task['args']) - if task_number != len(tasks) - 1: - next_task = tasks[task_number + 1] - self.update_groomer_state([next_task['id']]) - except Exception as exception: - logger.error('Exception encountered while trying to {}:'. 
- format(task['description'])) - logger.exception(exception) - - self.update_groomer_state([]) - - timestamp = datetime.datetime.utcnow().replace(microsecond=0) - - self.update_statistics(timestamp) - self.update_namespaces(timestamp) - - del self.db_access - del self.ds_access - - time_taken = time.time() - start - logger.info("Groomer cleaned {0} journal entries".format( - self.journal_entries_cleaned)) - logger.info("Groomer checked {0} index entries".format( - self.index_entries_checked)) - logger.info("Groomer cleaned {0} index entries".format( - self.index_entries_cleaned)) - logger.info('Groomer populated {} scatter property index entries'.format( - self.scatter_prop_vals_populated)) - if self.index_entries_delete_failures > 0: - logger.info("Groomer failed to remove {0} index entries".format( - self.index_entries_delete_failures)) - logger.info("Groomer took {0} seconds".format(str(time_taken))) - - -def main(): - """ This main function allows you to run the groomer manually. """ - zk_connection_locations = appscale_info.get_zk_locations_string() - retry_policy = KazooRetry(max_tries=5) - zk_client = KazooClient( - zk_connection_locations, connection_retry=ZK_PERSISTENT_RECONNECTS, - command_retry=retry_policy) - zk_client.start() - zookeeper = zk.ZKTransaction(zk_client) - db_info = appscale_info.get_db_info() - table = db_info[':table'] - - datastore_path = ':'.join([appscale_info.get_db_proxy(), - str(constants.DB_SERVER_PORT)]) - ds_groomer = DatastoreGroomer(zookeeper, table, datastore_path) - - logger.debug("Trying to get groomer lock.") - if ds_groomer.get_groomer_lock(): - logger.info("Got the groomer lock.") - try: - ds_groomer.run_groomer() - except Exception as exception: - logger.exception('Encountered exception {} while running the groomer.' - .format(str(exception))) - try: - ds_groomer.zoo_keeper.release_lock_with_path(zk.DS_GROOM_LOCK_PATH) - except zk.ZKTransactionException, zk_exception: - logger.error("Unable to release zk lock {0}.".\ - format(str(zk_exception))) - except zk.ZKInternalException, zk_exception: - logger.error("Unable to release zk lock {0}.".\ - format(str(zk_exception))) - finally: - zk_client.stop() - zk_client.close() - else: - logger.info("Did not get the groomer lock.") diff --git a/AppDB/appscale/datastore/index_manager.py b/AppDB/appscale/datastore/index_manager.py deleted file mode 100644 index 4ec38b26a5..0000000000 --- a/AppDB/appscale/datastore/index_manager.py +++ /dev/null @@ -1,268 +0,0 @@ -""" Keeps track of configured datastore indexes. """ -import json -import logging -import time -from kazoo.client import NoNodeError -from kazoo.protocol.states import KazooState - -from tornado import gen -from tornado.ioloop import IOLoop -from tornado.locks import Event as AsyncEvent - -from appscale.common.async_retrying import retry_children_watch_coroutine -from appscale.common.datastore_index import DatastoreIndex -from appscale.datastore.zkappscale.tornado_kazoo import AsyncKazooLock - -logger = logging.getLogger('appscale-admin') - - -class IndexInaccessible(Exception): - """ Indicates that an index is not currently accessible. """ - pass - - -class ProjectIndexManager(object): - """ Keeps track of composite index definitions for a project. """ - - def __init__(self, project_id, zk_client, index_manager, datastore_access): - """ Creates a new ProjectIndexManager. - - Args: - project_id: A string specifying a project ID. - zk_client: A KazooClient. 
- update_callback: A function that should be called with the project ID - and index list every time the indexes get updated. - index_manager: An IndexManager used for checking lock status. - datastore_access: A DatastoreDistributed object. - """ - self.project_id = project_id - self.indexes_node = '/appscale/projects/{}/indexes'.format(self.project_id) - self.active = True - self.update_event = AsyncEvent() - - self._creation_times = {} - self._index_manager = index_manager - self._zk_client = zk_client - self._ds_access = datastore_access - - self._zk_client.DataWatch(self.indexes_node, self._update_indexes_watch) - - # Since this manager can be used synchronously, ensure that the indexes - # are populated for this IOLoop iteration. - try: - encoded_indexes = self._zk_client.get(self.indexes_node)[0] - except NoNodeError: - encoded_indexes = '[]' - - self.indexes = [DatastoreIndex.from_dict(self.project_id, index) - for index in json.loads(encoded_indexes)] - - @property - def indexes_pb(self): - if self._zk_client.state != KazooState.CONNECTED: - raise IndexInaccessible('ZooKeeper connection is not active') - - return [index.to_pb() for index in self.indexes] - - @gen.coroutine - def apply_definitions(self): - """ Populate composite indexes that are not marked as ready yet. """ - try: - yield self.update_event.wait() - self.update_event.clear() - if not self._index_manager.admin_lock.is_acquired or not self.active: - return - - logger.info( - 'Applying composite index definitions for {}'.format(self.project_id)) - - for index in self.indexes: - if index.ready: - continue - - # Wait until all clients have either timed out or received the new index - # definition. This prevents entities from being added without entries - # while the index is being rebuilt. - creation_time = self._creation_times.get(index.id, time.time()) - consensus = creation_time + (self._zk_client._session_timeout / 1000.0) - yield gen.sleep(max(consensus - time.time(), 0)) - - yield self._ds_access.update_composite_index( - self.project_id, index.to_pb()) - logger.info('Index {} is now ready'.format(index.id)) - self._mark_index_ready(index.id) - - logging.info( - 'All composite indexes for {} are ready'.format(self.project_id)) - finally: - IOLoop.current().spawn_callback(self.apply_definitions) - - def delete_index_definition(self, index_id): - """ Remove a definition from a project's list of configured indexes. - - Args: - index_id: An integer specifying an index ID. - """ - try: - encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node) - except NoNodeError: - # If there are no index definitions, there is nothing to do. - return - - node_version = znode_stat.version - indexes = [DatastoreIndex.from_dict(self.project_id, index) - for index in json.loads(encoded_indexes)] - - encoded_indexes = json.dumps([index.to_dict() for index in indexes - if index.id != index_id]) - self._zk_client.set(self.indexes_node, encoded_indexes, - version=node_version) - - def _mark_index_ready(self, index_id): - """ Updates the index metadata to reflect the new state of the index. - - Args: - index_id: An integer specifying an index ID. - """ - try: - encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node) - node_version = znode_stat.version - except NoNodeError: - # If for some reason the index no longer exists, there's nothing to do. 
- return - - existing_indexes = [DatastoreIndex.from_dict(self.project_id, index) - for index in json.loads(encoded_indexes)] - for existing_index in existing_indexes: - if existing_index.id == index_id: - existing_index.ready = True - - indexes_dict = [index.to_dict() for index in existing_indexes] - self._zk_client.set(self.indexes_node, json.dumps(indexes_dict), - version=node_version) - - @gen.coroutine - def _update_indexes(self, encoded_indexes): - """ Handles changes to the list of a project's indexes. - - Args: - encoded_indexes: A string containing index node data. - """ - encoded_indexes = encoded_indexes or '[]' - self.indexes = [DatastoreIndex.from_dict(self.project_id, index) - for index in json.loads(encoded_indexes)] - - # Mark when indexes are defined so they can be backfilled later. - self._creation_times.update( - {index.id: time.time() for index in self.indexes - if not index.ready and index.id not in self._creation_times}) - - self.update_event.set() - - def _update_indexes_watch(self, encoded_indexes, znode_stat): - """ Handles updates to the project's indexes node. - - Args: - encoded_indexes: A string containing index node data. - znode_stat: A kazoo.protocol.states.ZnodeStat object. - """ - if not self.active: - return False - - IOLoop.current().add_callback(self._update_indexes, encoded_indexes) - - -class IndexManager(object): - """ Keeps track of configured datastore indexes. """ - # The node which keeps track of admin lock contenders. - ADMIN_LOCK_NODE = '/appscale/datastore/index_manager_lock' - - def __init__(self, zk_client, datastore_access, perform_admin=False): - """ Creates a new IndexManager. - - Args: - zk_client: A kazoo.client.KazooClient object. - datastore_access: A DatastoreDistributed object. - perform_admin: A boolean specifying whether or not to perform admin - operations. - """ - self.projects = {} - self._wake_event = AsyncEvent() - self._zk_client = zk_client - self.admin_lock = AsyncKazooLock(self._zk_client, self.ADMIN_LOCK_NODE) - - # TODO: Refactor so that this dependency is not needed. - self._ds_access = datastore_access - - self._zk_client.ensure_path('/appscale/projects') - self._zk_client.ChildrenWatch('/appscale/projects', self._update_projects) - - # Since this manager can be used synchronously, ensure that the projects - # are populated for this IOLoop iteration. - project_ids = self._zk_client.get_children('/appscale/projects') - self._update_projects_sync(project_ids) - - if perform_admin: - IOLoop.current().spawn_callback(self._contend_for_admin_lock) - - def _update_projects_sync(self, new_project_ids): - """ Updates the list of the deployment's projects. - - Args: - new_project_ids: A list of strings specifying current project IDs. - """ - for project_id in new_project_ids: - if project_id not in self.projects: - self.projects[project_id] = ProjectIndexManager( - project_id, self._zk_client, self, self._ds_access) - if self.admin_lock.is_acquired: - IOLoop.current().spawn_callback( - self.projects[project_id].apply_definitions) - - for project_id in self.projects.keys(): - if project_id not in new_project_ids: - self.projects[project_id].active = False - del self.projects[project_id] - - def _update_projects(self, project_ids): - """ Watches for changes to list of existing projects. - - Args: - project_ids: A list of strings specifying current project IDs. 
- """ - persistent_update_projects = retry_children_watch_coroutine( - '/appscale/projects', self._update_projects_sync) - IOLoop.instance().add_callback(persistent_update_projects, project_ids) - - def _handle_connection_change(self, state): - """ Notifies the admin lock holder when the connection changes. - - Args: - state: The new connection state. - """ - IOLoop.current().add_callback(self._wake_event.set) - - @gen.coroutine - def _contend_for_admin_lock(self): - """ - Waits to acquire an admin lock that gives permission to apply index - definitions. The lock is useful for preventing many servers from writing - the same index entries at the same time. After acquiring the lock, the - individual ProjectIndexManagers are responsible for mutating state whenever - a project's index definitions change. - """ - while True: - # Set up a callback to get notified if the ZK connection changes. - self._wake_event.clear() - self._zk_client.add_listener(self._handle_connection_change) - - yield self.admin_lock.acquire() - try: - for project_index_manager in self.projects.values(): - IOLoop.current().spawn_callback( - project_index_manager.apply_definitions) - - # Release the lock if the kazoo client gets disconnected. - yield self._wake_event.wait() - finally: - self.admin_lock.release() diff --git a/AppDB/appscale/datastore/range_iterator.py b/AppDB/appscale/datastore/range_iterator.py deleted file mode 100644 index cc52e743a1..0000000000 --- a/AppDB/appscale/datastore/range_iterator.py +++ /dev/null @@ -1,242 +0,0 @@ -""" Iterates through a range of index entries. """ - -import sys -from collections import namedtuple - -from tornado import gen - -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.datastore.dbconstants import ( - ASC_PROPERTY_TABLE, BadRequest, KEY_DELIMITER, PROPERTY_SCHEMA, - TERMINATING_STRING) -from appscale.datastore.utils import decode_path, encode_index_pb - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore.datastore_pb import Path, Query_Filter - - -IndexEntry = namedtuple('IndexEntry', - ['encoded_path', 'entity_reference', 'key', 'path']) - - -class Cursor(object): - """ Represents a position within a range. """ - __slots__ = ['key', 'inclusive'] - - def __init__(self, key, inclusive): - """ Creates a new Cursor. - - Args: - key: A string specifying an encoded entity key. - inclusive: A boolean indicating that the next value can include the key. - """ - self.key = key - self.inclusive = inclusive - - -class RangeExhausted(Exception): - """ Indicates that there are no more entries in the range. """ - pass - - -class RangeIterator(object): - """ Iterates through a range of index entries. - - This was designed for merge join queries. The range can only be narrowed. - """ - CHUNK_SIZE = 1000 - - def __init__(self, db, project_id, namespace, kind, prop_name, value): - """ Creates a new RangeIterator. - - Args: - db: A database interface object. - project_id: A string specifying a project ID. - namespace: A string specifying a namespace. - kind: A string specifying an entity kind. - prop_name: A string specifying a property name. - value: An entity_pb.PropertyValue. 
- """ - self.project_id = project_id - self.namespace = namespace - self.kind = kind - self.prop_name = prop_name - - self._db = db - self._value = value - - self._range = (self.prefix, ''.join([self.prefix, TERMINATING_STRING])) - self._cursor = Cursor(self.prefix, inclusive=True) - - self._cache = [] - self._index_exhausted = False - - @property - def prefix(self): - """ The encoded reference without the path element. """ - return KEY_DELIMITER.join( - [self.project_id, self.namespace, self.kind, self.prop_name, - str(encode_index_pb(self._value))]) + KEY_DELIMITER - - @gen.coroutine - def async_next(self): - """ Retrieves the next index entry in the range. - - Returns: - An IndexEntry. - Raises: - RangeExhausted when there are no more entries in the range. - """ - try: - # First check if the request can be fulfilled with the cache. - entry = self._next_from_cache() - self._cursor = Cursor(entry.key, inclusive=False) - raise gen.Return(entry) - except ValueError: - # If the cache and index have been exhausted, there are no more entries. - if self._index_exhausted: - raise RangeExhausted() - - self._cache = yield self._db.range_query( - ASC_PROPERTY_TABLE, PROPERTY_SCHEMA, self._cursor.key, self._range[-1], - self.CHUNK_SIZE, start_inclusive=self._cursor.inclusive) - - if len(self._cache) < self.CHUNK_SIZE: - self._index_exhausted = True - - if not self._cache: - raise RangeExhausted() - - entry = self.entry_from_result(self._cache[0]) - self._cursor = Cursor(entry.key, inclusive=False) - raise gen.Return(entry) - - @classmethod - def from_filter(cls, db, project_id, namespace, kind, pb_filter): - """ Creates a new RangeIterator from a filter. - - Args: - db: A database interface object. - project_id: A string specifying a project ID. - namespace: A string specifying a namespace. - kind: A string specifying an entity kind. - pb_filter: A datastore_pb.Query_Filter object. - Raises: - BadRequest if the filter cannot be used to create the range. - """ - # Make sure this filter can be used for a merge join. - if pb_filter.op() != Query_Filter.EQUAL: - raise BadRequest('Invalid filter for merge join ' - '(op must be equal): {}'.format(pb_filter)) - - if pb_filter.property_size() != 1: - raise BadRequest('Invalid filter for merge join ' - '(multiple properties): {}'.format(pb_filter)) - - property_ = pb_filter.property(0) - if property_.name() == '__key__': - raise BadRequest('Invalid property for merge join ' - '(must not be __key__): {}'.format(property_)) - - return cls(db, project_id, namespace, kind, property_.name(), - property_.value()) - - @staticmethod - def entry_from_result(result): - """ Creates an IndexEntry from a Cassandra result. - - Args: - result: A dictionary mapping a Cassandra key to a reference value. - Returns: - An IndexEntry. - """ - entry_key = result.keys()[0] - encoded_path = entry_key.rsplit(KEY_DELIMITER)[-1] - path = decode_path(encoded_path) - entity_ref = result.values()[0]['reference'] - return IndexEntry(encoded_path, entity_ref, entry_key, path) - - def get_cursor(self): - """ Fetches the range's current cursor position. - - Returns: - An entity_pb.Path object. - """ - # If the current cursor does not have a path, return an empty one. - if self._cursor.key == self.prefix: - return Path() - - encoded_path = self._cursor.key.rsplit(KEY_DELIMITER)[-1] - return decode_path(encoded_path) - - def set_cursor(self, path, inclusive): - """ Changes the range's cursor position. - - Args: - path: An entity_pb.Path object. 
- inclusive: A boolean specifying that the next result can include the - given path. - Raises: - BadRequest if unable to set the cursor to the given path. - """ - range_start, range_end = self._range - cursor = Cursor(self.prefix + str(encode_index_pb(path)), inclusive) - - if cursor.key < self._cursor.key: - raise BadRequest( - 'Cursor cannot be moved backwards ' - '({} < {})'.format(repr(cursor.key), repr(self._cursor.key))) - - if cursor.key < range_start or cursor.key > range_end: - raise BadRequest('Cursor outside range: {}'.format(self._range)) - - self._cursor = cursor - - def restrict_to_path(self, path): - """ Narrows the range to a specific entity path. - - Args: - path: An entity_pb.Path object. - """ - start_key = self.prefix + str(encode_index_pb(path)) - end_key = ''.join([start_key, TERMINATING_STRING]) - if start_key < self._range[0] or end_key > self._range[-1]: - raise BadRequest('Restriction must be within range') - - if self._cursor.key > end_key: - raise BadRequest('Cursor already exceeds new range') - - self._range = (start_key, end_key) - self._cursor.key = max(start_key, self._cursor.key) - - def _next_from_cache(self): - """ Retrieves the next index entry from the cache. - - Returns: - An IndexEntry. - Raises: - ValueError if the cache does not contain a suitable entry. - """ - lo = 0 - hi = len(self._cache) - # Bisect the cache to find the smallest key that is >= the cursor. - while lo < hi: - mid = (lo + hi) // 2 - if self._cache[mid].keys()[0] < self._cursor.key: - lo = mid + 1 - else: - hi = mid - - try: - entry = self.entry_from_result(self._cache[lo]) - except IndexError: - raise ValueError - - # If cursor is not inclusive, exclude matching entries. - if entry.key == self._cursor.key and not self._cursor.inclusive: - try: - entry = self.entry_from_result(self._cache[lo + 1]) - except IndexError: - raise ValueError - - return entry diff --git a/AppDB/appscale/datastore/scripts/data_layout.py b/AppDB/appscale/datastore/scripts/data_layout.py deleted file mode 100644 index 4a1cf3f380..0000000000 --- a/AppDB/appscale/datastore/scripts/data_layout.py +++ /dev/null @@ -1,23 +0,0 @@ -import argparse -import sys - -from ..appscale_datastore_batch import DatastoreFactory - -# The exit code that indicates the data layout version is unexpected. -INVALID_VERSION_EXIT_CODE = 64 - - -def main(): - parser = argparse.ArgumentParser( - description='Checks if the data layout is valid') - parser.add_argument('--db-type', help='The database type') - args = parser.parse_args() - - datastore_batch = DatastoreFactory.getDatastore(args.db_type) - try: - is_valid = datastore_batch.valid_data_version_sync() - finally: - datastore_batch.close() - - if not is_valid: - sys.exit(INVALID_VERSION_EXIT_CODE) diff --git a/AppDB/appscale/datastore/scripts/datastore.py b/AppDB/appscale/datastore/scripts/datastore.py index 1efd02ae94..f46f23688b 100644 --- a/AppDB/appscale/datastore/scripts/datastore.py +++ b/AppDB/appscale/datastore/scripts/datastore.py @@ -26,6 +26,7 @@ from appscale.common.datastore_index import DatastoreIndex from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER from appscale.datastore.fdb.codecs import Path +from appscale.datastore.fdb.fdb_datastore import FDBDatastore from kazoo.client import KazooState from kazoo.exceptions import NodeExistsError, NoNodeError from tornado import gen @@ -33,14 +34,9 @@ from tornado.ioloop import IOLoop from tornado.options import options from .. 
import dbconstants -from ..appscale_datastore_batch import DatastoreFactory -from ..datastore_distributed import DatastoreDistributed -from ..index_manager import IndexManager from ..utils import (clean_app_id, logger, UnprocessedQueryResult) -from ..zkappscale import zktransaction -from ..zkappscale.transaction_manager import TransactionManager sys.path.append(APPSCALE_PYTHON_APPSERVER) from google.appengine.api import api_base_pb @@ -51,7 +47,7 @@ from google.appengine.ext.remote_api import remote_api_pb from google.net.proto.ProtocolBuffer import ProtocolBufferDecodeError -# Global for accessing the datastore. An instance of DatastoreDistributed. +# Global for accessing the datastore. datastore_access = None # A record of active datastore servers. @@ -333,8 +329,7 @@ def begin_transaction_request(self, app_id, http_request_data): raise gen.Return(('', datastore_pb.Error.INTERNAL_ERROR, str(error))) except dbconstants.BadRequest as error: raise gen.Return(('', datastore_pb.Error.BAD_REQUEST, str(error))) - except (zktransaction.ZKInternalException, - dbconstants.AppScaleDBConnectionError) as error: + except dbconstants.AppScaleDBConnectionError as error: logger.exception('Unable to begin transaction') raise gen.Return( ('', datastore_pb.Error.INTERNAL_ERROR, @@ -433,16 +428,11 @@ def run_query(self, http_request_data): clone_qr_pb = UnprocessedQueryResult() try: yield datastore_access._dynamic_run_query(query, clone_qr_pb) + except dbconstants.InternalError as error: + raise gen.Return(('', datastore_pb.Error.INTERNAL_ERROR, str(error))) except dbconstants.BadRequest as error: raise gen.Return( ('', datastore_pb.Error.BAD_REQUEST, str(error))) - except zktransaction.ZKBadRequest as error: - logger.exception( - 'Illegal arguments in transaction during {}'.format(query)) - raise gen.Return(('', datastore_pb.Error.BAD_REQUEST, str(error))) - except zktransaction.ZKInternalException as error: - logger.exception('ZKInternalException during {}'.format(query)) - raise gen.Return(('', datastore_pb.Error.INTERNAL_ERROR, str(error))) - except zktransaction.ZKTransactionException as error: + except dbconstants.ConcurrentModificationException as error: logger.exception('Concurrent transaction during {}'.format(query)) raise gen.Return( ('', datastore_pb.Error.CONCURRENT_TRANSACTION, str(error))) @@ -694,14 +684,14 @@ def put_request(self, app_id, http_request_data): try: yield datastore_access.dynamic_put(app_id, putreq_pb, putresp_pb) raise gen.Return((putresp_pb.Encode(), 0, '')) - except (dbconstants.InternalError, zktransaction.ZKInternalException, + except (dbconstants.InternalError, dbconstants.AppScaleDBConnectionError) as error: raise gen.Return(('', datastore_pb.Error.INTERNAL_ERROR, str(error))) except dbconstants.Timeout as error: raise gen.Return(('', datastore_pb.Error.TIMEOUT, str(error))) - except (dbconstants.BadRequest, zktransaction.ZKBadRequest) as error: + except dbconstants.BadRequest as error: raise gen.Return(('', datastore_pb.Error.BAD_REQUEST, str(error))) - except zktransaction.ZKTransactionException as error: + except dbconstants.ConcurrentModificationException as error: logger.exception('Concurrent transaction during {}'.format(putreq_pb)) raise gen.Return( ('', datastore_pb.Error.CONCURRENT_TRANSACTION, str(error))) @@ -721,13 +711,13 @@ def get_request(self, app_id, http_request_data): getresp_pb = datastore_pb.GetResponse() try: yield datastore_access.dynamic_get(app_id, getreq_pb, getresp_pb) - except zktransaction.ZKBadRequest as error: + except 
dbconstants.BadRequest as error: logger.exception('Illegal argument during {}'.format(getreq_pb)) raise gen.Return(('', datastore_pb.Error.BAD_REQUEST, str(error))) - except zktransaction.ZKInternalException as error: - logger.exception('ZKInternalException during {}'.format(getreq_pb)) + except dbconstants.InternalError as error: + logger.exception('InternalError during {}'.format(getreq_pb)) raise gen.Return(('', datastore_pb.Error.INTERNAL_ERROR, str(error))) - except zktransaction.ZKTransactionException as error: + except dbconstants.ConcurrentModificationException as error: logger.exception('Concurrent transaction during {}'.format(getreq_pb)) raise gen.Return( ('', datastore_pb.Error.CONCURRENT_TRANSACTION, str(error))) @@ -770,15 +760,7 @@ def delete_request(self, app_id, http_request_data): raise gen.Return(('', datastore_pb.Error.TIMEOUT, str(error))) except dbconstants.BadRequest as error: raise gen.Return(('', datastore_pb.Error.BAD_REQUEST, str(error))) - except zktransaction.ZKBadRequest as error: - logger.exception('Illegal argument during {}'.format(delreq_pb)) - raise gen.Return(('', datastore_pb.Error.BAD_REQUEST, str(error))) - except zktransaction.ZKInternalException: - logger.exception('ZKInternalException during {}'.format(delreq_pb)) - raise gen.Return( - ('', datastore_pb.Error.INTERNAL_ERROR, - 'Internal error with ZooKeeper connection.')) - except zktransaction.ZKTransactionException: + except dbconstants.ConcurrentModificationException: logger.exception('Concurrent transaction during {}'.format(delreq_pb)) raise gen.Return( ('', datastore_pb.Error.CONCURRENT_TRANSACTION, @@ -934,34 +916,21 @@ def main(): command_retry=retry_policy) zk_client.start() - if args.type == 'cassandra': - datastore_batch = DatastoreFactory.getDatastore( - args.type, log_level=logger.getEffectiveLevel()) - zookeeper = zktransaction.ZKTransaction( - zk_client=zk_client, db_access=datastore_batch, - log_level=logger.getEffectiveLevel()) - transaction_manager = TransactionManager(zk_client) - datastore_access = DatastoreDistributed( - datastore_batch, transaction_manager, zookeeper=zookeeper, - log_level=logger.getEffectiveLevel(), - taskqueue_locations=taskqueue_locations) - else: - from appscale.datastore.fdb.fdb_datastore import FDBDatastore - clusterfile_path = args.fdb_clusterfile - if not clusterfile_path: - try: - clusterfile_content = zk_client.get(FDB_CLUSTERFILE_NODE)[0] - clusterfile_path = '/run/appscale/appscale-datastore-fdb.cluster' - with open(clusterfile_path, 'w') as clusterfile: - clusterfile.write(clusterfile_content) - except NoNodeError: - logger.warning( - 'Neither --fdb-clusterfile was specified nor {} ZK node exists,' - 'FDB client will try to find clusterfile in one of default locations' - .format(FDB_CLUSTERFILE_NODE) - ) - datastore_access = FDBDatastore() - datastore_access.start(clusterfile_path) + clusterfile_path = args.fdb_clusterfile + if not clusterfile_path: + try: + clusterfile_content = zk_client.get(FDB_CLUSTERFILE_NODE)[0] + clusterfile_path = '/run/appscale/appscale-datastore-fdb.cluster' + with open(clusterfile_path, 'w') as clusterfile: + clusterfile.write(clusterfile_content) + except NoNodeError: + logger.warning( + 'Neither --fdb-clusterfile was specified nor {} ZK node exists,' + 'FDB client will try to find clusterfile in one of default locations' + .format(FDB_CLUSTERFILE_NODE) + ) + datastore_access = FDBDatastore() + datastore_access.start(clusterfile_path) zk_client.add_listener(zk_state_listener) 
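Editor's note: the handler hunks above swap the old zktransaction exceptions for the dbconstants hierarchy, and each handler repeats the same mapping onto datastore_pb.Error codes. A minimal sketch of that mapping collected into one table is shown below; the helper and its name are hypothetical (the patch itself keeps the per-handler try/except blocks), and the import paths are assumed to match the surrounding module.

from appscale.datastore import dbconstants
from google.appengine.datastore import datastore_pb


# Checked in order; more specific exception types are listed first.
ERROR_CODES_BY_EXCEPTION = (
  (dbconstants.ConcurrentModificationException,
   datastore_pb.Error.CONCURRENT_TRANSACTION),
  (dbconstants.BadRequest, datastore_pb.Error.BAD_REQUEST),
  (dbconstants.Timeout, datastore_pb.Error.TIMEOUT),
  (dbconstants.InternalError, datastore_pb.Error.INTERNAL_ERROR),
  (dbconstants.AppScaleDBConnectionError, datastore_pb.Error.INTERNAL_ERROR),
)


def error_tuple(exception):
  """ Maps a caught exception to the ('', error_code, message) tuple that
  the request handlers above return to the protocol buffer layer. """
  for exception_class, error_code in ERROR_CODES_BY_EXCEPTION:
    if isinstance(exception, exception_class):
      return '', error_code, str(exception)
  return '', datastore_pb.Error.INTERNAL_ERROR, str(exception)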
zk_client.ensure_path(DATASTORE_SERVERS_NODE) @@ -970,11 +939,6 @@ def main(): zk_state_listener(zk_client.state) zk_client.ChildrenWatch(DATASTORE_SERVERS_NODE, update_servers_watch) - if args.type == 'cassandra': - index_manager = IndexManager(zk_client, datastore_access, - perform_admin=True) - datastore_access.index_manager = index_manager - server = tornado.httpserver.HTTPServer(pb_application) server.listen(args.port) diff --git a/AppDB/appscale/datastore/scripts/delete_records.py b/AppDB/appscale/datastore/scripts/delete_records.py deleted file mode 100644 index 51aaeeb0ba..0000000000 --- a/AppDB/appscale/datastore/scripts/delete_records.py +++ /dev/null @@ -1,44 +0,0 @@ -""" Deletes all application data. """ - -import logging -import sys - -from appscale.common.constants import LOG_FORMAT -from ..dbconstants import APP_ENTITY_SCHEMA -from ..dbconstants import APP_ENTITY_TABLE -from ..dbconstants import APP_KIND_SCHEMA -from ..dbconstants import APP_KIND_TABLE -from ..dbconstants import ASC_PROPERTY_TABLE -from ..dbconstants import COMPOSITE_SCHEMA -from ..dbconstants import COMPOSITE_TABLE -from ..dbconstants import DSC_PROPERTY_TABLE -from ..dbconstants import PROPERTY_SCHEMA -from ..utils import fetch_and_delete_entities - - -def main(): - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - - database = "cassandra" - first_key = "" - last_key = "" - - if len(sys.argv) > 2: - print "usage: appscale-delete-all-records [app_id]" - exit(1) - - if len(sys.argv) == 2: - first_key = sys.argv[1] - - try: - tables_to_schemas = { - APP_ENTITY_TABLE: APP_ENTITY_SCHEMA, - ASC_PROPERTY_TABLE: PROPERTY_SCHEMA, - DSC_PROPERTY_TABLE: PROPERTY_SCHEMA, - COMPOSITE_TABLE: COMPOSITE_SCHEMA, - APP_KIND_TABLE: APP_KIND_SCHEMA, - } - for table, schema in tables_to_schemas.items(): - fetch_and_delete_entities(database, table, schema, first_key, False) - except: - raise diff --git a/AppDB/appscale/datastore/scripts/groomer_service.py b/AppDB/appscale/datastore/scripts/groomer_service.py deleted file mode 100644 index ef22b66a7d..0000000000 --- a/AppDB/appscale/datastore/scripts/groomer_service.py +++ /dev/null @@ -1,35 +0,0 @@ -""" Provides a service which periodically runs the groomer. """ -import logging - -from kazoo.client import KazooClient, KazooRetry - -from appscale.common import appscale_info -from appscale.common.constants import DB_SERVER_PORT -from appscale.common.constants import LOG_FORMAT -from appscale.common.constants import ZK_PERSISTENT_RECONNECTS -from .. 
import groomer -from ..zkappscale import zktransaction as zk - - -def main(): - logging.basicConfig(level=logging.INFO, format=LOG_FORMAT) - logger = logging.getLogger(__name__) - zookeeper_locations = appscale_info.get_zk_locations_string() - retry_policy = KazooRetry(max_tries=5) - zk_client = KazooClient( - zookeeper_locations, connection_retry=ZK_PERSISTENT_RECONNECTS, - command_retry=retry_policy) - zk_client.start() - gc_zookeeper = zk.ZKTransaction(zk_client) - logger.info("Using ZK locations {0}".format(zookeeper_locations)) - - datastore_location = ':'.join([appscale_info.get_db_proxy(), - str(DB_SERVER_PORT)]) - ds_groomer = groomer.DatastoreGroomer(gc_zookeeper, "cassandra", - datastore_location) - try: - ds_groomer.start() - except Exception, exception: - logger.warning("An exception slipped through:") - logger.exception(exception) - logger.warning("Exiting service.") diff --git a/AppDB/appscale/datastore/scripts/prime_cassandra.py b/AppDB/appscale/datastore/scripts/prime_cassandra.py deleted file mode 100644 index d5f9094437..0000000000 --- a/AppDB/appscale/datastore/scripts/prime_cassandra.py +++ /dev/null @@ -1,24 +0,0 @@ -""" Create Cassandra keyspace and initial tables. """ - -import argparse -import logging - -from appscale.common.constants import LOG_FORMAT -from ..cassandra_env import schema - - -def main(): - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - - parser = argparse.ArgumentParser() - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument('--replication', type=int, - help='The replication factor for the keyspace') - group.add_argument('--check', action='store_true', - help='Check if the required tables are present') - args = parser.parse_args() - - if args.check: - assert schema.primed() - else: - schema.prime_cassandra(args.replication) diff --git a/AppDB/appscale/datastore/scripts/transaction_groomer.py b/AppDB/appscale/datastore/scripts/transaction_groomer.py deleted file mode 100644 index ed06ad5a02..0000000000 --- a/AppDB/appscale/datastore/scripts/transaction_groomer.py +++ /dev/null @@ -1,498 +0,0 @@ -""" A daemon that cleans up expired transactions. 
""" -import argparse -import datetime -import json -import logging -import sys -import time -import uuid - -from concurrent.futures import ThreadPoolExecutor -from kazoo.client import KazooClient -from kazoo.client import KazooState -from kazoo.exceptions import NoNodeError -from kazoo.exceptions import NotEmptyError -from kazoo.exceptions import ZookeeperError -from kazoo.retry import KazooRetry -from tornado import gen -from tornado.ioloop import IOLoop -from tornado.locks import Event as AsyncEvent -from tornado.queues import Queue as AsyncQueue - -from appscale.common import appscale_info -from appscale.common.constants import LOG_FORMAT -from appscale.common.constants import ZK_PERSISTENT_RECONNECTS -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from ..cassandra_env.cassandra_interface import DatastoreProxy -from ..cassandra_env.large_batch import BatchResolver -from ..dbconstants import MAX_TX_DURATION -from ..index_manager import IndexManager -from ..zkappscale.constants import CONTAINER_PREFIX -from ..zkappscale.constants import COUNTER_NODE_PREFIX -from ..zkappscale.constants import MAX_SEQUENCE_COUNTER -from ..zkappscale.constants import OFFSET_NODE -from ..zkappscale.tornado_kazoo import TornadoKazoo - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore.entity_pb import CompositeIndex - -# The maximum number of transactions per project to clean up at the same time. -MAX_CONCURRENCY = 10 - -logger = logging.getLogger(__name__) - - -class GroomingCoordinator(object): - """ Distributes grooming work between registered groomers. """ - def __init__(self, zk_client): - self.index = 0 - self.total_workers = 1 - - self._groomer_id = uuid.uuid4().hex - self._zk_client = zk_client - self._node = None - self._registration_path = '/appscale/datastore/tx_groomer' - - self._zk_client.ensure_path(self._registration_path) - - # Make sure the ephemeral registration node is recreated upon reconnect. - self._zk_client.add_listener(self._state_listener) - self._register_groomer() - - # Make sure the assignment is updated whenever a new groomer registers. - self._zk_client.ChildrenWatch(self._registration_path, - self._update_assignment_watch) - - def _update_assignment(self, workers): - """ Updates the portion of transactions this groomer needs to clean up. - - Args: - workers: A list of strings specifying registered groomers. - """ - workers.sort(key=lambda name: name.rsplit('-')[1]) - - self.total_workers = len(workers) - try: - self.index = workers.index(self._node) - except ValueError: - self._register_groomer() - workers = self._zk_client.retry(self._zk_client.get_children, - self._registration_path) - return self._update_assignment(workers) - - logger.info('Currently acting as worker {}/{}'.format(self.index + 1, - self.total_workers)) - - def _update_assignment_watch(self, children): - """ Watches for new or lost groomers. - - Args: - children: A list of strings specifying registered groomers. - """ - IOLoop.instance().add_callback(self._update_assignment, children) - - def _clean_created_nodes(self): - """ Removes any registrations this service may have created. 
""" - all_nodes = self._zk_client.retry(self._zk_client.get_children, - self._registration_path) - to_delete = [node for node in all_nodes - if node.startswith(self._groomer_id)] - for node in to_delete: - full_path = '/'.join([self._registration_path, node]) - while True: - try: - self._zk_client.delete(full_path) - break - except NoNodeError: - break - except ZookeeperError: - continue - - def _register_groomer(self): - """ Creates a ZooKeeper entry that broadcasts this service's presence. """ - logger.info('Registering service with ZooKeeper') - node_prefix = '/'.join([self._registration_path, self._groomer_id]) + '-' - - # Make sure an older node from this groomer did not remain. - self._clean_created_nodes() - - # The groomer must be registered before it can continue working. - while True: - try: - full_path = self._zk_client.create(node_prefix, ephemeral=True, - sequence=True) - self._node = full_path[len(self._registration_path) + 1:] - break - except ZookeeperError: - self._clean_created_nodes() - continue - - def _state_listener(self, state): - """ Watches for changes to the ZooKeeper connection state. """ - if state == KazooState.CONNECTED: - IOLoop.instance().add_callback(self._register_groomer) - - -class ProjectGroomer(object): - """ Cleans up expired transactions for a project. """ - def __init__(self, project_id, coordinator, zk_client, db_access, - thread_pool, index_manager): - """ Creates a new ProjectGroomer. - - Args: - project_id: A string specifying a project ID. - coordinator: A GroomingCoordinator. - zk_client: A KazooClient. - db_access: A DatastoreProxy. - thread_pool: A ThreadPoolExecutor. - index_manager: An IndexManager object. - """ - self.project_id = project_id - - self._coordinator = coordinator - self._zk_client = zk_client - self._tornado_zk = TornadoKazoo(self._zk_client) - self._db_access = db_access - self._thread_pool = thread_pool - self._index_manager = index_manager - self._project_node = '/appscale/apps/{}'.format(self.project_id) - self._containers = [] - self._inactive_containers = set() - self._batch_resolver = BatchResolver(self.project_id, self._db_access) - - self._zk_client.ensure_path(self._project_node) - self._zk_client.ChildrenWatch(self._project_node, self._update_containers) - - self._txid_manual_offset = 0 - self._offset_node = '/'.join([self._project_node, OFFSET_NODE]) - self._zk_client.DataWatch(self._offset_node, self._update_offset) - - self._stop_event = AsyncEvent() - self._stopped_event = AsyncEvent() - - # Keeps track of cleanup results for each round of grooming. - self._txids_cleaned = 0 - self._oldest_valid_tx_time = None - - self._worker_queue = AsyncQueue(maxsize=MAX_CONCURRENCY) - for _ in range(MAX_CONCURRENCY): - IOLoop.current().spawn_callback(self._worker) - - IOLoop.current().spawn_callback(self.start) - - @gen.coroutine - def start(self): - """ Starts the grooming process until the stop event is set. """ - logger.info('Grooming {}'.format(self.project_id)) - while True: - if self._stop_event.is_set(): - break - - try: - yield self._groom_project() - except Exception: - # Prevent the grooming loop from stopping if an error is encountered. - logger.exception( - 'Unexpected error while grooming {}'.format(self.project_id)) - yield gen.sleep(MAX_TX_DURATION) - - self._stopped_event.set() - - @gen.coroutine - def stop(self): - """ Stops the grooming process. 
""" - logger.info('Stopping grooming process for {}'.format(self.project_id)) - self._stop_event.set() - yield self._stopped_event.wait() - - @gen.coroutine - def _worker(self): - """ Processes items in the worker queue. """ - while True: - tx_path, composite_indexes = yield self._worker_queue.get() - try: - tx_time = yield self._resolve_txid(tx_path, composite_indexes) - if tx_time is None: - self._txids_cleaned += 1 - - if tx_time is not None and tx_time < self._oldest_valid_tx_time: - self._oldest_valid_tx_time = tx_time - except Exception: - logger.exception('Unexpected error while resolving {}'.format(tx_path)) - finally: - self._worker_queue.task_done() - - def _update_offset(self, new_offset, _): - """ Watches for updates to the manual offset node. - - Args: - new_offset: A string specifying the new manual offset. - """ - self._txid_manual_offset = int(new_offset or 0) - - def _update_containers(self, nodes): - """ Updates the list of active txid containers. - - Args: - nodes: A list of strings specifying ZooKeeper nodes. - """ - counters = [int(node[len(CONTAINER_PREFIX):] or 1) - for node in nodes if node.startswith(CONTAINER_PREFIX) - and node not in self._inactive_containers] - counters.sort() - - containers = [CONTAINER_PREFIX + str(counter) for counter in counters] - if containers and containers[0] == '{}1'.format(CONTAINER_PREFIX): - containers[0] = CONTAINER_PREFIX - - self._containers = containers - - @gen.coroutine - def _groom_project(self): - """ Runs the grooming process. """ - index = self._coordinator.index - worker_count = self._coordinator.total_workers - - oldest_valid_tx_time = yield self._fetch_and_clean(index, worker_count) - - # Wait until there's a reasonable chance that some transactions have - # timed out. - next_timeout_eta = oldest_valid_tx_time + MAX_TX_DURATION - - # The oldest ignored transaction should still be valid, but ensure that - # the timeout is not negative. - next_timeout = max(0, next_timeout_eta - time.time()) - time_to_wait = datetime.timedelta( - seconds=next_timeout + (MAX_TX_DURATION / 2)) - - # Allow the wait to be cut short when a project is removed. - try: - yield self._stop_event.wait(timeout=time_to_wait) - except gen.TimeoutError: - return - - @gen.coroutine - def _remove_locks(self, txid, tx_path): - """ Removes entity locks involved with the transaction. - - Args: - txid: An integer specifying the transaction ID. - tx_path: A string specifying the location of the transaction node. - """ - groups_path = '/'.join([tx_path, 'groups']) - try: - groups_data = yield self._tornado_zk.get(groups_path) - except NoNodeError: - # If the group list does not exist, the locks have not been acquired. - return - - group_paths = json.loads(groups_data[0]) - for group_path in group_paths: - try: - contenders = yield self._tornado_zk.get_children(group_path) - except NoNodeError: - # The lock may have been cleaned up or not acquired in the first place. - continue - - for contender in contenders: - contender_path = '/'.join([group_path, contender]) - contender_data = yield self._tornado_zk.get(contender_path) - contender_txid = int(contender_data[0]) - if contender_txid != txid: - continue - - yield self._tornado_zk.delete(contender_path) - break - - @gen.coroutine - def _remove_path(self, tx_path): - """ Removes a ZooKeeper node. - - Args: - tx_path: A string specifying the path to delete. 
- """ - try: - yield self._tornado_zk.delete(tx_path) - except NoNodeError: - pass - except NotEmptyError: - yield self._thread_pool.submit(self._zk_client.delete, tx_path, - recursive=True) - - @gen.coroutine - def _resolve_txid(self, tx_path, composite_indexes): - """ Cleans up a transaction if it has expired. - - Args: - tx_path: A string specifying the location of the ZooKeeper node. - composite_indexes: A list of CompositeIndex objects. - Returns: - The transaction start time if still valid, None if invalid because this - method will also delete it. - """ - try: - tx_data = yield self._tornado_zk.get(tx_path) - except NoNodeError: - return - - tx_time = float(tx_data[0]) - - _, container, tx_node = tx_path.rsplit('/', 2) - tx_node_id = int(tx_node.lstrip(COUNTER_NODE_PREFIX)) - container_count = int(container[len(CONTAINER_PREFIX):] or 1) - if tx_node_id < 0: - yield self._remove_path(tx_path) - return - - container_size = MAX_SEQUENCE_COUNTER + 1 - automatic_offset = (container_count - 1) * container_size - txid = self._txid_manual_offset + automatic_offset + tx_node_id - - if txid < 1: - yield self._remove_path(tx_path) - return - - # If the transaction is still valid, return the time it was created. - if tx_time + MAX_TX_DURATION >= time.time(): - raise gen.Return(tx_time) - - yield self._batch_resolver.resolve(txid, composite_indexes) - yield self._remove_locks(txid, tx_path) - yield self._remove_path(tx_path) - yield self._batch_resolver.cleanup(txid) - - @gen.coroutine - def _fetch_and_clean(self, worker_index, worker_count): - """ Cleans up expired transactions. - - Args: - worker_index: An integer specifying this worker's index. - worker_count: An integer specifying the number of total workers. - Returns: - A float specifying the time of the oldest valid transaction as a unix - timestamp. - """ - self._txids_cleaned = 0 - self._oldest_valid_tx_time = time.time() - - children = [] - for index, container in enumerate(self._containers): - container_path = '/'.join([self._project_node, container]) - new_children = yield self._tornado_zk.get_children(container_path) - - if not new_children and index < len(self._containers) - 1: - self._inactive_containers.add(container) - - children.extend(['/'.join([container_path, node]) - for node in new_children]) - - logger.debug( - 'Found {} transaction IDs for {}'.format(len(children), self.project_id)) - - if not children: - raise gen.Return(self._oldest_valid_tx_time) - - # Refresh these each time so that the indexes are fresh. - project_index_manager = self._index_manager.projects[self.project_id] - composite_indexes = project_index_manager.indexes_pb - - for tx_path in children: - tx_node_id = int(tx_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX)) - # Only resolve transactions that this worker has been assigned. - if tx_node_id % worker_count != worker_index: - continue - - yield self._worker_queue.put((tx_path, composite_indexes)) - - yield self._worker_queue.join() - - if self._txids_cleaned > 0: - logger.info('Cleaned up {} expired txids for {}'.format( - self._txids_cleaned, self.project_id)) - - raise gen.Return(self._oldest_valid_tx_time) - - -class TransactionGroomer(object): - """ Cleans up expired transactions. """ - def __init__(self, zk_client, db_access, thread_pool, index_manager): - """ Creates a new TransactionGroomer. - - Args: - zk_client: A KazooClient. - db_access: A DatastoreProxy. - thread_pool: A ThreadPoolExecutor. - index_manager: An IndexManager. 
- """ - self.projects = {} - - self._zk_client = zk_client - self._db_access = db_access - self._thread_pool = thread_pool - self._index_manager = index_manager - - self._coordinator = GroomingCoordinator(self._zk_client) - - self._zk_client.ensure_path('/appscale/projects') - self.projects_watch = zk_client.ChildrenWatch( - '/appscale/projects', self._update_projects) - - @gen.coroutine - def _update_projects(self, new_projects): - """ Handles project additions and deletions. - - Args: - new_projects: A list of string specifying project IDs. - """ - # The DatastoreProxy expects bare strings for project IDs. - new_projects = [str(project) for project in new_projects] - to_remove = [project for project in self.projects - if project not in new_projects] - for old_project in to_remove: - yield self.projects[old_project].stop() - del self.projects[old_project] - - for new_project in new_projects: - if new_project not in self.projects: - self.projects[new_project] = ProjectGroomer( - new_project, self._coordinator, self._zk_client, self._db_access, - self._thread_pool, self._index_manager) - - def _update_projects_watch(self, new_projects): - """ Handles project additions or deletions. - - Args: - new_projects: A list of strings specifying project IDs. - """ - main_io_loop = IOLoop.instance() - main_io_loop.add_callback(self._update_projects, new_projects) - - -def main(): - """ Starts the groomer. """ - logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) - - parser = argparse.ArgumentParser() - parser.add_argument('-v', '--verbose', action='store_true', - help='Output debug-level logging') - args = parser.parse_args() - - if args.verbose: - logging.getLogger('appscale').setLevel(logging.DEBUG) - - zk_hosts = appscale_info.get_zk_node_ips() - zk_client = KazooClient(hosts=','.join(zk_hosts), - connection_retry=ZK_PERSISTENT_RECONNECTS, - command_retry=KazooRetry(max_tries=-1)) - zk_client.start() - - db_access = DatastoreProxy() - - thread_pool = ThreadPoolExecutor(4) - - index_manager = IndexManager(zk_client, None) - - TransactionGroomer(zk_client, db_access, thread_pool, index_manager) - logger.info('Starting transaction groomer') - - IOLoop.current().start() diff --git a/AppDB/appscale/datastore/scripts/ua_server.py b/AppDB/appscale/datastore/scripts/ua_server.py index 45efa20577..cf09af2e7f 100644 --- a/AppDB/appscale/datastore/scripts/ua_server.py +++ b/AppDB/appscale/datastore/scripts/ua_server.py @@ -21,7 +21,6 @@ from appscale.common import appscale_info, retrying from appscale.common.constants import ( LOG_FORMAT, UA_SERVERS_NODE, ZK_PERSISTENT_RECONNECTS) -from appscale.datastore import appscale_datastore from appscale.datastore.dbconstants import ( AppScaleDBConnectionError, USERS_SCHEMA, USERS_TABLE ) @@ -30,24 +29,15 @@ # Name of the users table which stores information about AppScale users. USER_TABLE = USERS_TABLE -# The default datastore used to store user and app information. -DEFAULT_DATASTORE = "cassandra" - # The port this server binds to. DEFAULT_PORT = 4342 # The port avaialble from the outside via SSL. DEFAULT_SSL_PORT = 4343 -# The default datastore used. -datastore_type = DEFAULT_DATASTORE - # The port this application binds to. bindport = DEFAULT_PORT -# The datastore error codes. -ERROR_CODES = [] - # Global secret to validate incoming soap requests. 
super_secret = appscale_info.get_secret() @@ -173,7 +163,7 @@ def connect_to_postgres(zk_client): ) -class Users: +class Users(object): attributes_ = USERS_SCHEMA def __init__(self, email, password, utype): self.email_ = email @@ -273,31 +263,21 @@ def does_user_exist(username, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'SELECT 1 FROM {table} ' - 'WHERE email = %(username)s' - .format(table=full_table_name), - vars={ - 'username': username, - } - ) - row = pg_cursor.fetchone() - - if not row: - raise gen.Return("false") - raise gen.Return("true") + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT 1 FROM {table} ' + 'WHERE email = %(username)s' + .format(table=full_table_name), + vars={ + 'username': username, + } + ) + row = pg_cursor.fetchone() - try: - result = yield db.get_entity(USER_TABLE, username, ["email"]) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) - if result[0] in ERROR_CODES and len(result) == 2: - raise gen.Return("true") - else: + if not row: raise gen.Return("false") + raise gen.Return("true") @retry_pg_connection @@ -310,54 +290,33 @@ def get_user_data(username, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'SELECT {columns} FROM {table} ' - 'WHERE email = %(username)s' - .format(table=full_table_name, columns=', '.join(USERS_SCHEMA)), - vars={ - 'username': username, - } - ) - result = pg_cursor.fetchone() - - # todo: delete it adter removal of Cassandra - result = list(result) - result[2] = time.mktime(result[2].timetuple()) - result[3] = time.mktime(result[3].timetuple()) - result[4] = time.mktime(result[4].timetuple()) - - if not result: - raise gen.Return('Error: User {} does not exist'.format(username)) - if len(USERS_SCHEMA) != len(result): - raise gen.Return( - "Error: Bad length of user schema vs user result " - "user schema: " + str(USERS_SCHEMA) + " result: " + str(result) + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT {columns} FROM {table} ' + 'WHERE email = %(username)s' + .format(table=full_table_name, columns=', '.join(USERS_SCHEMA)), + vars={ + 'username': username, + } ) + result = pg_cursor.fetchone() - user = Users("a", "b", "c") - user.unpackit(result) - raise gen.Return(user.stringit()) - + # todo: delete it adter removal of Cassandra + result = list(result) + result[2] = time.mktime(result[2].timetuple()) + result[3] = time.mktime(result[3].timetuple()) + result[4] = time.mktime(result[4].timetuple()) - try: - result = yield db.get_entity(USER_TABLE, username, user_schema) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) - - if result[0] in ERROR_CODES or len(result) == 1: - result = result[1:] - else: - raise gen.Return("Error: " + result[0]) - if len(user_schema) != len(result): + if not result: + raise gen.Return('Error: User {} does not exist'.format(username)) + if len(USERS_SCHEMA) != len(result): raise gen.Return( "Error: Bad length of user schema vs user result " - "user schem:" + str(user_schema) 
+ " result: " + str(result) + "user schema: " + str(USERS_SCHEMA) + " result: " + str(result) ) - user = Users("a","b", "c") + user = Users("a", "b", "c") user.unpackit(result) raise gen.Return(user.stringit()) @@ -383,35 +342,27 @@ def commit_new_user(user, passwd, utype, secret): if ret == "true": raise gen.Return(error) - if pg_connection_wrapper: - n_user = Users(user, passwd, utype) - params = n_user.paramit() - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'INSERT INTO {table} ({columns}) ' - 'VALUES ( ' - ' %(email)s, %(pw)s, %(date_creation)s, %(date_change)s, ' - ' %(date_last_login)s, %(applications)s, %(appdrop_rem_token)s, ' - ' %(appdrop_rem_token_exp)s, %(visit_cnt)s, %(cookie)s, ' - ' %(cookie_ip)s, %(cookie_exp)s, %(cksum)s, %(enabled)s, %(type)s, ' - ' %(is_cloud_admin)s, %(capabilities)s ' - ') ' - 'RETURNING date_last_login' - .format(table=full_table_name, columns=', '.join(USERS_SCHEMA)), - vars=params - ) - row = pg_cursor.fetchone() - if row: - raise gen.Return("true") - raise gen.Return("false") - n_user = Users(user, passwd, utype) - array = n_user.arrayit() - result = yield db.put_entity(USER_TABLE, user, user_schema, array) - if result[0] not in ERROR_CODES: - raise gen.Return("false") - raise gen.Return("true") + params = n_user.paramit() + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'INSERT INTO {table} ({columns}) ' + 'VALUES ( ' + ' %(email)s, %(pw)s, %(date_creation)s, %(date_change)s, ' + ' %(date_last_login)s, %(applications)s, %(appdrop_rem_token)s, ' + ' %(appdrop_rem_token_exp)s, %(visit_cnt)s, %(cookie)s, ' + ' %(cookie_ip)s, %(cookie_exp)s, %(cksum)s, %(enabled)s, %(type)s, ' + ' %(is_cloud_admin)s, %(capabilities)s ' + ') ' + 'RETURNING date_last_login' + .format(table=full_table_name, columns=', '.join(USERS_SCHEMA)), + vars=params + ) + row = pg_cursor.fetchone() + if row: + raise gen.Return("true") + raise gen.Return("false") @retry_pg_connection @@ -433,45 +384,22 @@ def add_admin_for_app(user, app, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'UPDATE {table} ' - 'SET applications = applications || %(app)s, ' - ' date_change = current_timestamp ' - 'WHERE email = %(user)s ' - 'RETURNING date_change' - .format(table=full_table_name), - vars={'app': '{' + app + '}', 'user': user} - ) - user_result = pg_cursor.fetchone() - - if user_result: - raise gen.Return("true") - raise gen.Return('Error: User {} does not exist'.format(user)) - - try: - user_result = yield db.get_entity(USER_TABLE, user, user_schema) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) - - if user_result[0] not in ERROR_CODES or len(user_result) <= 1: - raise gen.Return(user_result) - - user_result = user_result[1:] - n_user = Users("a", "b", "c") - n_user.unpackit(user_result) - n_user.applications_.append(app) - t = datetime.datetime.now() - n_user.date_change_ = str(time.mktime(t.timetuple())) - array = n_user.arrayit() + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE {table} ' + 'SET applications = applications || %(app)s, ' + ' date_change = current_timestamp ' + 'WHERE email = %(user)s ' + 
'RETURNING date_change' + .format(table=full_table_name), + vars={'app': '{' + app + '}', 'user': user} + ) + user_result = pg_cursor.fetchone() - result = yield db.put_entity(USER_TABLE, user, user_schema, array) - if result[0] in ERROR_CODES: + if user_result: raise gen.Return("true") - else: - raise gen.Return("Error: Unable to update the user.") + raise gen.Return('Error: User {} does not exist'.format(user)) @retry_pg_connection @@ -485,42 +413,21 @@ def get_all_users(secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'SELECT email FROM {table}' - .format(table=full_table_name) - ) - emails = pg_cursor.fetchall() - - if not emails: - raise gen.Return("Error: no users in database") - - # this is a placeholder, soap exception happens if returning empty string - userstring = "____" - for email in emails: - userstring += ":" + email[0] - raise gen.Return(userstring) - + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT email FROM {table}' + .format(table=full_table_name) + ) + emails = pg_cursor.fetchall() - result = yield db.get_table(USER_TABLE, user_schema) - if result[0] not in ERROR_CODES: - raise gen.Return("Error:" + result[0]) - result = result[1:] - for ii in range(0, (len(result)/len(user_schema))): - partial = result[(ii * len(user_schema)): ((1 + ii) * len(user_schema))] - if len(partial) != len(user_schema): - pass - else: - a = Users("x", "x", "user") - a.unpackit(partial) - users.append(a) + if not emails: + raise gen.Return("Error: no users in database") # this is a placeholder, soap exception happens if returning empty string userstring = "____" - for kk in users: - userstring += ":" + kk.email_ + for email in emails: + userstring += ":" + email[0] raise gen.Return(userstring) @@ -535,51 +442,28 @@ def commit_new_token(user, token, token_exp, secret): raise gen.Return("Error: bad secret") columns = ['appdrop_rem_token', 'appdrop_rem_token_exp'] - if pg_connection_wrapper: - params = {'token': token, - 'token_exp': token_exp, - 'user': user} - - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'UPDATE {table} ' - 'SET appdrop_rem_token = %(token)s, ' - ' appdrop_rem_token_exp = %(token_exp)s, ' - ' date_change = current_timestamp ' - 'WHERE email = %(user)s ' - 'RETURNING email' - .format(table=full_table_name), - vars=params - ) - - result = pg_cursor.fetchone() - - if result: - raise gen.Return("true") - raise gen.Return('Error: User {} does not exist'.format(user)) - - try: - result = yield db.get_entity(USER_TABLE, user, columns) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) - - if result[0] not in ERROR_CODES or len(result) == 1: - raise gen.Return("Error: User does not exist") - - result = result[1:] - #appdrop_rem_token = result[0] - #appdrop_rem_token_exp = result[1] - t = datetime.datetime.now() - date_change = str(time.mktime(t.timetuple())) + params = {'token': token, + 'token_exp': token_exp, + 'user': user} + + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE {table} ' + 'SET appdrop_rem_token = %(token)s, ' + ' appdrop_rem_token_exp = %(token_exp)s, ' + ' date_change = 
current_timestamp ' + 'WHERE email = %(user)s ' + 'RETURNING email' + .format(table=full_table_name), + vars=params + ) - values = [token, token_exp, date_change] - columns += ['date_change'] + result = pg_cursor.fetchone() - result = yield db.put_entity(USER_TABLE, user, columns, values) - if result[0] not in ERROR_CODES: - raise gen.Return("false") - raise gen.Return("true") + if result: + raise gen.Return("true") + raise gen.Return('Error: User {} does not exist'.format(user)) @retry_pg_connection @@ -596,43 +480,26 @@ def change_password(user, password, secret): if not password: raise gen.Return("Error: Null password") - if pg_connection_wrapper: - # If user not exist in database - write corresponding message - exist = does_user_exist(user, secret) - if exist != "true": - raise gen.Return('Error: User {} does not exist'.format(user)) - - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - - pg_cursor.execute( - 'UPDATE {table} ' - 'SET pw = %(password)s ' - 'WHERE email = %(user)s AND enabled = TRUE ' - 'RETURNING enabled' - .format(table=full_table_name), - vars={'password': password, 'user': user} - ) - row = pg_cursor.fetchone() - - if not row: - raise gen.Return("Error: User must be enabled to change password") - raise gen.Return("true") + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) - try: - result = yield db.get_entity(USER_TABLE, user, ['enabled']) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: - if result[0] not in ERROR_CODES or len(result) == 1: - raise gen.Return("Error: user does not exist") + pg_cursor.execute( + 'UPDATE {table} ' + 'SET pw = %(password)s ' + 'WHERE email = %(user)s AND enabled = TRUE ' + 'RETURNING enabled' + .format(table=full_table_name), + vars={'password': password, 'user': user} + ) + row = pg_cursor.fetchone() - if result[1] == "false": + if not row: raise gen.Return("Error: User must be enabled to change password") - - result = yield db.put_entity(USER_TABLE, user, ['pw'], [password]) - if result[0] not in ERROR_CODES: - raise gen.Return("Error:" + result[0]) raise gen.Return("true") @@ -646,40 +513,25 @@ def enable_user(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - # If user not exist in database - write corresponding message - exist = does_user_exist(user, secret) - if exist != "true": - raise gen.Return('Error: User {} does not exist'.format(user)) - - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'UPDATE {table} ' - 'SET enabled = TRUE ' - 'WHERE email = %(user)s AND enabled = FALSE ' - 'RETURNING enabled' - .format(table=full_table_name), - vars={'user': user} - ) - row = pg_cursor.fetchone() - - if not row: - raise gen.Return("Error: Trying to enable a user twice") - raise gen.Return("true") + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) - try: - result = yield db.get_entity(USER_TABLE, user, ['enabled']) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) + 
with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE {table} ' + 'SET enabled = TRUE ' + 'WHERE email = %(user)s AND enabled = FALSE ' + 'RETURNING enabled' + .format(table=full_table_name), + vars={'user': user} + ) + row = pg_cursor.fetchone() - if result[0] not in ERROR_CODES or len(result) != 2: - raise gen.Return("Error: " + result[0]) - if result[1] == "true": + if not row: raise gen.Return("Error: Trying to enable a user twice") - result = yield db.put_entity(USER_TABLE, user, ['enabled'], ['true']) - if result[0] not in ERROR_CODES: - raise gen.Return("false") raise gen.Return("true") @@ -693,41 +545,25 @@ def disable_user(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - # If user not exist in database - write corresponding message - exist = does_user_exist(user, secret) - if exist != "true": - raise gen.Return('Error: User {} does not exist'.format(user)) - - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'UPDATE {table} ' - 'SET enabled = FALSE ' - 'WHERE email = %(user)s AND enabled = TRUE ' - 'RETURNING enabled' - .format(table=full_table_name), - vars={'user': user} - ) - row = pg_cursor.fetchone() - - if not row: - raise gen.Return("Error: Trying to disable a user twice") - raise gen.Return("true") + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) - try: - result = yield db.get_entity(USER_TABLE, user, ['enabled']) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE {table} ' + 'SET enabled = FALSE ' + 'WHERE email = %(user)s AND enabled = TRUE ' + 'RETURNING enabled' + .format(table=full_table_name), + vars={'user': user} + ) + row = pg_cursor.fetchone() - if result[0] not in ERROR_CODES or len(result) != 2: - raise gen.Return("Error: " + result[0]) - if result[1] == "false": + if not row: raise gen.Return("Error: Trying to disable a user twice") - - result = yield db.put_entity(USER_TABLE, user, ['enabled'], ['false']) - if result[0] not in ERROR_CODES: - raise gen.Return("false") raise gen.Return("true") @@ -741,41 +577,24 @@ def delete_user(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - # If user not exist in database - write corresponding message - exist = does_user_exist(user, secret) - if exist != "true": - raise gen.Return('Error: User {} does not exist'.format(user)) - - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'DELETE FROM {table} ' - 'WHERE email = %(user)s AND enabled = FALSE ' - 'RETURNING enabled' - .format(table=full_table_name), - vars={'user': user} - ) - row = pg_cursor.fetchone() - - if not row: - raise gen.Return("Error: unable to delete active user. 
Disable user first") - raise gen.Return("true") - - try: - result = yield db.get_entity(USER_TABLE, user, ['enabled']) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) + # If user not exist in database - write corresponding message + exist = does_user_exist(user, secret) + if exist != "true": + raise gen.Return('Error: User {} does not exist'.format(user)) - if result[0] not in ERROR_CODES or len(result) != 2: - raise gen.Return("false") + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'DELETE FROM {table} ' + 'WHERE email = %(user)s AND enabled = FALSE ' + 'RETURNING enabled' + .format(table=full_table_name), + vars={'user': user} + ) + row = pg_cursor.fetchone() - if result[1] == 'true': + if not row: raise gen.Return("Error: unable to delete active user. Disable user first") - - result = yield db.delete_row(USER_TABLE, user) - if result[0] not in ERROR_CODES: - raise gen.Return("false") raise gen.Return("true") @@ -789,29 +608,19 @@ def is_user_enabled(user, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'SELECT enabled FROM {table} ' - 'WHERE email = %(user)s' - .format(table=full_table_name), - vars={'user': user} - ) - result = pg_cursor.fetchone() - - if not result: - raise gen.Return("false") - raise gen.Return(str(result[0]).lower()) - - try: - result = yield db.get_entity(USER_TABLE, user, ['enabled']) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT enabled FROM {table} ' + 'WHERE email = %(user)s' + .format(table=full_table_name), + vars={'user': user} + ) + result = pg_cursor.fetchone() - if result[0] not in ERROR_CODES or len(result) == 1: + if not result: raise gen.Return("false") - raise gen.Return(result[1]) + raise gen.Return(str(result[0]).lower()) @retry_pg_connection @@ -823,30 +632,19 @@ def is_user_cloud_admin(username, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'SELECT is_cloud_admin FROM {table} ' - 'WHERE email = %(user)s ' - .format(table=full_table_name), - vars={'user': username} - ) - result = pg_cursor.fetchone() - - if not result: - raise gen.Return("false") - raise gen.Return(str(result[0]).lower()) - - try: - result = yield db.get_entity(USER_TABLE, username, ["is_cloud_admin"]) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT is_cloud_admin FROM {table} ' + 'WHERE email = %(user)s ' + .format(table=full_table_name), + vars={'user': username} + ) + result = pg_cursor.fetchone() - if result[0] in ERROR_CODES and len(result) == 2: - raise gen.Return(result[1]) - else: + if not result: raise gen.Return("false") + raise gen.Return(str(result[0]).lower()) @retry_pg_connection @@ -858,27 +656,21 @@ def set_cloud_admin_status(username, is_cloud_admin, secret): if secret != super_secret: raise 
gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'UPDATE {table} ' - 'SET is_cloud_admin = %(is_cloud_admin)s ' - 'WHERE email = %(user)s ' - 'RETURNING date_change' - .format(table=full_table_name), - vars={'is_cloud_admin': is_cloud_admin, 'user': username} - ) - user_result = pg_cursor.fetchone() - - if user_result: - raise gen.Return("true") - raise gen.Return('Error: User {} does not exist'.format(username)) + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'UPDATE {table} ' + 'SET is_cloud_admin = %(is_cloud_admin)s ' + 'WHERE email = %(user)s ' + 'RETURNING date_change' + .format(table=full_table_name), + vars={'is_cloud_admin': is_cloud_admin, 'user': username} + ) + user_result = pg_cursor.fetchone() - result = yield db.put_entity(USER_TABLE, username, ['is_cloud_admin'], [is_cloud_admin]) - if result[0] not in ERROR_CODES: - raise gen.Return("false:" + result[0]) - raise gen.Return("true") + if user_result: + raise gen.Return("true") + raise gen.Return('Error: User {} does not exist'.format(username)) @retry_pg_connection @@ -890,31 +682,19 @@ def get_capabilities(username, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'SELECT capabilities FROM {table} ' - 'WHERE email = %(user)s ' - .format(table=full_table_name), - vars={'user': username} - ) - user_result = pg_cursor.fetchone() - - if user_result: - raise gen.Return(user_result[0]) - raise gen.Return('Error: User {} does not exist'.format(username)) - + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + pg_cursor.execute( + 'SELECT capabilities FROM {table} ' + 'WHERE email = %(user)s ' + .format(table=full_table_name), + vars={'user': username} + ) + user_result = pg_cursor.fetchone() - try: - result = yield db.get_entity(USER_TABLE, username, ["capabilities"]) - except AppScaleDBConnectionError as db_error: - raise gen.Return('Error: {}'.format(db_error)) - - if result[0] in ERROR_CODES and len(result) == 2: - raise gen.Return(result[1]) - else: - raise gen.Return([result[0]]) + if user_result: + raise gen.Return(user_result[0]) + raise gen.Return('Error: User {} does not exist'.format(username)) @retry_pg_connection @@ -926,34 +706,25 @@ def set_capabilities(username, capabilities, secret): if secret != super_secret: raise gen.Return("Error: bad secret") - if pg_connection_wrapper: - with pg_connection_wrapper.get_connection() as pg_connection: - with pg_connection.cursor() as pg_cursor: - pg_cursor.execute( - 'UPDATE {table} ' - 'SET capabilities = %(capabilities)s ' - 'WHERE email = %(user)s ' - 'RETURNING date_change' - .format(table=full_table_name), - vars={'capabilities': capabilities, 'user': username} - ) - user_result = pg_cursor.fetchone() - - if user_result: - raise gen.Return("true") - raise gen.Return('Error: User {} does not exist'.format(username)) - - result = yield db.put_entity(USER_TABLE, username, ['capabilities'], [capabilities]) - if result[0] not in ERROR_CODES: - raise gen.Return("false:" + result[0]) - raise gen.Return("true") + with pg_connection_wrapper.get_connection() as pg_connection: + with pg_connection.cursor() as pg_cursor: + 
pg_cursor.execute( + 'UPDATE {table} ' + 'SET capabilities = %(capabilities)s ' + 'WHERE email = %(user)s ' + 'RETURNING date_change' + .format(table=full_table_name), + vars={'capabilities': capabilities, 'user': username} + ) + user_result = pg_cursor.fetchone() + if user_result: + raise gen.Return("true") + raise gen.Return('Error: User {} does not exist'.format(username)) def usage(): print "args: --apps or -a for the application location" print " --users or -u for the user location" - print " --type or -t for type of datastore" - print " type available: cassandra" print " --port or -p for server port" @@ -998,9 +769,7 @@ def main(): logger.info('Starting UAServer') global bindport - global datastore_type global db - global ERROR_CODES global user_schema for ii in range(1, len(sys.argv)): @@ -1008,7 +777,7 @@ def main(): usage() sys.exit() elif sys.argv[ii] in ('-t', "--type"): - datastore_type = sys.argv[ii + 1] + # type is ignored ii += 1 elif sys.argv[ii] in ('-p', "--port"): bindport = int(sys.argv[ii + 1] ) @@ -1018,35 +787,8 @@ def main(): register_location(appscale_info.get_private_ip(), bindport) - ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() - valid_datastores = appscale_datastore.DatastoreFactory.valid_datastores() - if datastore_type not in valid_datastores: - raise Exception('{} not in valid datastores ({})'. - format(datastore_type, valid_datastores)) - connect_to_postgres(zk_client) - if pg_connection_wrapper: - init_table(pg_connection_wrapper) - db = None - else: - db = appscale_datastore.DatastoreFactory.getDatastore(datastore_type) - - # Keep trying until it gets the schema. - timeout = 5 - while 1: - try: - user_schema = db.get_schema_sync(USER_TABLE) - except AppScaleDBConnectionError: - time.sleep(timeout) - continue - - if user_schema[0] in ERROR_CODES: - user_schema = user_schema[1:] - Users.attributes_ = user_schema - else: - time.sleep(timeout) - continue - break + init_table(pg_connection_wrapper) ip = "0.0.0.0" server = SOAPpy.SOAPServer((ip, bindport)) diff --git a/AppDB/appscale/datastore/scripts/ua_server_backup.py b/AppDB/appscale/datastore/scripts/ua_server_backup.py index 9162d3c86c..ad8f4cbcf3 100644 --- a/AppDB/appscale/datastore/scripts/ua_server_backup.py +++ b/AppDB/appscale/datastore/scripts/ua_server_backup.py @@ -94,7 +94,7 @@ def connect_to_postgres(zk_client): logger.info('Using PostgreSQL as a backend for UA Server') else: pg_dsn = None - logger.info('Using Cassandra as a backend for UA Server') + logger.warn('PostgreSQL backend configuration not found for UA Server') if pg_dsn: pg_connection_wrapper = ( PostgresConnectionWrapper(dsn=pg_dsn[0]) @@ -151,23 +151,6 @@ def create_backup_dir(backup_dir): logger.info("Backup dir created: {0}".format(backup_dir)) -def prepare_for_backup(rows): - """ Converts date fields to timestamp and application list to str. - - Args: - rows: A tuple of all rows in postgres database. 
- """ - # todo: delete it after removal of Cassandra - for row in rows: - # 2 - 4 indexes of dates - row[2] = datetime.datetime.fromtimestamp(row[2]) - row[3] = datetime.datetime.fromtimestamp(row[3]) - row[4] = datetime.datetime.fromtimestamp(row[4]) - # 5 index of applications list - if row[5]: - row[5] = row[5].split(':') - - def main(): logging.basicConfig(format=LOG_FORMAT, level=logging.INFO) @@ -186,42 +169,9 @@ def main(): zk_client.start() connect_to_postgres(zk_client) - datastore_type = 'cassandra' - - ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() - - db = appscale_datastore.DatastoreFactory.getDatastore(datastore_type) - - # Keep trying until it gets the schema. - backoff = 5 - retries = 3 - while retries >= 0: - try: - user_schema = db.get_schema_sync(USERS_TABLE) - except AppScaleDBConnectionError: - retries -= 1 - time.sleep(backoff) - continue - - if user_schema[0] in ERROR_CODES: - user_schema = user_schema[1:] - else: - retries -= 1 - time.sleep(backoff) - continue - break - - # If no response from cassandra - if retries == -1: - raise AppScaleDBConnectionError('No response from cassandra.') - schema_cols_num = len(USERS_SCHEMA) - if pg_connection_wrapper: - table = get_table_sync(db, table_name, USERS_SCHEMA) - else: - table = get_table_sync(db, USERS_TABLE, user_schema)[1:] - reshaped_table = reshape(table, schema_cols_num) + table = get_table_sync(db, table_name, USERS_SCHEMA) create_backup_dir(BACKUP_FILE_LOCATION) @@ -233,9 +183,5 @@ def main(): with open(output_file, 'w') as fout: writer = csv.DictWriter(fout, delimiter=',', fieldnames=USERS_SCHEMA) writer.writeheader() - if pg_connection_wrapper: - rows = [dict(zip(USERS_SCHEMA, row)) for row in table] - else: - prepare_for_backup(reshaped_table) - rows = [dict(zip(USERS_SCHEMA, row)) for row in reshaped_table] + rows = [dict(zip(USERS_SCHEMA, row)) for row in table] writer.writerows(rows) diff --git a/AppDB/appscale/datastore/scripts/ua_server_restore.py b/AppDB/appscale/datastore/scripts/ua_server_restore.py index 78fc266f89..68d7fbfdde 100644 --- a/AppDB/appscale/datastore/scripts/ua_server_restore.py +++ b/AppDB/appscale/datastore/scripts/ua_server_restore.py @@ -89,7 +89,7 @@ def connect_to_postgres(zk_client): logger.info('Using PostgreSQL as a backend for UA Server') else: pg_dsn = None - logger.info('Using Cassandra as a backend for UA Server') + logger.warn('PostgreSQL backend configuration not found for UA Server') if pg_dsn: pg_connection_wrapper = ( PostgresConnectionWrapper(dsn=pg_dsn[0]) @@ -150,66 +150,18 @@ def main(): zk_client.start() connect_to_postgres(zk_client) - datastore_type = 'cassandra' - - ERROR_CODES = appscale_datastore.DatastoreFactory.error_codes() - - db = appscale_datastore.DatastoreFactory.getDatastore(datastore_type) - - # Keep trying until it gets the schema. 
- backoff = 5 - retries = 3 - while retries >= 0: - try: - user_schema = db.get_schema_sync(USERS_TABLE) - except AppScaleDBConnectionError: - retries -= 1 - time.sleep(backoff) - continue - - if user_schema[0] in ERROR_CODES: - user_schema = user_schema[1:] - else: - retries -= 1 - time.sleep(backoff) - continue - break - - # If no response from cassandra - if retries == -1: - raise AppScaleDBConnectionError('No response from cassandra.') - input_file = args.input with open(input_file, 'r') as fin: reader = csv.DictReader(fin, delimiter=',') # Iterate through all users in file for row in reader: - if pg_connection_wrapper: - if not row['applications']: - row['applications'] = None - else: - # delete square brackets added by csv module - apps = row['applications'][1:-1] - # csv module adds extra quotes each time - apps = apps.replace("'", "") - row['applications'] = '{' + apps + '}' - put_entity_sync(db, table_name, row['email'], USERS_SCHEMA, row) + if not row['applications']: + row['applications'] = None else: - # Convert dates to timestamp - t = str(time.mktime(datetime.datetime.strptime( - row['date_creation'], '%Y-%m-%d %H:%M:%S').timetuple())) - row['date_creation'] = t - t = str(time.mktime(datetime.datetime.strptime( - row['date_change'], '%Y-%m-%d %H:%M:%S').timetuple())) - row['date_change'] = t - t = str(time.mktime(datetime.datetime.strptime( - row['date_last_login'], '%Y-%m-%d %H:%M:%S').timetuple())) - row['date_last_login'] = t - + # delete square brackets added by csv module apps = row['applications'][1:-1] - apps = apps.replace("'", "").replace(', ', ':') - row['applications'] = apps - - array = [row[key] for key in USERS_SCHEMA] - put_entity_sync(db, USERS_TABLE, array[0], user_schema, array) + # csv module adds extra quotes each time + apps = apps.replace("'", "") + row['applications'] = '{' + apps + '}' + put_entity_sync(db, table_name, row['email'], USERS_SCHEMA, row) diff --git a/AppDB/appscale/datastore/scripts/update_index.py b/AppDB/appscale/datastore/scripts/update_index.py deleted file mode 100644 index e780083acf..0000000000 --- a/AppDB/appscale/datastore/scripts/update_index.py +++ /dev/null @@ -1,101 +0,0 @@ -import argparse -import sys - -from kazoo.client import KazooClient, KazooRetry - -from appscale.common import appscale_info -from appscale.common.constants import ZK_PERSISTENT_RECONNECTS -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.datastore.index_manager import IndexManager -from appscale.datastore.utils import tornado_synchronous -from .. import appscale_datastore_batch -from ..datastore_distributed import DatastoreDistributed -from ..zkappscale import zktransaction as zk -from ..zkappscale.transaction_manager import TransactionManager - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore import datastore_pb - - -def prettify_index(index, initial_indent=3): - """ Formats an index definition as it appears in the YAML definition. - - Args: - index: An entity_pb.Index object. - Returns: - A string describing the index. 
- """ - indent = ' ' * initial_indent - output = 'kind: {}\n'.format(index.entity_type()) - if index.ancestor(): - output += '{}ancestor: yes\n'.format(indent) - output += '{}properties:\n'.format(indent) - for prop in index.property_list(): - output += '{}- name: {}\n'.format(indent, prop.name()) - if prop.direction() == prop.DESCENDING: - output += '{}direction: desc\n'.format(indent + ' ') - return output - - -def main(): - """ Updates a composite index after prompting the user. """ - parser = argparse.ArgumentParser(description='Updates composite indexes') - parser.add_argument('--type', '-t', default='cassandra', - help='The datastore backend type') - parser.add_argument('--app_id', '-a', required=True, help='The project ID') - parser.add_argument('--all', action='store_true', - help='Updates all composite indexes') - args = parser.parse_args() - - datastore_batch = appscale_datastore_batch.DatastoreFactory.\ - getDatastore(args.type) - zookeeper_locations = appscale_info.get_zk_locations_string() - retry_policy = KazooRetry(max_tries=5) - zk_client = KazooClient( - zookeeper_locations, connection_retry=ZK_PERSISTENT_RECONNECTS, - command_retry=retry_policy) - zk_client.start() - zookeeper = zk.ZKTransaction(zk_client) - transaction_manager = TransactionManager(zookeeper.handle) - datastore_access = DatastoreDistributed( - datastore_batch, transaction_manager, zookeeper=zookeeper) - index_manager = IndexManager(zookeeper.handle, datastore_access) - datastore_access.index_manager = index_manager - - indices = index_manager.projects[args.app_id].indexes_pb - if len(indices) == 0: - print('No composite indices found for app {}'.format(args.app_id)) - zk_client.stop() - zk_client.close() - return - - update_composite_index_sync = tornado_synchronous( - datastore_access.update_composite_index) - - if args.all: - for index in indices: - update_composite_index_sync(args.app_id, index) - print('Successfully updated all composite indexes') - return - - selection = -1 - selection_range = range(1, len(indices) + 1) - while selection not in selection_range: - for number, index in enumerate(indices, start=1): - pretty_index = prettify_index(index.definition()) - print('{}) {}'.format(number, pretty_index)) - - try: - selection = int(raw_input('Select the index you want to update. (1-{}) ' - .format(len(indices)))) - except KeyboardInterrupt: - zk_client.stop() - zk_client.close() - sys.exit() - - selected_index = indices[selection - 1] - update_composite_index_sync(args.app_id, selected_index) - - zk_client.stop() - zk_client.close() - print('Index successfully updated') diff --git a/AppDB/appscale/datastore/scripts/upgrade_schema.py b/AppDB/appscale/datastore/scripts/upgrade_schema.py deleted file mode 100644 index b090719a08..0000000000 --- a/AppDB/appscale/datastore/scripts/upgrade_schema.py +++ /dev/null @@ -1,114 +0,0 @@ -""" Performs schema upgrades. """ - -import logging -import time - -from cassandra.cluster import Cluster - -from appscale.common import appscale_info -from ..cassandra_env.cassandra_interface import KEYSPACE -from ..cassandra_env.constants import LB_POLICY - -# The number of rows to copy at a time. -BATCH_SIZE = 100 - -# The number of seconds to wait before logging progress. -LOGGING_INTERVAL = 5 - -logger = logging.getLogger(__name__) - - -def copy_column(session, table, key_column, old_column, new_column): - """ Copies values from one column to another. - - Args: - session: A cassandra-driver session. - table: A string specifying the table. 
- key_column: A string specifying the partition key column. - old_column: A string specifying the column that should be copied from. - new_column: A string specifying the column that should be copied to. - """ - select = session.prepare(""" - SELECT {key}, {old_column} - FROM {table} - WHERE {key} > ? - LIMIT {batch_size} - ALLOW FILTERING - """.format(table=table, key=key_column, old_column=old_column, - batch_size=BATCH_SIZE)) - insert = session.prepare(""" - INSERT INTO {table} ({key}, {new_column}) - VALUES (?, ?) - """.format(table=table, key=key_column, new_column=new_column)) - - logger.info('Populating {}.{}'.format(table, new_column)) - start_row = '' - last_logged = time.time() - total_copied = 0 - while True: - results = session.execute(select, (start_row,)) - futures = [] - last_row = None - for result in results: - futures.append( - session.execute_async(insert, (result[0], result[1]))) - last_row = result[0] - - if last_row is None: - break - - for future in futures: - future.result() - total_copied += 1 - - if time.time() > last_logged + LOGGING_INTERVAL: - logger.info('Copied {} rows'.format(total_copied)) - - start_row = last_row - - logger.info('Copied {} rows'.format(total_copied)) - - -def main(): - """ Performs schema upgrades. """ - hosts = appscale_info.get_db_ips() - cluster = Cluster(hosts, load_balancing_policy=LB_POLICY) - session = cluster.connect(KEYSPACE) - - table = 'group_updates' - column = 'last_update' - temp_column = 'last_update_temp' - key_column = 'group' - tables = cluster.metadata.keyspaces[KEYSPACE].tables - - assert table in tables, 'The table {} was not found'.format(table) - - columns = tables[table].columns - assert column in columns or temp_column in columns,\ - '{}.{} was not found'.format(table, column) - - if (column in columns and columns[column].cql_type == 'bigint' and - temp_column not in columns): - logger.info('{}.{} is already the correct type'.format(table, column)) - return - - if column in columns and columns[column].cql_type != 'bigint': - if temp_column not in columns: - logger.info('Adding new column with correct type') - statement = 'ALTER TABLE {} ADD {} int'.format(table, temp_column) - session.execute(statement) - - copy_column(session, table, 'group', column, temp_column) - - logger.info('Dropping {}.{}'.format(table, column)) - session.execute('ALTER TABLE {} DROP {}'.format(table, column)) - - logger.info('Creating {}.{}'.format(table, column)) - session.execute('ALTER TABLE {} ADD {} bigint'.format(table, column)) - - copy_column(session, table, key_column, temp_column, column) - - logger.info('Dropping {}.{}'.format(table, temp_column)) - session.execute('ALTER TABLE {} DROP {}'.format(table, temp_column)) - - logger.info('Schema upgrade complete') diff --git a/AppDB/appscale/datastore/scripts/view_records.py b/AppDB/appscale/datastore/scripts/view_records.py deleted file mode 100644 index b96351330f..0000000000 --- a/AppDB/appscale/datastore/scripts/view_records.py +++ /dev/null @@ -1,82 +0,0 @@ -""" View all application entities. """ - -import sys - -from .. 
import appscale_datastore_batch -from ..dbconstants import APP_ENTITY_SCHEMA -from ..dbconstants import APP_ENTITY_TABLE -from ..dbconstants import APP_KIND_SCHEMA -from ..dbconstants import APP_KIND_TABLE -from ..dbconstants import ASC_PROPERTY_TABLE -from ..dbconstants import COMPOSITE_SCHEMA -from ..dbconstants import COMPOSITE_TABLE -from ..dbconstants import DATASTORE_METADATA_SCHEMA -from ..dbconstants import DATASTORE_METADATA_TABLE -from ..dbconstants import DSC_PROPERTY_TABLE -from ..dbconstants import PROPERTY_SCHEMA -from ..dbconstants import TERMINATING_STRING - -_MAX_ENTITIES = 1000000 - - -def get_entities(table, schema, db, first_key, last_key): - """ Gets entities from a table. - - Args: - table: Name of the table - schema: The schema of table to get from - db: The database accessor - first_key: The entity key to start from - last_key: The entity key to stop at - Returns: - The entire table up to _MAX_ENTITIES. - """ - return db.range_query_sync( - table, schema, first_key, last_key, _MAX_ENTITIES) - - -def view_all(entities, table, db): - """ View all entities for a table - - Args: - entities: Shows all entities in a list - table: The table these entities are from - db: database accessor - """ - print - print "TABLE:",table - for ii in entities: - print ii - print - - -def main(): - # Parse args. - DB_TYPE="cassandra" - first_key = "" - last_key = "" - - if len(sys.argv) > 2: - print "usage: appscale-view-all-records [app_id]" - exit(1) - - if len(sys.argv) == 2: - first_key = sys.argv[1] - last_key = first_key + TERMINATING_STRING - - # Fetch entities. - db = appscale_datastore_batch.DatastoreFactory.getDatastore(DB_TYPE) - - tables_to_schemas = { - APP_ENTITY_TABLE: APP_ENTITY_SCHEMA, - ASC_PROPERTY_TABLE: PROPERTY_SCHEMA, - DSC_PROPERTY_TABLE: PROPERTY_SCHEMA, - COMPOSITE_TABLE: COMPOSITE_SCHEMA, - APP_KIND_TABLE: APP_KIND_SCHEMA, - DATASTORE_METADATA_TABLE: DATASTORE_METADATA_SCHEMA, - } - - for table in tables_to_schemas: - entities = get_entities(table, tables_to_schemas[table], - db, first_key, last_key) - view_all(entities, table, db) diff --git a/AppDB/appscale/datastore/utils.py b/AppDB/appscale/datastore/utils.py index 07ba12eebb..64936b2bd2 100644 --- a/AppDB/appscale/datastore/utils.py +++ b/AppDB/appscale/datastore/utils.py @@ -11,7 +11,6 @@ from tornado import ioloop from appscale.datastore import dbconstants, helper_functions -from appscale.datastore.appscale_datastore_batch import DatastoreFactory from appscale.datastore.dbconstants import ( AppScaleDBConnectionError, BadRequest, ID_KEY_LENGTH, ID_SEPARATOR, KEY_DELIMITER, KIND_SEPARATOR, METADATA_TABLE, TERMINATING_STRING @@ -184,58 +183,6 @@ def PopulateQueryResult(self, count, offset, result): self._EncodeCompiledCursor(result.mutable_compiled_cursor()) -def fetch_and_delete_entities(database, table, schema, first_key, - entities_only=False): - """ Deletes all data from datastore. - - Args: - database: The datastore type (e.g. cassandra). - first_key: A str, the first key to be deleted. - Either the app ID or "" to delete all db data. - entities_only: True to delete entities from APP_ENTITY/PROPERTY tables, - False to delete every trace of the given app ID. - """ - # The amount of time to wait before retrying to fetch entities. - backoff_timeout = 30 - - # The default number of rows to fetch at a time. - batch_size = 1000 - - last_key = first_key + '\0' + TERMINATING_STRING - - logger.debug("Deleting application data in the range: {0} - {1}". 
- format(first_key, last_key)) - - db = DatastoreFactory.getDatastore(database) - - # Do not delete metadata, just entities. - if entities_only and table == METADATA_TABLE: - return - - # Loop through the datastore tables and delete data. - logger.info("Deleting data from {0}".format(table)) - - start_inclusive = True - while True: - try: - entities = db.range_query_sync( - table, schema, first_key, last_key, batch_size, - start_inclusive=start_inclusive) - if not entities: - logger.info("No entities found for {}".format(table)) - break - - for ii in entities: - db.batch_delete_sync(table, ii.keys()) - logger.info("Deleted {0} entities".format(len(entities))) - - first_key = entities[-1].keys()[0] - start_inclusive = False - except AppScaleDBConnectionError: - logger.exception('Error while deleting data') - time.sleep(backoff_timeout) - - def encode_index_pb(pb): """ Returns an encoded protocol buffer. diff --git a/AppDB/appscale/datastore/zkappscale/__init__.py b/AppDB/appscale/datastore/zkappscale/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/AppDB/appscale/datastore/zkappscale/constants.py b/AppDB/appscale/datastore/zkappscale/constants.py deleted file mode 100644 index d8a42e302b..0000000000 --- a/AppDB/appscale/datastore/zkappscale/constants.py +++ /dev/null @@ -1,11 +0,0 @@ -# Containers for txid sequences start with this string. -CONTAINER_PREFIX = 'txids' - -# Transaction ID sequence nodes start with this string. -COUNTER_NODE_PREFIX = 'tx' - -# ZooKeeper stores the sequence counter as a signed 32-bit integer. -MAX_SEQUENCE_COUNTER = 2 ** 31 - 1 - -# The name of the node used for manually setting a txid offset. -OFFSET_NODE = 'txid_offset' diff --git a/AppDB/appscale/datastore/zkappscale/entity_lock.py b/AppDB/appscale/datastore/zkappscale/entity_lock.py deleted file mode 100644 index 65bd689d97..0000000000 --- a/AppDB/appscale/datastore/zkappscale/entity_lock.py +++ /dev/null @@ -1,394 +0,0 @@ -import base64 -import uuid - -from kazoo.exceptions import ( - CancelledError, - KazooException, - LockTimeout, - NoNodeError, - NotEmptyError -) -from kazoo.retry import ( - ForceRetryError, - KazooRetry, - RetryFailedError -) -from tornado import gen, ioloop -from tornado.locks import Lock as TornadoLock - -# The ZooKeeper node that contains lock entries for an entity group. -LOCK_PATH_TEMPLATE = u'/appscale/apps/{project}/locks/{namespace}/{group}' - -# The number of seconds to wait for a lock before raising a timeout error. -LOCK_TIMEOUT = 10 - - -def zk_group_path(key): - """ Retrieve the ZooKeeper lock path for a given entity key. - - Args: - key: An entity Reference object. - Returns: - A string containing the location of a ZooKeeper path. - """ - project = key.app().decode('utf-8') - if key.name_space(): - namespace = key.name_space().decode('utf-8') - else: - namespace = u':default' - - first_element = key.path().element(0) - kind = first_element.type().decode('utf-8') - - # Differentiate between types of identifiers. - if first_element.has_id(): - group = u'{}:{}'.format(kind, first_element.id()) - else: - # Kazoo does not accept certain characters (eg. newlines) that Cloud - # Datastore allows. - encoded_id = base64.b64encode(first_element.name()) - group = u'{}::{}'.format(kind, encoded_id.decode('utf-8').rstrip('=')) - - return LOCK_PATH_TEMPLATE.format(project=project, namespace=namespace, - group=group) - - -class EntityLock(object): - """ A ZooKeeper-based entity lock that allows test-and-set operations. 
- - This is based on kazoo's lock recipe, and has been modified to lock multiple - entity groups. This lock is not re-entrant. Repeated calls after already - acquired will block. - """ - _NODE_NAME = '__lock__' - - # Tornado lock which allows tornado to switch to different coroutine - # if current one is waiting for entity group lock - _tornado_lock = TornadoLock() - - def __init__(self, client, keys, txid=None): - """ Create an entity lock. - - Args: - client: A kazoo client. - keys: A list of entity Reference objects. - txid: An integer specifying the transaction ID. - """ - self.client = client - self.paths = [zk_group_path(key) for key in keys] - - # The txid is written to the contender nodes for deadlock resolution. - self.data = str(txid or '') - - self.wake_event = client.handler.event_object() - - # Give the contender nodes a uniquely identifiable prefix in case its - # existence is in question. - self.prefix = uuid.uuid4().hex + self._NODE_NAME - - self.create_paths = [path + '/' + self.prefix for path in self.paths] - - self.create_tried = False - self.is_acquired = False - self.cancelled = False - self._retry = KazooRetry(max_tries=None, - sleep_func=client.handler.sleep_func) - self._lock = client.handler.lock_object() - - def _ensure_path(self): - """ Make sure the ZooKeeper lock paths have been created. """ - for path in self.paths: - self.client.ensure_path(path) - - def cancel(self): - """ Cancel a pending lock acquire. """ - self.cancelled = True - self.wake_event.set() - - @gen.coroutine - def acquire(self): - now = ioloop.IOLoop.current().time() - yield EntityLock._tornado_lock.acquire(now + LOCK_TIMEOUT) - try: - locked = self.unsafe_acquire() - raise gen.Return(locked) - finally: - if not self.is_acquired: - EntityLock._tornado_lock.release() - - def unsafe_acquire(self): - """ Acquire the lock. By default blocks and waits forever. - - Returns: - A boolean indicating whether or not the lock was acquired. - """ - - def _acquire_lock(): - """ Acquire a kazoo thread lock. """ - got_it = self._lock.acquire(False) - if not got_it: - raise ForceRetryError() - return True - - retry = self._retry.copy() - retry.deadline = LOCK_TIMEOUT - - # Prevent other threads from acquiring the lock at the same time. - locked = self._lock.acquire(False) - if not locked: - try: - retry(_acquire_lock) - except RetryFailedError: - return False - - already_acquired = self.is_acquired - try: - gotten = False - try: - gotten = retry(self._inner_acquire) - except RetryFailedError: - if not already_acquired: - self._best_effort_cleanup() - except KazooException: - if not already_acquired: - self._best_effort_cleanup() - self.cancelled = False - raise - if gotten: - self.is_acquired = gotten - if not gotten and not already_acquired: - self._delete_nodes(self.nodes) - return gotten - finally: - self._lock.release() - - def _watch_session(self, state): - """ A callback function for handling connection state changes. - - Args: - state: The new connection state. - """ - self.wake_event.set() - return True - - def _resolve_deadlocks(self, children_list): - """ Check if there are any concurrent cross-group locks. - - Args: - children_list: A list of current transactions for each group. - """ - current_txid = int(self.data) - for index, children in enumerate(children_list): - our_index = children.index(self.nodes[index]) - - # Skip groups where this lock already has the earliest contender. - if our_index == 0: - continue - - # Get transaction IDs for earlier contenders. 
- for child in children[:our_index - 1]: - try: - data, _ = self.client.get( - self.paths[index] + '/' + child) - except NoNodeError: - continue - - # If data is not set, it doesn't belong to a cross-group - # transaction. - if not data: - continue - - child_txid = int(data) - # As an arbitrary rule, require later transactions to - # resolve deadlocks. - if current_txid > child_txid: - # TODO: Implement a more graceful deadlock detection. - self.client.retry(self._delete_nodes, self.nodes) - raise ForceRetryError() - - def _inner_acquire(self): - """ Create contender node(s) and wait until the lock is acquired. """ - - # Make sure the group lock node exists. - self._ensure_path() - - nodes = [None for _ in self.paths] - if self.create_tried: - nodes = self._find_nodes() - else: - self.create_tried = True - - for index, node in enumerate(nodes): - if node is not None: - continue - - # The entity group lock root may have been deleted, so try a few times. - try_num = 0 - while True: - try: - node = self.client.create( - self.create_paths[index], self.data, sequence=True) - break - except NoNodeError: - self.client.ensure_path(self.paths[index]) - if try_num > 3: - raise ForceRetryError() - try_num += 1 - - # Strip off path to node. - node = node[len(self.paths[index]) + 1:] - nodes[index] = node - - self.nodes = nodes - - while True: - self.wake_event.clear() - - # Bail out with an exception if cancellation has been requested. - if self.cancelled: - raise CancelledError() - - children_list = self._get_sorted_children() - - predecessors = [] - for index, children in enumerate(children_list): - try: - our_index = children.index(nodes[index]) - except ValueError: - raise ForceRetryError() - - # If the lock for this group hasn't been acquired, get the predecessor. - if our_index != 0: - predecessors.append( - self.paths[index] + "/" + children[our_index - 1]) - - if not predecessors: - return True - - if len(nodes) > 1: - self._resolve_deadlocks(children_list) - - # Wait for predecessor to be removed. - # TODO: Listen for all at the same time. - for index, predecessor in enumerate(predecessors): - self.client.add_listener(self._watch_session) - try: - if self.client.exists(predecessor, self._watch_predecessor): - self.wake_event.wait(LOCK_TIMEOUT) - if not self.wake_event.isSet(): - error = 'Failed to acquire lock on {} after {} '\ - 'seconds'.format(self.paths, LOCK_TIMEOUT * (index + 1)) - raise LockTimeout(error) - finally: - self.client.remove_listener(self._watch_session) - - def _watch_predecessor(self, event): - """ A callback function for handling contender deletions. - - Args: - event: A ZooKeeper event. - """ - self.wake_event.set() - - def _get_sorted_children(self): - """ Retrieve a list of sorted contenders for each group. - - Returns: - A list of contenders for each group. - """ - children = [] - for path in self.paths: - try: - children.append(self.client.get_children(path)) - except NoNodeError: - children.append([]) - - # Ignore lock path prefix when sorting contenders. - lockname = self._NODE_NAME - for child_list in children: - child_list.sort(key=lambda c: c[c.find(lockname) + len(lockname):]) - return children - - def _find_nodes(self): - """ Retrieve a list of paths this lock has created. - - Returns: - A list of ZooKeeper paths. 
- """ - nodes = [] - for path in self.paths: - try: - children = self.client.get_children(path) - except NoNodeError: - children = [] - - node = None - for child in children: - if child.startswith(self.prefix): - node = child - nodes.append(node) - return nodes - - def _delete_nodes(self, nodes): - """ Remove ZooKeeper nodes. - - Args: - nodes: A list of nodes to delete. - """ - for index, node in enumerate(nodes): - if node is None: - continue - self.client.delete(self.paths[index] + "/" + node) - - def _best_effort_cleanup(self): - """ Attempt to delete nodes that this lock has created. """ - try: - nodes = self._find_nodes() - self._delete_nodes(nodes) - except KazooException: - pass - - def release(self): - """ Release the lock immediately. """ - try: - self.client.retry(self._inner_release) - - # Try to clean up the group lock path. - for path in self.paths: - try: - self.client.delete(path) - except (NotEmptyError, NoNodeError): - pass - return - finally: - if not self.is_acquired: - EntityLock._tornado_lock.release() - - def ensure_release_tornado_lock(self): - """ Ensures that tornado lock (which is global for datastore server) - is released. - It MUST BE CALLED any time when lock is acquired - even if entity group lock in zookeeper left acquired after failure. - """ - if self.is_acquired: - EntityLock._tornado_lock.release() - - def _inner_release(self): - """ Release the lock by removing created nodes. """ - if not self.is_acquired: - return False - - try: - self._delete_nodes(self.nodes) - except NoNodeError: - pass - - self.is_acquired = False - self.nodes = [None for _ in self.paths] - return True - - def __enter__(self): - self.unsafe_acquire() - - def __exit__(self, exc_type, exc_value, traceback): - self.release() diff --git a/AppDB/appscale/datastore/zkappscale/inspectable_counter.py b/AppDB/appscale/datastore/zkappscale/inspectable_counter.py deleted file mode 100644 index 7fc5bb1294..0000000000 --- a/AppDB/appscale/datastore/zkappscale/inspectable_counter.py +++ /dev/null @@ -1,98 +0,0 @@ -""" A ZooKeeper counter that returns the new value when incremented. """ - -from kazoo.exceptions import BadVersionError -from kazoo.retry import ForceRetryError - - -class InspectableCounter(object): - """ A ZooKeeper counter that returns the new value when incremented. - - This is based off the Kazoo Counter recipe. - """ - def __init__(self, client, path, default=0): - """ Create an InspectableCounter. - - Args: - client: A KazooClient object. - path: A string containing the ZooKeeper path to use for the counter. - default: An integer containing the default counter value. - """ - self.client = client - self.path = path - self.default = default - self.default_type = type(default) - self._ensured_path = False - - def _ensure_node(self): - """ Make sure the ZooKeeper path that stores the counter value exists. """ - if not self._ensured_path: - self.client.ensure_path(self.path) - self._ensured_path = True - - def _value(self): - """ Retrieve the current value and node version from ZooKeeper. - - Returns: - A tuple consisting of the current count and node version. - """ - self._ensure_node() - old, stat = self.client.get(self.path) - old = old.decode('ascii') if old != b'' else self.default - version = stat.version - data = self.default_type(old) - return data, version - - @property - def value(self): - """ Retrieve the current value from ZooKeeper. - - Returns: - An integer containing the current count. 
- """ - return self._value()[0] - - def _change(self, value): - """ Add a value to the counter. - - Args: - value: An integer specifying how much to add. - Returns: - An integer indicating the new count after the change. - """ - if not isinstance(value, self.default_type): - raise TypeError('Invalid type for value change') - - return self.client.retry(self._inner_change, value) - - def _inner_change(self, value): - """ Add a value to the counter. - - Args: - value: An integer specifying how much to add. - Returns: - An integer indicating the new count after the change. - """ - data, version = self._value() - new_value = data + value - new_data = repr(new_value).encode('ascii') - try: - self.client.set(self.path, new_data, version=version) - return new_value - except BadVersionError: - raise ForceRetryError() - - def __add__(self, value): - """ Add value to counter. - - Returns: - An integer indicating the new count after the change. - """ - return self._change(value) - - def __sub__(self, value): - """ Subtract value from counter. - - Returns: - An integer indicating the new count after the change. - """ - return self._change(-value) diff --git a/AppDB/appscale/datastore/zkappscale/tornado_kazoo.py b/AppDB/appscale/datastore/zkappscale/tornado_kazoo.py deleted file mode 100644 index 2e1635e7ed..0000000000 --- a/AppDB/appscale/datastore/zkappscale/tornado_kazoo.py +++ /dev/null @@ -1,546 +0,0 @@ -""" A wrapper that converts Kazoo operations to Tornado futures. """ -import datetime -import random -import six -import sys -import time -import uuid - -from kazoo.exceptions import ( - CancelledError, ConnectionClosedError, ConnectionLoss, KazooException, - LockTimeout, NoNodeError, OperationTimeoutError, SessionExpiredError) -from kazoo.retry import ( - ForceRetryError, InterruptedError as KazooInterruptedError, - RetryFailedError) -from tornado import gen -from tornado.concurrent import Future as TornadoFuture -from tornado.ioloop import IOLoop -from tornado.locks import Event as AsyncEvent, Lock as AsyncLock - - -class AsyncKazooRetry(object): - """ A retry helper based on kazoo.retry.KazooRetry and modified to work with - coroutines. """ - RETRY_EXCEPTIONS = ( - ConnectionLoss, - OperationTimeoutError, - ForceRetryError - ) - - EXPIRED_EXCEPTIONS = ( - SessionExpiredError, - ) - - def __init__(self, max_tries=1, delay=0.1, backoff=2, max_jitter=0.8, - max_delay=60, ignore_expire=True, deadline=None, - interrupt=None): - """ Creates an AsyncKazooRetry for retrying coroutines. - - Args: - max_tries: How many times to retry the command. -1 means infinite tries. - delay: Initial delay between retry attempts. - backoff: Backoff multiplier between retry attempts. Defaults to 2 for - exponential backoff. - max_jitter: Additional max jitter period to wait between retry attempts - to avoid slamming the server. - max_delay: Maximum delay in seconds, regardless of other backoff - settings. Defaults to one minute. - ignore_expire: Whether a session expiration should be ignored and treated - as a retry-able command. - interrupt: - Function that will be called with no args that may return - True if the retry should be ceased immediately. This will - be called no more than every 0.1 seconds during a wait - between retries. 
- - """ - self.max_tries = max_tries - self.delay = delay - self.backoff = backoff - self.max_jitter = int(max_jitter * 100) - self.max_delay = float(max_delay) - self._attempts = 0 - self._cur_delay = delay - self.deadline = deadline - self._cur_stoptime = None - self.retry_exceptions = self.RETRY_EXCEPTIONS - self.interrupt = interrupt - if ignore_expire: - self.retry_exceptions += self.EXPIRED_EXCEPTIONS - - def reset(self): - """ Resets the attempt counter. """ - self._attempts = 0 - self._cur_delay = self.delay - self._cur_stoptime = None - - def copy(self): - """ Returns a clone of this retry manager. """ - obj = AsyncKazooRetry(max_tries=self.max_tries, - delay=self.delay, - backoff=self.backoff, - max_jitter=self.max_jitter / 100.0, - max_delay=self.max_delay, - deadline=self.deadline, - interrupt=self.interrupt) - obj.retry_exceptions = self.retry_exceptions - return obj - - @gen.coroutine - def __call__(self, func, *args, **kwargs): - """ Calls a coroutine with arguments until it completes without - throwing a Kazoo exception. - - Args: - func: Coroutine to yield - args: Positional arguments to call the function with - kwargs: Keyword arguments to call the function with - - The coroutine will be called until it doesn't throw one of the - retryable exceptions (ConnectionLoss, OperationTimeout, or - ForceRetryError), and optionally retrying on session - expiration. - """ - self.reset() - - while True: - try: - if self.deadline is not None and self._cur_stoptime is None: - self._cur_stoptime = time.time() + self.deadline - response = yield func(*args, **kwargs) - raise gen.Return(response) - except ConnectionClosedError: - raise - except self.retry_exceptions: - # Note: max_tries == -1 means infinite tries. - if self._attempts == self.max_tries: - raise RetryFailedError("Too many retry attempts") - self._attempts += 1 - sleeptime = self._cur_delay + ( - random.randint(0, self.max_jitter) / 100.0) - - if self._cur_stoptime is not None and \ - time.time() + sleeptime >= self._cur_stoptime: - raise RetryFailedError("Exceeded retry deadline") - - if self.interrupt: - while sleeptime > 0: - # Break the time period down and sleep for no - # longer than 0.1 before calling the interrupt - if sleeptime < 0.1: - yield gen.sleep(sleeptime) - sleeptime -= sleeptime - else: - yield gen.sleep(0.1) - sleeptime -= 0.1 - if self.interrupt(): - raise KazooInterruptedError() - else: - yield gen.sleep(sleeptime) - self._cur_delay = min(self._cur_delay * self.backoff, - self.max_delay) - - -class AsyncKazooLock(object): - """ A lock based on kazoo.recipe.Lock and modified to work as a coroutine. - """ - - # Node name, after the contender UUID, before the sequence - # number. Involved in read/write locks. - _NODE_NAME = "__lock__" - - # Node names which exclude this contender when present at a lower - # sequence number. Involved in read/write locks. - _EXCLUDE_NAMES = ["__lock__"] - - def __init__(self, client, path, identifier=None): - """ Creates an AsyncKazooLock. - - Args: - client: A KazooClient. - path: The lock path to use. - identifier: The name to use for this lock contender. This can be useful - for querying to see who the current lock contenders are. - """ - self.client = client - self.tornado_kazoo = TornadoKazoo(client) - self.path = path - - # some data is written to the node. 
this can be queried via - # contenders() to see who is contending for the lock - self.data = str(identifier or "").encode('utf-8') - self.node = None - - self.wake_event = AsyncEvent() - - # props to Netflix Curator for this trick. It is possible for our - # create request to succeed on the server, but for a failure to - # prevent us from getting back the full path name. We prefix our - # lock name with a uuid and can check for its presence on retry. - self.prefix = uuid.uuid4().hex + self._NODE_NAME - self.create_path = self.path + "/" + self.prefix - - self.create_tried = False - self.is_acquired = False - self.assured_path = False - self.cancelled = False - self._retry = AsyncKazooRetry(max_tries=-1) - self._lock = AsyncLock() - - @gen.coroutine - def _ensure_path(self): - yield self.tornado_kazoo.ensure_path(self.path) - self.assured_path = True - - def cancel(self): - """ Cancels a pending lock acquire. """ - self.cancelled = True - self.wake_event.set() - - @gen.coroutine - def acquire(self, timeout=None, ephemeral=True): - """ Acquires the lock. By default, it blocks and waits forever. - - Args: - timeout: A float specifying how long to wait to acquire the lock. - ephemeral: A boolean indicating that the lock should use an ephemeral - node. - - Raises: - LockTimeout if the lock wasn't acquired within `timeout` seconds. - """ - retry = self._retry.copy() - retry.deadline = timeout - - # Ensure we are locked so that we avoid multiple coroutines in - # this acquisition routine at the same time... - timeout_interval = None - if timeout is not None: - timeout_interval = datetime.timedelta(seconds=timeout) - - try: - with (yield self._lock.acquire(timeout=timeout_interval)): - already_acquired = self.is_acquired - gotten = False - try: - gotten = yield retry(self._inner_acquire, timeout=timeout, - ephemeral=ephemeral) - except RetryFailedError: - pass - except KazooException: - # if we did ultimately fail, attempt to clean up - exc_info = sys.exc_info() - if not already_acquired: - yield self._best_effort_cleanup() - self.cancelled = False - six.reraise(exc_info[0], exc_info[1], exc_info[2]) - if gotten: - self.is_acquired = gotten - if not gotten and not already_acquired: - yield self._best_effort_cleanup() - raise gen.Return(gotten) - except gen.TimeoutError: - raise LockTimeout("Failed to acquire lock on %s after " - "%s seconds" % (self.path, timeout)) - - def _watch_session(self, state): - self.wake_event.set() - return True - - def _watch_session_listener(self, state): - IOLoop.current().add_callback(self._watch_session, state) - - @gen.coroutine - def _inner_acquire(self, timeout, ephemeral=True): - - # wait until it's our chance to get it.. 
- if self.is_acquired: - raise ForceRetryError() - - # make sure our election parent node exists - if not self.assured_path: - yield self._ensure_path() - - node = None - if self.create_tried: - node = yield self._find_node() - else: - self.create_tried = True - - if not node: - node = yield self.tornado_kazoo.create( - self.create_path, self.data, ephemeral=ephemeral, sequence=True) - # strip off path to node - node = node[len(self.path) + 1:] - - self.node = node - - while True: - self.wake_event.clear() - - # bail out with an exception if cancellation has been requested - if self.cancelled: - raise CancelledError() - - children = yield self._get_sorted_children() - - try: - our_index = children.index(node) - except ValueError: # pragma: nocover - # somehow we aren't in the children -- probably we are - # recovering from a session failure and our ephemeral - # node was removed - raise ForceRetryError() - - predecessor = self.predecessor(children, our_index) - if not predecessor: - raise gen.Return(True) - - # otherwise we are in the mix. watch predecessor and bide our time - predecessor = self.path + "/" + predecessor - self.client.add_listener(self._watch_session_listener) - try: - yield self.tornado_kazoo.get(predecessor, self._watch_predecessor) - except NoNodeError: - pass # predecessor has already been deleted - else: - try: - yield self.wake_event.wait(timeout) - except gen.TimeoutError: - raise LockTimeout("Failed to acquire lock on %s after " - "%s seconds" % (self.path, timeout)) - finally: - self.client.remove_listener(self._watch_session_listener) - - def predecessor(self, children, index): - for c in reversed(children[:index]): - if any(n in c for n in self._EXCLUDE_NAMES): - return c - return None - - def _watch_predecessor(self, event): - self.wake_event.set() - - @gen.coroutine - def _get_sorted_children(self): - children = yield self.tornado_kazoo.get_children(self.path) - - # Node names are prefixed by a type: strip the prefix first, which may - # be one of multiple values in case of a read-write lock, and return - # only the sequence number (as a string since it is padded and will - # sort correctly anyway). - # - # In some cases, the lock path may contain nodes with other prefixes - # (eg. in case of a lease), just sort them last ('~' sorts after all - # ASCII digits). - def _seq(c): - for name in ["__lock__", "__rlock__"]: - idx = c.find(name) - if idx != -1: - return c[idx + len(name):] - # Sort unknown node names eg. "lease_holder" last. 
- return '~' - - children.sort(key=_seq) - raise gen.Return(children) - - @gen.coroutine - def _find_node(self): - children = yield self.tornado_kazoo.get_children(self.path) - for child in children: - if child.startswith(self.prefix): - raise gen.Return(child) - raise gen.Return(None) - - @gen.coroutine - def _delete_node(self, node): - yield self.tornado_kazoo.delete(self.path + "/" + node) - - @gen.coroutine - def _best_effort_cleanup(self): - try: - node = self.node - if not node: - node = yield self._find_node() - if node: - yield self._delete_node(node) - except KazooException: # pragma: nocover - pass - - @gen.coroutine - def release(self): - """Release the lock immediately.""" - retry = self._retry.copy() - release_response = yield retry(self._inner_release) - raise gen.Return(release_response) - - @gen.coroutine - def _inner_release(self): - if not self.is_acquired: - raise gen.Return(False) - - try: - yield self._delete_node(self.node) - except NoNodeError: # pragma: nocover - pass - - self.is_acquired = False - self.node = None - raise gen.Return(True) - - @gen.coroutine - def contenders(self): - """ Returns an ordered list of the current contenders for the lock. """ - # make sure our election parent node exists - if not self.assured_path: - yield self._ensure_path() - - children = yield self._get_sorted_children() - - contenders = [] - for child in children: - try: - data = yield self.tornado_kazoo.get(self.path + "/" + child)[0] - contenders.append(data.decode('utf-8')) - except NoNodeError: # pragma: nocover - pass - raise gen.Return(contenders) - - -class IncompleteOperation(Exception): - """ Indicates that a Kazoo operation is not complete. """ - pass - - -class TornadoKazooFuture(TornadoFuture): - """ A TornadoFuture that handles Kazoo results. """ - def handle_zk_result(self, async_result): - """ Completes the TornadoFuture. - - Args: - async_result: An IAsyncResult. - """ - io_loop = IOLoop.instance() - - # This method should not be called if the result is not ready. - if not async_result.ready(): - error = IncompleteOperation('Kazoo operation is not ready') - io_loop.add_callback(self.set_exception, error) - return - - if async_result.successful(): - io_loop.add_callback(self.set_result, async_result.value) - else: - io_loop.add_callback(self.set_exception, async_result.exception) - - -class TornadoKazoo(object): - """ A wrapper that converts Kazoo operations to Tornado futures. """ - def __init__(self, zk_client): - """ Creates a new TornadoKazoo manager. - - Args: - zk_client: A KazooClient. - """ - self._zk_client = zk_client - - def create(self, path, value=b'', acl=None, ephemeral=False, sequence=False, - makepath=False): - """ Creates a node with the given value as its data. - - Args: - path: A string specifying the path of the node. - value: A byte string specifying the node contents. - acl: A kazoo.security.ACL list. - ephemeral: A boolean indicating whether or not the node should be removed - upon client disconnection. - sequence: A boolean indicating whether or not the path should be suffixed - with a unique index. - makepath: A boolean indicating whether or not the parent path should be - created if it doesn't exist. - """ - tornado_future = TornadoKazooFuture() - zk_future = self._zk_client.create_async( - path, value, acl=acl, ephemeral=ephemeral, sequence=sequence, - makepath=makepath) - zk_future.rawlink(tornado_future.handle_zk_result) - return tornado_future - - def get(self, path, watch=None): - """ Gets the value of a node. 
- - Args: - path: A string specifying the path of the node. - watch: A function that is called when the node changes. - Returns: - A TornadoKazooFuture. - """ - tornado_future = TornadoKazooFuture() - if watch is None: - wrapped_watch = None - else: - wrapped_watch = self._wrap_in_io_loop(watch) - - zk_future = self._zk_client.get_async(path, wrapped_watch) - zk_future.rawlink(tornado_future.handle_zk_result) - return tornado_future - - def get_children(self, path, watch=None, include_data=False): - """ Gets a list of child nodes of a path. - - Args: - path: A string specifying the path of the parent node. - watch: A function that is called when the node changes. - include_data: A boolean specifying that the parent node contents should - also be fetched. - Returns: - A TornadoKazooFuture. - """ - tornado_future = TornadoKazooFuture() - if watch is None: - wrapped_watch = None - else: - wrapped_watch = self._wrap_in_io_loop(watch) - - zk_future = self._zk_client.get_children_async( - path, wrapped_watch, include_data) - zk_future.rawlink(tornado_future.handle_zk_result) - return tornado_future - - def delete(self, path, version=-1): - """ Deletes a node. - - Args: - path: A string specifying the path of the node. - version: An integer specifying the expected version of the node. - Returns: - A TornadoKazooFuture. - """ - tornado_future = TornadoKazooFuture() - zk_future = self._zk_client.delete_async(path, version=version) - zk_future.rawlink(tornado_future.handle_zk_result) - return tornado_future - - def ensure_path(self, path, acl=None): - """ Ensures a node exists. - - Args: - path: A string specifying the path of the node. - acl: Permissions for the node (a kazoo.security.ACL list). - """ - tornado_future = TornadoKazooFuture() - zk_future = self._zk_client.ensure_path_async(path, acl) - zk_future.rawlink(tornado_future.handle_zk_result) - return tornado_future - - @staticmethod - def _wrap_in_io_loop(watch): - """ Returns a function that runs the given function in the main IO loop. - - Args: - watch: The function to wrap. - """ - def run_in_io_loop(*args): - IOLoop.current().add_callback(watch, *args) - - return run_in_io_loop diff --git a/AppDB/appscale/datastore/zkappscale/transaction_manager.py b/AppDB/appscale/datastore/zkappscale/transaction_manager.py deleted file mode 100644 index 8158fc9062..0000000000 --- a/AppDB/appscale/datastore/zkappscale/transaction_manager.py +++ /dev/null @@ -1,373 +0,0 @@ -""" Generates and keeps track of transaction IDs. """ -from __future__ import division - -import json -import logging -import time - -from kazoo.exceptions import KazooException -from kazoo.exceptions import NodeExistsError -from tornado.ioloop import IOLoop - -from appscale.common.async_retrying import retry_children_watch_coroutine -from .constants import CONTAINER_PREFIX -from .constants import COUNTER_NODE_PREFIX -from .constants import MAX_SEQUENCE_COUNTER -from .constants import OFFSET_NODE -from .entity_lock import zk_group_path -from ..dbconstants import BadRequest -from ..dbconstants import InternalError - -logger = logging.getLogger(__name__) - -# Nodes that indicate a cross-group transaction start with this string. -XG_PREFIX = 'xg' - - -class ProjectTransactionManager(object): - """ Generates and keeps track of transaction IDs for a project. """ - def __init__(self, project_id, zk_client): - """ Creates a new ProjectTransactionManager. - - Args: - project_id: A string specifying a project ID. 
- """ - self.project_id = project_id - self.zk_client = zk_client - - self._project_node = '/appscale/apps/{}'.format(self.project_id) - - # Allows users to manually modify transaction IDs after a binary migration. - self._txid_manual_offset = 0 - self._offset_node = '/'.join([self._project_node, OFFSET_NODE]) - self.zk_client.DataWatch(self._offset_node, self._update_offset) - - # Allows the manager to use a new container after one is exhausted. - self._txid_automatic_offset = 0 - self._counter_path = '/'.join([self._project_node, CONTAINER_PREFIX]) - self.zk_client.ensure_path(self._project_node) - self.zk_client.ensure_path(self._counter_path) - self.zk_client.ChildrenWatch(self._project_node, self._update_project) - - # Containers that do not need to be checked for open transactions. - self._inactive_containers = set() - - def create_transaction_id(self, xg): - """ Generates a new transaction ID. - - Args: - xg: A boolean indicating a cross-group transaction. - Returns: - An integer specifying the created transaction ID. - Raises: - InternalError if unable to create a new transaction ID. - """ - current_time = time.time() - counter_path_prefix = '/'.join([self._counter_path, COUNTER_NODE_PREFIX]) - try: - new_path = self.zk_client.create( - counter_path_prefix, value=str(current_time), sequence=True) - except KazooException: - message = 'Unable to create new transaction ID' - logger.exception(message) - raise InternalError(message) - - counter = int(new_path.split('/')[-1].lstrip(COUNTER_NODE_PREFIX)) - - if counter < 0: - logger.debug('Removing invalid counter') - self._delete_counter(new_path) - self._update_auto_offset() - return self.create_transaction_id(xg) - - txid = self._txid_manual_offset + self._txid_automatic_offset + counter - - if txid == 0: - self._delete_counter(new_path) - return self.create_transaction_id(xg) - - if xg: - xg_path = '/'.join([new_path, XG_PREFIX]) - try: - self.zk_client.create(xg_path, value=str(current_time)) - except KazooException: - message = 'Unable to create new cross-group transaction ID' - logger.exception(message) - raise InternalError(message) - - self._last_txid_created = txid - return txid - - def delete_transaction_id(self, txid): - """ Removes a transaction ID from the list of active transactions. - - Args: - txid: An integer specifying a transaction ID. - """ - path = self._txid_to_path(txid) - self._delete_counter(path) - - def get_open_transactions(self): - """ Fetches a list of active transactions. - - Returns: - A list of integers specifying transaction IDs. - Raises: - InternalError if unable to fetch list of transaction IDs. - """ - txids = [] - active_containers = self._active_containers() - for index, container in enumerate(active_containers): - container_name = container.split('/')[-1] - container_count = int(container_name[len(CONTAINER_PREFIX):] or 1) - container_size = MAX_SEQUENCE_COUNTER + 1 - auto_offset = (container_count - 1) * container_size - offset = self._txid_manual_offset + auto_offset - - try: - paths = self.zk_client.get_children(container) - except KazooException: - message = 'Unable to fetch list of counters' - logger.exception(message) - raise InternalError(message) - - counter_nodes = [path.split('/')[-1] for path in paths] - txids.extend([offset + int(node.lstrip(COUNTER_NODE_PREFIX)) - for node in counter_nodes]) - - # If there are no counters left in an old container, mark it inactive. 
- if not counter_nodes and index < len(active_containers) - 1: - self._inactive_containers.add(container_name) - - return txids - - def set_groups(self, txid, groups): - """ Defines which groups will be involved in a transaction. - - Args: - txid: An integer specifying a transaction ID. - groups: An iterable of entity group Reference objects. - """ - txid_path = self._txid_to_path(txid) - groups_path = '/'.join([txid_path, 'groups']) - encoded_groups = [zk_group_path(group) for group in groups] - try: - self.zk_client.create(groups_path, value=json.dumps(encoded_groups)) - except KazooException: - message = 'Unable to set lock list for transaction' - logger.exception(message) - raise InternalError(message) - - def _delete_counter(self, path): - """ Removes a counter node. - - Args: - path: A string specifying a ZooKeeper path. - """ - try: - self.zk_client.delete(path, recursive=True) - except KazooException: - # Let the transaction groomer clean it up. - logger.exception('Unable to delete counter') - - def _active_containers(self): - """ Determines the containers that need to be checked for transactions. - - Returns: - A tuple of strings specifying ZooKeeper paths. - """ - container_name = self._counter_path.split('/')[-1] - container_count = int(container_name[len(CONTAINER_PREFIX):] or 1) - - all_containers = [CONTAINER_PREFIX + str(index + 1) - for index in range(container_count)] - all_containers[0] = CONTAINER_PREFIX - - return tuple('/'.join([self._project_node, container]) - for container in all_containers - if container not in self._inactive_containers) - - def _txid_to_path(self, txid): - """ Determines the ZooKeeper path for a given transaction ID. - - Args: - txid: An integer specifying a transaction ID. - Returns: - A strings specifying the transaction's ZooKeeper path. - """ - corrected_counter = txid - self._txid_manual_offset - - # The number of counters a container can store (including 0). - container_size = MAX_SEQUENCE_COUNTER + 1 - - container_count = int(corrected_counter / container_size) + 1 - container_suffix = '' if container_count == 1 else str(container_count) - container_name = CONTAINER_PREFIX + container_suffix - container_path = '/'.join([self._project_node, container_name]) - - counter_value = corrected_counter % container_size - node_name = COUNTER_NODE_PREFIX + str(counter_value).zfill(10) - return '/'.join([container_path, node_name]) - - def _update_auto_offset(self): - """ Ensures there is a usable sequence container. """ - container_name = self._counter_path.split('/')[-1] - container_count = int(container_name[len(CONTAINER_PREFIX):] or 1) - next_node = CONTAINER_PREFIX + str(container_count + 1) - next_path = '/'.join([self._project_node, next_node]) - - try: - self.zk_client.create(next_path) - except NodeExistsError: - # Another process may have already created the new counter. - pass - except KazooException: - message = 'Unable to create transaction ID counter' - logger.exception(message) - raise InternalError(message) - - try: - node_list = self.zk_client.get_children(self._project_node) - except KazooException: - message = 'Unable to find transaction ID counter' - logger.exception(message) - raise InternalError(message) - - self._update_project_sync(node_list) - - def _update_offset(self, new_offset, _): - """ Watches for updates to the manual offset node. """ - # This assignment is atomic, so it does not need to happen in the IOLoop. 
- self._txid_manual_offset = int(new_offset or 0) - - def _update_project_sync(self, node_list): - """ Updates the record of usable sequence containers. """ - counters = [int(node[len(CONTAINER_PREFIX):] or 1) - for node in node_list if node.startswith(CONTAINER_PREFIX)] - counters.sort() - - container_suffix = '' if len(counters) == 1 else str(counters[-1]) - latest_node = CONTAINER_PREFIX + container_suffix - - self._counter_path = '/'.join([self._project_node, latest_node]) - - # The number of counters a container can store (including 0). - container_size = MAX_SEQUENCE_COUNTER + 1 - self._txid_automatic_offset = (len(counters) - 1) * container_size - - def _update_project(self, node_list): - """ Watches for updates to the list of containers. """ - IOLoop.instance().add_callback(self._update_project_sync, node_list) - - -class TransactionManager(object): - """ Generates and keeps track of transaction IDs. """ - def __init__(self, zk_client): - """ Creates a new TransactionManager. - - Args: - zk_client: A KazooClient. - """ - self.zk_client = zk_client - self.zk_client.ensure_path('/appscale/projects') - self.projects = {} - - # Since this manager can be used synchronously, ensure that the projects - # are populated for this IOLoop iteration. - project_ids = self.zk_client.get_children('/appscale/projects') - self._update_projects_sync(project_ids) - - self.zk_client.ChildrenWatch('/appscale/projects', self._update_projects) - - def create_transaction_id(self, project_id, xg=False): - """ Generates a new transaction ID. - - Args: - project_id: A string specifying a project ID. - xg: A boolean indicating a cross-group transaction. - Returns: - An integer specifying the created transaction ID. - Raises: - BadRequest if the project does not exist. - InternalError if unable to create a new transaction ID. - """ - try: - project_tx_manager = self.projects[project_id] - except KeyError: - raise BadRequest('The project {} was not found'.format(project_id)) - - return project_tx_manager.create_transaction_id(xg) - - def delete_transaction_id(self, project_id, txid): - """ Removes a transaction ID from the list of active transactions. - - Args: - project_id: A string specifying a project ID. - txid: An integer specifying a transaction ID. - Raises: - BadRequest if the project does not exist. - """ - try: - project_tx_manager = self.projects[project_id] - except KeyError: - raise BadRequest('The project {} was not found'.format(project_id)) - - return project_tx_manager.delete_transaction_id(txid) - - def get_open_transactions(self, project_id): - """ Fetch a list of open transactions for a given project. - - Args: - project_id: A string specifying a project ID. - Returns: - A list of integers specifying transaction IDs. - Raises: - BadRequest if the project does not exist. - InternalError if unable to fetch list of open transactions. - """ - try: - project_tx_manager = self.projects[project_id] - except KeyError: - raise BadRequest('The project {} was not found'.format(project_id)) - - return project_tx_manager.get_open_transactions() - - def set_groups(self, project_id, txid, groups): - """ Defines which groups will be involved in a transaction. - - Args: - project_id: A string specifying a project ID. - txid: An integer specifying a transaction ID. - groups: An iterable of entity group Reference objects. 
- """ - try: - project_tx_manager = self.projects[project_id] - except KeyError: - raise BadRequest('The project {} was not found'.format(project_id)) - - return project_tx_manager.set_groups(txid, groups) - - def _update_projects_sync(self, new_project_ids): - """ Updates the available projects for starting transactions. - - Args: - new_project_ids: A list of strings specifying current project IDs. - """ - for project_id in new_project_ids: - if project_id not in self.projects: - self.projects[project_id] = ProjectTransactionManager(project_id, - self.zk_client) - - for project_id in self.projects.keys(): - if project_id not in new_project_ids: - del self.projects[project_id] - - def _update_projects(self, project_ids): - """ Watches for changes to list of existing projects. - - Args: - project_ids: A list of strings specifying current project IDs. - """ - persistent_setup_projects = retry_children_watch_coroutine( - '/appscale/projects', self._update_projects_sync) - IOLoop.instance().add_callback(persistent_setup_projects, project_ids) diff --git a/AppDB/appscale/datastore/zkappscale/zktransaction.py b/AppDB/appscale/datastore/zkappscale/zktransaction.py deleted file mode 100644 index 133d804e36..0000000000 --- a/AppDB/appscale/datastore/zkappscale/zktransaction.py +++ /dev/null @@ -1,988 +0,0 @@ -#!/usr/bin/python -""" -Distributed id and lock service for transaction support. -""" -import kazoo.client -import kazoo.exceptions -import logging -import os -import re -import time -import urllib - -from .inspectable_counter import InspectableCounter -from ..dbconstants import MAX_GROUPS_FOR_XG - -from cassandra.policies import FallthroughRetryPolicy -from kazoo.exceptions import (KazooException, - ZookeeperError) -from kazoo.retry import KazooRetry - -# A cassandra-driver policy that does not retry operations. -NO_RETRIES = FallthroughRetryPolicy() - - -class ZKTimeoutException(Exception): - """ A special Exception class that should be thrown if a function is - taking longer than expected by the caller to run - """ - pass - - -# A list that indicates that the Zookeeper node to create should be readable -# and writable by anyone. -ZOO_ACL_OPEN = None - -# The value that we should set for paths whose value we don't care about. -DEFAULT_VAL = "default" - -# Paths are separated by this for the tree structure in zookeeper. -PATH_SEPARATOR = "/" - -# This is the path which contains the different application's lock meta-data. -APPS_PATH = "/appscale/apps" - -# This path contains different transaction IDs. -APP_TX_PATH = "txids" - -# This is the node which holds all the locks of an application. -APP_LOCK_PATH = "locks" - -APP_ID_PATH = "ids" - -APP_TX_PREFIX = "tx" - -APP_LOCK_PREFIX = "lk" - -APP_ID_PREFIX = "id" - -# This is the prefix of all keys which have been updated within a transaction. -TX_UPDATEDKEY_PREFIX = "ukey" - -# This is the name of the leaf. It holds a list of locks as a string. -TX_LOCK_PATH = "lockpath" - -# The path for blacklisted transactions. -TX_BLACKLIST_PATH = "blacklist" - -# This is the path name for valid versions of entities used in a transaction. -TX_VALIDLIST_PATH = "validlist" - -GC_LOCK_PATH = "gclock" - -GC_TIME_PATH = "gclast_time" - -# Lock path for the datastore groomer. -DS_GROOM_LOCK_PATH = "/appscale_datastore_groomer" - -# Lock path for the datastore backup. -DS_BACKUP_LOCK_PATH = "/appscale_datastore_backup" - -# Lock path for the datastore backup. -DS_RESTORE_LOCK_PATH = "/appscale_datastore_restore" - -# A unique prefix for cross group transactions. 
-XG_PREFIX = "xg" - -# The separator value for the lock list when using XG transactions. -LOCK_LIST_SEPARATOR = "!XG_LIST!" - -# The location of the ZooKeeper server script. -ZK_SERVER_CMD_LOCATIONS = [ - os.path.join('/usr', 'share', 'zookeeper', 'bin', 'zkServer.sh'), - os.path.join('/usr', 'lib', 'zookeeper', 'bin', 'zkServer.sh') -] - -class ZKTransactionException(Exception): - """ ZKTransactionException defines a custom exception class that should be - thrown whenever there was a problem involving a transaction (e.g., the - transaction failed, we couldn't get a transaction ID). - """ - pass - -class ZKInternalException(Exception): - """ ZKInternalException defines a custom exception class that should be - thrown whenever we cannot connect to ZooKeeper for an extended amount of time. - """ - pass - -class ZKBadRequest(ZKTransactionException): - """ A class thrown when there are too many locks acquired in a XG transaction - or when XG operations are done on a non XG transaction. - """ - pass - -class ZKTransaction: - """ ZKTransaction provides an interface that can be used to acquire locks - and other functions needed to perform database-agnostic transactions - (e.g., releasing locks, keeping track of transaction metadata). - """ - - # How long to wait before retrying an operation. - ZK_RETRY_TIME = .5 - - def __init__(self, zk_client, db_access=None, log_level=logging.INFO): - """ Creates a new ZKTransaction, which will communicate with Zookeeper - on the given host. - - Args: - zk_client: An instance of Zookeeper client. - db_access: A DatastoreProxy instance. - log_level: A logging constant that specifies the instance logging level. - """ - class_name = self.__class__.__name__ - self.logger = logging.getLogger(class_name) - self.logger.setLevel(log_level) - self.logger.info('Starting {}'.format(class_name)) - - # Connection instance variables. - self.handle = zk_client - self.run_with_retry = self.handle.retry - - self.__counter_cache = {} - - self.db_access = db_access - - def increment_and_get_counter(self, path, value): - """ Increment a counter atomically. - - Args: - path: A str of unique path to the counter. - value: An int of how much to increment the counter by. - Returns: - A tuple (int, int) of the previous value and the new value. - Raises: - ZKTransactionException: If it could not increment the counter. - """ - if path not in self.__counter_cache: - self.__counter_cache[path] = InspectableCounter(self.handle, path) - - counter = self.__counter_cache[path] - try: - new_value = counter + value - return new_value - value, new_value - except kazoo.exceptions.ZookeeperError as zoo_exception: - self.logger.exception(zoo_exception) - raise ZKTransactionException("Couldn't increment path {0} by value {1}" \ - .format(path, value)) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException( - "Couldn't increment path {0} with value {1}" \ - .format(path, value)) - - def get_node(self, path, retries=5): - """ Fetch the ZooKeeper node at the given path. - - Args: - path: A PATH_SEPARATOR-separated str that represents the node whose value - should be updated. - retries: The number of times to retry fetching the node. - Returns: - The value of the node. - Raises: - ZKInternalException: If there was an error trying to fetch the node. 
- """ - try: - return self.run_with_retry(self.handle.get, path) - except kazoo.exceptions.NoNodeError: - return False - except kazoo.exceptions.ZookeeperError as zoo_exception: - self.logger.exception(zoo_exception) - if retries > 0: - self.logger.info('Trying again to fetch node {} with retry #{}' - .format(path, retries)) - time.sleep(self.ZK_RETRY_TIME) - return self.get_node(path, retries=retries - 1) - raise ZKInternalException('Unable to fetch node {}'.format(path)) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - if retries > 0: - self.logger.info('Trying again to fetch node {} with retry #{}' - .format(path, retries)) - time.sleep(self.ZK_RETRY_TIME) - return self.get_node(path, retries=retries - 1) - raise ZKInternalException('Unable to fetch node {}'.format(path)) - - def update_node(self, path, value): - """ Sets the ZooKeeper node at path to value, creating the node if it - doesn't exist. - - Args: - path: A PATH_SEPARATOR-separated str that represents the node whose value - should be updated. - value: A str representing the value that should be associated with the - updated node. - """ - self.logger.debug( - 'Updating node at {}, with new value {}'.format(path, value)) - try: - self.run_with_retry(self.handle.set, path, str(value)) - except kazoo.exceptions.NoNodeError: - self.run_with_retry(self.handle.create, path, str(value), ZOO_ACL_OPEN, - makepath=True) - - def delete_recursive(self, path): - """ Deletes the ZooKeeper node at path, and any child nodes it may have. - - Args: - path: A PATH_SEPARATOR-separated str that represents the node to delete. - """ - try: - children = self.run_with_retry(self.handle.get_children, path) - for child in children: - self.delete_recursive(PATH_SEPARATOR.join([path, child])) - self.run_with_retry(self.handle.delete, path) - except kazoo.exceptions.NoNodeError: - pass - - def get_app_root_path(self, app_id): - """ Returns the ZooKeeper path that holds all information for the given - application. - - Args: - app_id: A str that represents the application we wish to get the root - path for. - Returns: - A str that represents a ZooKeeper node, whose immediate children are - the transaction prefix path and the locks prefix path. - """ - return PATH_SEPARATOR.join([APPS_PATH, urllib.quote_plus(app_id)]) - - def get_transaction_prefix_path(self, app_id): - """ Returns the location of the ZooKeeper node who contains all transactions - in progress for the given application. - - Args: - app_id: A str that represents the application we wish to get all - transaction information for. - Returns: - A str that represents a ZooKeeper node, whose immediate children are all - of the transactions currently in progress. - """ - return PATH_SEPARATOR.join([self.get_app_root_path(app_id), APP_TX_PATH]) - - def get_txn_path_before_getting_id(self, app_id): - """ Returns a path that callers can use to get new transaction IDs from - ZooKeeper, which are given as sequence nodes. - - Args: - app_id: A str that represents the application we wish to build a new - transaction path for. - Returns: A str that can be used to create new transactions. - """ - return PATH_SEPARATOR.join([self.get_transaction_prefix_path(app_id), - APP_TX_PREFIX]) - - def get_transaction_path(self, app_id, txid): - """ Returns the location of the ZooKeeper node who contains all information - for a transaction, and is the parent of the transaction lock list and - registered keys for the transaction. 
- - Args: - app_id: A str that represents the application we wish to get the prefix - path for. - txid: An int that represents the transaction ID whose path we wish to - acquire. - """ - txstr = APP_TX_PREFIX + "%010d" % txid - return PATH_SEPARATOR.join([self.get_app_root_path(app_id), APP_TX_PATH, - txstr]) - - def get_transaction_lock_list_path(self, app_id, txid): - """ Returns the location of the ZooKeeper node whose value is a - XG_LIST-separated str, representing all of the locks that have been acquired - for the given transaction ID. - - Args: - app_id: A str that represents the application we wish to get the - transaction information about. - txid: A str that represents the transaction ID we wish to get the lock - list location for. - Returns: - A PATH_SEPARATOR-delimited str corresponding to the ZooKeeper node that - contains the list of locks that have been taken for the given transaction. - """ - return PATH_SEPARATOR.join([self.get_transaction_path(app_id, txid), - TX_LOCK_PATH]) - - def get_blacklist_root_path(self, app_id): - """ Returns the location of the ZooKeeper node whose children are - all of the blacklisted transaction IDs for the given application ID. - - Args: - app_id: A str corresponding to the application who we want to get - blacklisted transaction IDs for. - Returns: - A str corresponding to the ZooKeeper node whose children are blacklisted - transaction IDs. - """ - return PATH_SEPARATOR.join([self.get_transaction_prefix_path(app_id), - TX_BLACKLIST_PATH]) - - def get_valid_transaction_root_path(self, app_id): - """ Returns the location of the ZooKeeper node whose children are - all of the valid transaction IDs for the given application ID. - - Args: - app_id: A str corresponding to the application who we want to get - valid transaction IDs for. - Returns: - A str corresponding to the ZooKeeper node whose children are valid - transaction IDs. - """ - return PATH_SEPARATOR.join([self.get_transaction_prefix_path(app_id), - TX_VALIDLIST_PATH]) - - def get_valid_transaction_path(self, app_id, entity_key): - """ Gets the valid transaction path with the entity key. - Args: - app_id: The application ID. - entity_key: The entity within the path. - Returns: - A str representing the transaction path. - """ - return PATH_SEPARATOR.join([self.get_valid_transaction_root_path(app_id), - urllib.quote_plus(entity_key)]) - - def get_lock_root_path(self, app_id, key): - """ Gets the root path of the lock for a particular app. - - Args: - app_id: The application ID. - key: The key for which we're getting the root path lock. - Returns: - A str of the root lock path. - """ - return PATH_SEPARATOR.join([self.get_app_root_path(app_id), APP_LOCK_PATH, - urllib.quote_plus(key)]) - - def get_xg_path(self, app_id, tx_id): - """ Gets the XG path for a transaction. - - Args: - app_id: The application ID whose XG path we want. - tx_id: The transaction ID whose XG path we want. - Returns: - A str representing the XG path for the given transaction. - """ - txstr = APP_TX_PREFIX + "%010d" % tx_id - return PATH_SEPARATOR.join([self.get_app_root_path(app_id), APP_TX_PATH, - txstr, XG_PREFIX]) - - def create_node(self, path, value): - """ Creates a new node in ZooKeeper, with the given value. - - Args: - path: The path to create the node at. - value: The value that we should store in the node. - Raises: - ZKTransactionException: If the sequence node couldn't be created. 
- """ - try: - self.run_with_retry(self.handle.create, path, value=str(value), - acl=ZOO_ACL_OPEN, ephemeral=False, sequence=False, makepath=True) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException("Couldn't create path {0} with value {1} " \ - .format(path, value)) - - def create_sequence_node(self, path, value): - """ Creates a new sequence node in ZooKeeper, with a non-zero initial ID. - - We avoid using zero as the initial ID because Google App Engine apps can - use a zero ID as a sentinel value, to indicate that an ID should be - allocated for them. - - Args: - path: The prefix to create the sequence node at. For example, a prefix - of '/abc' would result in a sequence node of '/abc1' being created. - value: The value that we should store in the sequence node. - Returns: - A long that represents the sequence ID. - Raises: - ZKTransactionException: If the sequence node couldn't be created. - """ - try: - txn_id_path = self.run_with_retry(self.handle.create, path, - value=str(value), acl=ZOO_ACL_OPEN, ephemeral=False, sequence=True, - makepath=True) - if txn_id_path: - txn_id = long(txn_id_path.split(PATH_SEPARATOR)[-1].lstrip( - APP_TX_PREFIX)) - if txn_id == 0: - self.logger.warning("Created sequence ID 0 - deleting it.") - self.run_with_retry(self.handle.delete, txn_id_path) - txn_id_path = self.run_with_retry(self.handle.create, path, - value=str(value), acl=ZOO_ACL_OPEN, ephemeral=False, - sequence=True, makepath=True) - return long(txn_id_path.split(PATH_SEPARATOR)[-1].lstrip( - APP_TX_PREFIX)) - else: - return txn_id - except kazoo.exceptions.ZookeeperError as zoo_exception: - self.logger.exception(zoo_exception) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - - raise ZKTransactionException("Unable to create sequence node with path" \ - " {0}, value {1}".format(path, value)) - - def check_transaction(self, app_id, txid): - """ Gets the status of the given transaction. - - Args: - app_id: A str representing the application whose transaction we wish to - query. - txid: An int that indicates the transaction ID we should query. - Returns: - True if the transaction is in progress. - Raises: - ZKTransactionException: If the transaction is not in progress, or it - has timed out. - """ - txpath = self.get_transaction_path(app_id, txid) - try: - if self.is_blacklisted(app_id, txid): - raise ZKTransactionException("Transaction {0} timed out.".format(txid)) - except ZKInternalException as zk_exception: - self.logger.exception(zk_exception) - raise ZKTransactionException("Couldn't see if transaction {0} is valid" \ - .format(txid)) - - try: - if not self.run_with_retry(self.handle.exists, txpath): - self.logger.debug( - 'check_transaction: {} does not exist'.format(txpath)) - raise ZKTransactionException('Transaction {} is invalid'.format(txid)) - return True - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException( - 'Unable to determine status of transaction {}'.format(txid)) - - def is_in_transaction(self, app_id, txid, retries=5): - """ Checks to see if the named transaction is currently running. - - Args: - app_id: A str representing the application whose transaction we wish to - query. - txid: An int that indicates the transaction ID we should query. - Returns: - True if the transaction is in progress, and False otherwise. 
- Raises: - ZKTransactionException: If the transaction is blacklisted. - ZKInternalException: If there was an error seeing if the transaction was - blacklisted. - """ - tx_lock_path = self.get_transaction_lock_list_path(app_id, txid) - if self.is_blacklisted(app_id, txid): - raise ZKTransactionException( - 'Transaction {} is blacklisted'.format(txid)) - try: - if not self.run_with_retry(self.handle.exists, tx_lock_path): - return False - return True - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - if retries > 0: - self.logger.info( - 'Trying again to see if transaction {} is in progress. Retry #{}' - .format(txid, retries)) - time.sleep(self.ZK_RETRY_TIME) - return self.is_in_transaction(app_id=app_id, txid=txid, - retries=retries - 1) - raise ZKInternalException("Couldn't see if we are in transaction {0}" \ - .format(txid)) - - def is_orphan_lock(self, tx_lockpath): - """ Checks to see if a lock does not have a transaction linked. - - If the groomer misses to unlock a lock for whatever reason, we need - to make sure the lock is eventually released. - - Args: - tx_lockpath: A str, the path to the transaction using the lock. - Returns: - True if the lock is an orphan, and False otherwise. - """ - try: - self.handle.get(tx_lockpath) - return False - except kazoo.exceptions.NoNodeError: - return True - - def acquire_additional_lock(self, app_id, txid, entity_key, create): - """ Acquire an additional lock for a cross group transaction. - - Args: - app_id: A str representing the application ID. - txid: The transaction ID you are acquiring a lock for. Built into - the path. - entity_key: Used to get the root path. - create: A bool that indicates if we should create a new Zookeeper node - to store the lock information in. - Returns: - Boolean, of true on success, false if lock can not be acquired. - Raises: - ZKTransactionException: If we can't acquire the lock for the given - entity group, because a different transaction already has it. - """ - txpath = self.get_transaction_path(app_id, txid) - lockrootpath = self.get_lock_root_path(app_id, entity_key) - lockpath = None - - try: - lockpath = self.run_with_retry(self.handle.create, lockrootpath, - value=str(txpath), acl=ZOO_ACL_OPEN, ephemeral=False, - sequence=False, makepath=True) - except kazoo.exceptions.NodeExistsError: - # fail to get lock - try: - tx_lockpath = self.run_with_retry(self.handle.get, lockrootpath)[0] - self.logger.error( - 'Lock {} in use by {}'.format(lockrootpath, tx_lockpath)) - if self.is_orphan_lock(tx_lockpath): - self.logger.error( - 'Lock {} is an orphan lock. Releasing it'.format(lockrootpath)) - # Releasing the lock in question. - self.handle.delete(lockrootpath) - # Try to acquire the lock again. - return self.acquire_additional_lock(app_id, txid, entity_key, create) - except kazoo.exceptions.NoNodeError: - # If the lock is released by another thread this can get tossed. - # A race condition. 
- self.logger.warning( - 'Lock {} was in use but was released'.format(lockrootpath)) - raise ZKTransactionException("acquire_additional_lock: There is " \ - "already another transaction using {0} lock".format(lockrootpath)) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException("Couldn't get a lock at path {0}" \ - .format(lockrootpath)) - - transaction_lock_path = self.get_transaction_lock_list_path(app_id, txid) - - try: - if create: - self.run_with_retry(self.handle.create_async, transaction_lock_path, - value=str(lockpath), acl=ZOO_ACL_OPEN, ephemeral=False, - makepath=False, sequence=False) - else: - tx_lockpath = self.run_with_retry(self.handle.get, - transaction_lock_path)[0] - lock_list = tx_lockpath.split(LOCK_LIST_SEPARATOR) - lock_list.append(lockpath) - lock_list_str = LOCK_LIST_SEPARATOR.join(lock_list) - self.run_with_retry(self.handle.set_async, transaction_lock_path, - str(lock_list_str)) - self.logger.debug( - 'Set lock list path {} to value {}' - .format(transaction_lock_path, lock_list_str)) - # We do this check last, otherwise we may have left over locks to - # to a lack of a lock path reference. - if len(lock_list) > MAX_GROUPS_FOR_XG: - raise ZKBadRequest("acquire_additional_lock: Too many " \ - "groups for this XG transaction.") - - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException("Couldn't create or set a lock at path {0}" \ - .format(transaction_lock_path)) - - return True - - def is_xg(self, app_id, tx_id): - """ Checks to see if the transaction can operate over multiple entity - groups. - - Args: - app_id: The application ID that the transaction operates over. - tx_id: The transaction ID that may or may not be XG. - Returns: - True if the transaction is XG, False otherwise. - Raises: - ZKTransactionException: on ZooKeeper exceptions. - ZKInternalException: If we can't tell if the transaction is a XG - transaction or not. - """ - try: - return self.run_with_retry(self.handle.exists, self.get_xg_path(app_id, - tx_id)) - except kazoo.exceptions.ZookeeperError as zk_exception: - raise ZKTransactionException("ZooKeeper exception:{0}"\ - .format(zk_exception)) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKInternalException("Couldn't see if transaction {0} was XG " \ - "for app {1}".format(tx_id, app_id)) - - def acquire_lock(self, app_id, txid, entity_key): - """ Acquire lock for transaction. It will acquire additional locks - if the transactions is XG. - - You must call create_transaction_id() first to obtain transaction ID. - You could call this method anytime if the root entity key is same, - or different in the case of it being XG. - - Args: - app_id: The application ID to acquire a lock for. - txid: The transaction ID you are acquiring a lock for. Built into - the path. - entity_key: Used to get the root path. - Returns: - True on success, False otherwise. - Raises: - ZKTransactionException: If it could not get the lock. 
- """ - lockrootpath = self.get_lock_root_path(app_id, entity_key) - - try: - if self.is_in_transaction(app_id, txid): # use current lock - transaction_lock_path = self.get_transaction_lock_list_path( - app_id, txid) - prelockpath = self.run_with_retry(self.handle.get, - transaction_lock_path)[0] - lock_list = prelockpath.split(LOCK_LIST_SEPARATOR) - if lockrootpath in lock_list: - return True - else: - if self.is_xg(app_id, txid): - return self.acquire_additional_lock(app_id, txid, entity_key, - create=False) - else: - raise ZKBadRequest("acquire_lock: You can not lock " \ - "different root entity in non-cross-group transaction.") - except ZKInternalException as zk_exception: - self.logger.exception(zk_exception) - raise ZKTransactionException("An internal exception prevented us from " \ - "getting the lock for app id {0}, txid {1}, entity key {2}" \ - .format(app_id, txid, entity_key)) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException("Couldn't get lock for app id {0}, txid " \ - "{1}, entity key {2}".format(app_id, txid, entity_key)) - - return self.acquire_additional_lock(app_id, txid, entity_key, create=True) - - def get_updated_key_list(self, app_id, txid): - """ Gets a list of keys updated in this transaction. - - Args: - app_id: A str corresponding to the application ID whose transaction we - wish to query. - txid: The transaction ID that we want to get a list of updated keys for. - Returns: - A list of (keys, txn_id) that have been updated in this transaction. - Raises: - ZKTransactionException: If the given transaction ID does not correspond - to a transaction that is currently in progress. - """ - txpath = self.get_transaction_path(app_id, txid) - try: - child_list = self.run_with_retry(self.handle.get_children, txpath) - keylist = [] - for item in child_list: - if re.match("^" + TX_UPDATEDKEY_PREFIX, item): - keyandtx = self.run_with_retry(self.handle.get, - PATH_SEPARATOR.join([txpath, item]))[0] - key = urllib.unquote_plus(keyandtx.split(PATH_SEPARATOR)[0]) - txn_id = urllib.unquote_plus(keyandtx.split(PATH_SEPARATOR)[1]) - keylist.append((key, txn_id)) - return keylist - except kazoo.exceptions.NoNodeError: - raise ZKTransactionException("get_updated_key_list: Transaction ID {0} " \ - "is not valid.".format(txid)) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException("Couldn't get updated key list for appid " \ - "{0}, txid {1}".format(app_id, txid)) - - def release_lock(self, app_id, txid): - """ Releases all locks acquired during this transaction. - - Callers must call acquire_lock before calling release_lock. Upon calling - release_lock, the given transaction ID is no longer valid. - - Args: - app_id: The application ID we are releasing a lock for. - txid: The transaction ID we are releasing a lock for. - Returns: - True if the locks were released. - Raises: - ZKTransactionException: If any locks acquired during this transaction - could not be released. 
- """ - self.check_transaction(app_id, txid) - txpath = self.get_transaction_path(app_id, txid) - - transaction_lock_path = self.get_transaction_lock_list_path(app_id, txid) - try: - lock_list_str = self.run_with_retry(self.handle.get, - transaction_lock_path)[0] - lock_list = lock_list_str.split(LOCK_LIST_SEPARATOR) - for lock_path in lock_list: - self.run_with_retry(self.handle.delete, lock_path) - self.run_with_retry(self.handle.delete, transaction_lock_path) - except kazoo.exceptions.NoNodeError: - try: - if self.is_blacklisted(app_id, txid): - raise ZKTransactionException( - "Unable to release lock {0} for app id {1}" \ - .format(transaction_lock_path, app_id)) - else: - return True - except ZKInternalException as zk_exception: - self.logger.exception(zk_exception) - raise ZKTransactionException("Internal exception prevented us from " \ - "releasing lock {0} for app id {1}".format(transaction_lock_path, - app_id)) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKTransactionException("Couldn't release lock {0} for appid {1}" \ - .format(transaction_lock_path, app_id)) - - try: - if self.is_xg(app_id, txid): - xg_path = self.get_xg_path(app_id, txid) - self.run_with_retry(self.handle.delete, xg_path) - - for child in self.run_with_retry(self.handle.get_children, txpath): - lock_path = PATH_SEPARATOR.join([txpath, child]) - self.logger.debug('Removing lock: {}'.format(lock_path)) - self.run_with_retry(self.handle.delete, lock_path) - - # This deletes the transaction root path. - self.run_with_retry(self.handle.delete, txpath) - - except ZKInternalException as zk_exception: - # Although there was a failure doing the async deletes, since we've - # already released the locks above, we can safely return True here. - self.logger.exception(zk_exception) - return True - except kazoo.exceptions.KazooException as kazoo_exception: - # Although there was a failure doing the async deletes, since we've - # already released the locks above, we can safely return True here. - self.logger.exception(kazoo_exception) - return True - - return True - - def is_blacklisted(self, app_id, txid, retries=5): - """ Checks to see if the given transaction ID has been blacklisted (that is, - if it is no longer considered to be a valid transaction). - - Args: - app_id: The application ID whose transaction ID we want to validate. - txid: The transaction ID that we want to validate. - Returns: - True if the transaction is blacklisted, False otherwise. - Raises: - ZKInternalException: If we couldn't determine if the transaction was - blacklisted or not. - """ - try: - blacklist_root = self.get_blacklist_root_path(app_id) - blacklist_txn = PATH_SEPARATOR.join([blacklist_root, - str(txid)]) - return self.run_with_retry(self.handle.exists, blacklist_txn) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - if retries > 0: - self.logger.info( - 'Trying again to see if transaction {} is blacklisted with retry #{}' - .format(txid, retries)) - time.sleep(self.ZK_RETRY_TIME) - return self.is_blacklisted(app_id=app_id, txid=txid, - retries=retries - 1) - raise ZKInternalException("Couldn't see if appid {0}'s transaction, " \ - "{1}, is blacklisted.".format(app_id, txid)) - - - def get_valid_transaction_id(self, app_id, target_txid, entity_key): - """ This returns valid transaction id for the entity key. - - Args: - app_id: A str representing the application ID. 
- target_txid: The transaction id that we want to check for validness. - entity_key: The entity that the transaction operates over. - Returns: - A long containing the latest valid transaction id, or zero if there is - none. - Raises: - ZKInternalException: If we couldn't get a valid transaction ID. - """ - # If this is an ongoing transaction give the previous value. - try: - if self.is_in_transaction(app_id, target_txid): - key_list = self.get_updated_key_list(app_id, target_txid) - for (key, txn_id) in key_list: - if entity_key == key: - return long(txn_id) - except ZKTransactionException, zk_exception: - # If the transaction is blacklisted. - # Get the valid id. - vtxpath = self.get_valid_transaction_path(app_id, entity_key) - try: - return long(self.run_with_retry(self.handle.get, vtxpath)[0]) - except kazoo.exceptions.NoNodeError: - # Blacklisted and without a valid ID. - return long(0) - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - raise ZKInternalException("Couldn't get valid transaction id for " \ - "app {0}, target txid {1}, entity key {2}".format(app_id, target_txid, - entity_key)) - - # The given target ID is not blacklisted or in an ongoing transaction. - return target_txid - - def notify_failed_transaction(self, app_id, txid): - """ Marks the given transaction as failed, invalidating its use by future - callers. - - This function also cleans up successful transactions that have expired. - - Args: - app_id: The application ID whose transaction we wish to invalidate. - txid: An int representing the transaction ID we wish to invalidate. - Returns: - True if the transaction was invalidated, False otherwise. - """ - self.logger.debug('notify_failed_trasnsaction: app={}, txid={}' - .format(app_id, txid)) - - lockpath = None - lock_list = [] - - txpath = self.get_transaction_path(app_id, txid) - try: - lockpath = self.run_with_retry(self.handle.get, - PATH_SEPARATOR.join([txpath, TX_LOCK_PATH]))[0] - lock_list = lockpath.split(LOCK_LIST_SEPARATOR) - except kazoo.exceptions.NoNodeError: - # There is no need to rollback because there is no lock. - self.logger.debug('There is no lock for transaction {}'.format(txid)) - pass - except kazoo.exceptions.ZookeeperError as zoo_exception: - self.logger.exception(zoo_exception) - return False - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - return False - - try: - if lock_list: - children = [] - try: - children = self.run_with_retry(self.handle.get_children, txpath) - except kazoo.exceptions.NoNodeError: - pass - - # Copy valid transaction ID for each updated key into valid list. - for child in children: - if re.match("^" + TX_UPDATEDKEY_PREFIX, child): - value = self.run_with_retry(self.handle.get, - PATH_SEPARATOR.join([txpath, child]))[0] - valuelist = value.split(PATH_SEPARATOR) - key = urllib.unquote_plus(valuelist[0]) - vid = valuelist[1] - vtxroot = self.get_valid_transaction_root_path(app_id) - - if not self.run_with_retry(self.handle.exists, vtxroot): - self.run_with_retry(self.handle.create, vtxroot, DEFAULT_VAL, - ZOO_ACL_OPEN, False, False, True) - vtxpath = self.get_valid_transaction_path(app_id, key) - self.run_with_retry(self.handle.create_async, vtxpath, str(vid), - ZOO_ACL_OPEN) - - # Release the locks. - for lock in lock_list: - try: - self.run_with_retry(self.handle.delete, lock) - except kazoo.exceptions.NoNodeError: - # Try to delete all nodes, so skip any failure to release a lock. 
- pass - - if self.is_xg(app_id, txid): - try: - self.run_with_retry(self.handle.delete, self.get_xg_path(app_id, - txid)) - except kazoo.exceptions.NoNodeError: - self.logger.error( - 'No node error when trying to remove {0}'.format(txid)) - - # Remove the transaction paths. - for item in self.run_with_retry(self.handle.get_children, txpath): - try: - self.run_with_retry(self.handle.delete, - PATH_SEPARATOR.join([txpath, item])) - except kazoo.exceptions.NoNodeError: - self.logger.error( - 'No node error when trying to remove {}'.format(txid)) - - self.logger.debug( - 'Notify failed transaction removing lock: {}'.format(txpath)) - self.run_with_retry(self.handle.delete, txpath) - - except ZKInternalException as zk_exception: - self.logger.exception(zk_exception) - return False - except kazoo.exceptions.ZookeeperError as zk_exception: - self.logger.exception(zk_exception) - return False - except kazoo.exceptions.KazooException as kazoo_exception: - self.logger.exception(kazoo_exception) - return False - - return True - - def get_lock_with_path(self, path): - """ Tries to get the lock based on path. - - Args: - path: A str, the lock path. - Returns: - True if the lock was obtained, False otherwise. - """ - try: - now = str(time.time()) - self.run_with_retry(self.handle.create, path, value=now, - acl=ZOO_ACL_OPEN, ephemeral=True) - except kazoo.exceptions.NoNodeError: - self.logger.error('Unable to create {}'.format(path)) - return False - except kazoo.exceptions.NodeExistsError: - return False - except (kazoo.exceptions.SystemZookeeperError, ZookeeperError, - KazooException, SystemError): - self.logger.exception('Unable to create {}'.format(path)) - return False - except Exception: - self.logger.exception('Unknown exception') - return False - - return True - - def release_lock_with_path(self, path): - """ Releases lock based on path. - - Args: - path: A str, the lock path. - Returns: - True on success, False on system failures. - Raises: - ZKTransactionException: If the lock could not be released. 
- """ - try: - self.run_with_retry(self.handle.delete, path) - except kazoo.exceptions.NoNodeError: - raise ZKTransactionException('Unable to delete lock: {0}'.format(path)) - except (kazoo.exceptions.SystemZookeeperError, KazooException, - SystemError): - self.logger.exception('Unable to delete lock: {0}'.format(path)) - return False - except Exception: - self.logger.exception('Unknown exception') - return False - return True diff --git a/AppDB/setup.py b/AppDB/setup.py index 2d20846395..d0365c07b5 100644 --- a/AppDB/setup.py +++ b/AppDB/setup.py @@ -11,7 +11,6 @@ platforms='Posix', install_requires=[ 'appscale-common', - 'cassandra-driver<3.18.0', 'kazoo', 'monotonic', 'mmh3', @@ -30,30 +29,14 @@ namespace_packages=['appscale'], packages=['appscale', 'appscale.datastore', - 'appscale.datastore.cassandra_env', 'appscale.datastore.fdb', 'appscale.datastore.fdb.stats', - 'appscale.datastore.scripts', - 'appscale.datastore.zkappscale'], + 'appscale.datastore.scripts'], entry_points={'console_scripts': [ 'appscale-blobstore-server=appscale.datastore.scripts.blobstore:main', - 'appscale-data-layout=appscale.datastore.scripts.data_layout:main', 'appscale-datastore=appscale.datastore.scripts.datastore:main', - 'appscale-delete-all-records=' - 'appscale.datastore.scripts.delete_records:main', - 'appscale-get-token=appscale.datastore.cassandra_env.get_token:main', - 'appscale-groomer=appscale.datastore.groomer:main', - 'appscale-groomer-service=appscale.datastore.scripts.groomer_service:main', - 'appscale-prime-cassandra=appscale.datastore.scripts.prime_cassandra:main', - 'appscale-rebalance=appscale.datastore.cassandra_env.rebalance:main', - 'appscale-transaction-groomer=' - 'appscale.datastore.scripts.transaction_groomer:main', 'appscale-uaserver=appscale.datastore.scripts.ua_server:main', 'appscale-uaserver-backup=appscale.datastore.scripts.ua_server_backup:main', 'appscale-uaserver-restore=appscale.datastore.scripts.ua_server_restore:main', - 'appscale-update-index=appscale.datastore.scripts.update_index:main', - 'appscale-upgrade-schema=appscale.datastore.scripts.upgrade_schema:main', - 'appscale-view-all-records=appscale.datastore.scripts.view_records:main', - ]}, - package_data={'appscale.datastore.cassandra_env': ['templates/*']} + ]} ) diff --git a/AppDB/test/e2e/test_queries.py b/AppDB/test/e2e/test_queries.py index e20282abe7..cf7c1e3ba1 100644 --- a/AppDB/test/e2e/test_queries.py +++ b/AppDB/test/e2e/test_queries.py @@ -134,19 +134,3 @@ def tear_down_helper(self): yield self.datastore.delete(batch) batch = [] yield self.datastore.delete(batch) - - @gen_test - def test_cassandra_page_size(self): - entity_count = self.CASSANDRA_PAGE_SIZE + 1 - batch = [] - for _ in range(entity_count): - entity = Entity('Greeting', _app=PROJECT_ID) - batch.append(entity) - if len(batch) == self.BATCH_SIZE: - yield self.datastore.put_multi(batch) - batch = [] - yield self.datastore.put_multi(batch) - - query = Query('Greeting', _app=PROJECT_ID) - results = yield self.datastore.run_query(query) - self.assertEqual(len(results), entity_count) diff --git a/AppDB/test/unit/test_cassandra_interface.py b/AppDB/test/unit/test_cassandra_interface.py deleted file mode 100644 index cd2f24fe7d..0000000000 --- a/AppDB/test/unit/test_cassandra_interface.py +++ /dev/null @@ -1,167 +0,0 @@ -import unittest - -from mock import mock -from tornado import testing -from tornado.concurrent import Future - -from appscale.common import file_io -from appscale.datastore.cassandra_env import cassandra_interface - - -class 
TestCassandra(testing.AsyncTestCase): - - def setUp(self, *args, **kwargs): - super(TestCassandra, self).setUp(*args, **kwargs) - # Prepare patchers - self.read_patcher = mock.patch.object(file_io, 'read') - self.execute_patcher = mock.patch.object( - cassandra_interface.TornadoCassandra, 'execute') - self.cluster_class_patcher = mock.patch.object( - cassandra_interface, 'Cluster') - - # Start patches - self.read_mock = self.read_patcher.start() - self.execute_mock = self.execute_patcher.start() - self.cluster_class_mock = self.cluster_class_patcher.start() - - # Configure mocks - self.read_mock.return_value = '127.0.0.1' - self.session_mock = mock.MagicMock() - self.connect_mock = mock.MagicMock(return_value=self.session_mock) - self.cluster_mock = mock.MagicMock(connect=self.connect_mock) - self.cluster_class_mock.return_value = self.cluster_mock - - # Instantiate Datastore proxy - self.db = cassandra_interface.DatastoreProxy() - - def tearDown(self, *args, **kwargs): - super(TestCassandra, self).tearDown(*args, **kwargs) - self.read_patcher.stop() - self.execute_patcher.stop() - self.cluster_class_patcher.stop() - - @testing.gen_test - def test_get(self): - # Mock cassandra response - async_response = Future() - async_response.set_result([ - ('a', 'c1', '1'), ('a', 'c2', '2'), ('a', 'c3', '3'), - ('b', 'c1', '4'), ('b', 'c2', '5'), ('b', 'c3', '6'), - ('c', 'c1', '7'), ('c', 'c2', '8'), ('c', 'c3', '9'), - ]) - self.execute_mock.return_value = async_response - - # Call function under test - keys = ['a', 'b', 'c'] - columns = ['c1', 'c2', 'c3'] - result = yield self.db.batch_get_entity('table', keys, columns) - - # Make sure cassandra interface prepared good query - query = self.execute_mock.call_args[0][0] - parameters = self.execute_mock.call_args[1]["parameters"] - self.assertEqual( - query.query_string, - 'SELECT * FROM "table" WHERE key IN %s and column1 IN %s') - self.assertEqual(parameters, ([b'a', b'b', b'c'], ['c1', 'c2', 'c3']) ) - # And result matches expectation - self.assertEqual(result, { - 'a': {'c1': '1', 'c2': '2', 'c3': '3'}, - 'b': {'c1': '4', 'c2': '5', 'c3': '6'}, - 'c': {'c1': '7', 'c2': '8', 'c3': '9'} - }) - - @testing.gen_test - def test_put(self): - # Mock execute function response - async_response = Future() - async_response.set_result(None) - self.execute_mock.return_value = async_response - # Mock prepare method of session - self.session_mock.prepare = mock.MagicMock( - side_effect=lambda query_str: mock.MagicMock(argument=query_str)) - - # Call function under test - keys = ['a', 'b', 'c'] - columns = ['c1', 'c2', 'c3'] - cell_values = { - 'a': {'c1': '1', 'c2': '2', 'c3': '3'}, - 'b': {'c1': '4', 'c2': '5', 'c3': '6'}, - 'c': {'c1': '7', 'c2': '8', 'c3': '9'} - } - result = yield self.db.batch_put_entity('tableX', keys, columns, cell_values) - - # Make sure cassandra interface prepared good queries - self.assertEqual(len(self.execute_mock.call_args_list), 9) - calls_iterator = iter(self.execute_mock.call_args_list) - expected_params_iterator = iter([ - ('a', 'c1', '1'), ('a', 'c2', '2'), ('a', 'c3', '3'), - ('b', 'c1', '4'), ('b', 'c2', '5'), ('b', 'c3', '6'), - ('c', 'c1', '7'), ('c', 'c2', '8'), ('c', 'c3', '9') - ]) - for expected_params in expected_params_iterator: - call = next(calls_iterator) - prepare_argument = call[0][0] - parameters = call[1]["parameters"] - self.assertEqual( - prepare_argument.argument, - 'INSERT INTO "tableX" (key, column1, value) VALUES (?, ?, ?)' - ) - self.assertEqual(parameters, expected_params) - # And result matches 
expectation - self.assertEqual(result, None) - - @testing.gen_test - def test_delete_table(self): - # Mock cassandra response - async_response = Future() - async_response.set_result(None) - self.execute_mock.return_value = async_response - - # Call function under test - result = yield self.db.delete_table('tableY') - - # Make sure cassandra interface prepared good query - query = self.execute_mock.call_args[0][0] - self.assertEqual(query.query_string, 'DROP TABLE IF EXISTS "tableY"') - self.assertEqual(len(self.execute_mock.call_args[0]), 1) # 1 positional arg - self.assertEqual(self.execute_mock.call_args[1], {}) # no kwargs - # And result matches expectation - self.assertEqual(result, None) - - @testing.gen_test - def test_range_query(self): - # Mock cassandra response - async_response = Future() - async_response.set_result([ - ('keyA', 'c1', '1'), ('keyA', 'c2', '2'), - ('keyB', 'c1', '4'), ('keyB', 'c2', '5'), - ('keyC', 'c1', '7'), ('keyC', 'c2', '8') - ]) - self.execute_mock.return_value = async_response - - # Call function under test - columns = ['c1', 'c2'] - result = yield self.db.range_query("tableZ", columns, "keyA", "keyC", 5) - - # Make sure cassandra interface prepared good query - query = self.execute_mock.call_args[0][0] - parameters = self.execute_mock.call_args[1]["parameters"] - self.assertEqual( - query.query_string, - 'SELECT * FROM "tableZ" WHERE ' - 'token(key) >= %s AND ' - 'token(key) <= %s AND ' - 'column1 IN %s ' - 'LIMIT 10 ' # 5 * number of columns - 'ALLOW FILTERING') - self.assertEqual(parameters, (b'keyA', b'keyC', ['c1', 'c2']) ) - # And result matches expectation - self.assertEqual(result, [ - {'keyA': {'c1': '1', 'c2': '2'}}, - {'keyB': {'c1': '4', 'c2': '5'}}, - {'keyC': {'c1': '7', 'c2': '8'}} - ]) - - -if __name__ == "__main__": - unittest.main() diff --git a/AppDB/test/unit/test_cassandra_prime.py b/AppDB/test/unit/test_cassandra_prime.py deleted file mode 100644 index 7e56b3a43c..0000000000 --- a/AppDB/test/unit/test_cassandra_prime.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python2 - -import unittest - -from appscale.common import appscale_info -from appscale.datastore import dbconstants -from cassandra.cluster import Cluster -from appscale.datastore.cassandra_env import schema -from flexmock import flexmock - - -class TestCassandraPrimer(unittest.TestCase): - def test_define_ua_schema(self): - session = flexmock(execute=lambda statement, values: None) - schema.define_ua_schema(session) - - def test_prime_cassandra(self): - self.assertRaises(TypeError, schema.prime_cassandra, '1') - self.assertRaises( - dbconstants.AppScaleBadArg, schema.prime_cassandra, 0) - - flexmock(appscale_info).should_receive('get_db_ips').\ - and_return(['127.0.0.1']) - - session = flexmock(execute=lambda: None, set_keyspace=lambda: None) - flexmock(Cluster).should_receive('connect').and_return(session) - flexmock(schema).should_receive('define_ua_schema') diff --git a/AppDB/test/unit/test_datastore_server.py b/AppDB/test/unit/test_datastore_server.py deleted file mode 100644 index 4ea9bd8d38..0000000000 --- a/AppDB/test/unit/test_datastore_server.py +++ /dev/null @@ -1,1070 +0,0 @@ -#!/usr/bin/env python -# Programmer: Navraj Chohan - -import datetime -import random -import sys -import unittest - -from tornado import gen, testing - -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.datastore import dbconstants -from appscale.datastore import utils -from appscale.datastore.datastore_distributed import DatastoreDistributed -from 
appscale.datastore.dbconstants import APP_ENTITY_SCHEMA -from appscale.datastore.dbconstants import JOURNAL_SCHEMA -from appscale.datastore.dbconstants import TOMBSTONE -from appscale.datastore.cassandra_env.entity_id_allocator import\ - ScatteredAllocator - -from appscale.datastore.cassandra_env.utils import deletions_for_entity -from appscale.datastore.cassandra_env.utils import index_deletions -from appscale.datastore.cassandra_env.utils import mutations_for_entity - -from appscale.datastore.utils import ( - encode_index_pb, - get_entity_key, - get_entity_kind, - get_index_key_from_params, - get_index_kv_from_tuple, - get_kind_key -) - -from appscale.datastore.zkappscale.entity_lock import EntityLock -from appscale.datastore.zkappscale.zktransaction import ZKTransactionException -from flexmock import flexmock - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.datastore import entity_pb -from google.appengine.datastore import datastore_pb -from google.appengine.ext import db - - -class Item(db.Model): - name = db.StringProperty(required = True) - - -ASYNC_NONE = gen.Future() -ASYNC_NONE.set_result(None) - - -class TestDatastoreServer(testing.AsyncTestCase): - """ - A set of test cases for the datastore server (datastore server v2) - """ - BASIC_ENTITY = ['guestbook', 'Greeting', 'foo', 'content', 'hello world'] - - def setUp(self, *args, **kwargs): - super(TestDatastoreServer, self).setUp(*args, **kwargs) - - - - def get_zookeeper(self): - zk_handle = flexmock(handler=flexmock(event_object=lambda: None, - sleep_func=lambda: None, - lock_object=lambda: None)) - zk_handle.should_receive('add_listener') - zookeeper = flexmock(handle=zk_handle) - zookeeper.should_receive("acquire_lock").and_return(True) - zookeeper.should_receive("release_lock").and_return(True) - zookeeper.should_receive("increment_and_get_counter").and_return(0,1000) - return zookeeper - - def test_get_entity_kind(self): - item = Item(name="Bob", _app="hello") - key = db.model_to_protobuf(item) - self.assertEquals(get_entity_kind(key), "Item") - - def test_kind_key(self): - item = Item(name="Dyan", _app="hello") - key = db.model_to_protobuf(item) - self.assertEquals(get_kind_key("howdy", key.key().path()), "howdy\x00Item\x01Item:0000000000\x01") - - item1 = Item(key_name="Bob", name="Bob", _app="hello") - key = db.model_to_protobuf(item1) - self.assertEquals(get_kind_key("howdy", key.key().path()), "howdy\x00Item\x01Item:Bob\x01") - - item2 = Item(key_name="Frank", name="Frank", _app="hello", parent = item1) - key = db.model_to_protobuf(item2) - self.assertEquals(get_kind_key("howdy", key.key().path()), - "howdy\x00Item\x01Item:Bob\x01Item:Frank\x01") - - def test_get_entity_key(self): - item = Item(key_name="Bob", name="Bob", _app="hello") - key = db.model_to_protobuf(item) - self.assertEquals(str(get_entity_key("howdy", key.key().path())), "howdy\x00Item:Bob\x01") - - def test_validate_key(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - item = Item(key_name="Bob", name="Bob", _app="hello") - key = db.model_to_protobuf(item) - dd.validate_key(key.key()) - - def test_get_table_prefix(self): - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - zk_client = flexmock() - 
zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - item = Item(key_name="Bob", name="Bob", _app="hello") - key = db.model_to_protobuf(item) - self.assertEquals(dd.get_table_prefix(key), "hello\x00") - - def test_get_index_key_from_params(self): - params = ['a','b','c','d','e'] - self.assertEquals(get_index_key_from_params(params), - "a\x00b\x00c\x00d\x00e") - - def test_get_index_kv_from_tuple(self): - item1 = Item(key_name="Bob", name="Bob", _app="hello") - item2 = Item(key_name="Sally", name="Sally", _app="hello") - key1 = db.model_to_protobuf(item1) - key2 = db.model_to_protobuf(item2) - tuples_list = [("a\x00b",key1),("a\x00b",key2)] - self.assertEquals(get_index_kv_from_tuple( - tuples_list), (['a\x00b\x00Item\x00name\x00\x9aBob\x01\x01\x00Item:Bob\x01', - 'a\x00b\x00Item:Bob\x01'], - ['a\x00b\x00Item\x00name\x00\x9aSally\x01\x01\x00Item:Sally\x01', - 'a\x00b\x00Item:Sally\x01'])) - - def test_get_composite_index_key(self): - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd = flexmock(dd) - - composite_index = entity_pb.CompositeIndex() - composite_index.set_id(123) - composite_index.set_app_id("appid") - - definition = composite_index.mutable_definition() - definition.set_entity_type("kind") - - prop1 = definition.add_property() - prop1.set_name("prop1") - prop1.set_direction(1) # ascending - prop2 = definition.add_property() - prop2.set_name("prop2") - prop1.set_direction(1) # ascending - - ent = self.get_new_entity_proto("appid", "kind", "entity_name", "prop1", "value", ns="") - - self.assertEquals( - dd.get_composite_index_key(composite_index, ent), - "appid\x00\x00123\x00\x9avalue\x01\x01\x00\x00kind:entity_name\x01") - - @testing.gen_test - def test_delete_composite_index_metadata(self): - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_delete").and_return(ASYNC_NONE) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd = flexmock(dd) - dd.index_manager = flexmock( - projects={'appid': flexmock(delete_index_definition=lambda id_: None)}) - composite_index = entity_pb.CompositeIndex() - composite_index.set_id(1) - yield dd.delete_composite_index_metadata("appid", composite_index) - - @testing.gen_test - def test_create_composite_index(self): - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_put_entity").and_return(ASYNC_NONE) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - zk_handle = flexmock(ensure_path=lambda path: None, - get=lambda path: (None, flexmock(version=None)), - set=lambda path, value, version: None) - dd.zookeeper.handle = zk_handle - dd = flexmock(dd) - index = entity_pb.CompositeIndex() - index.set_app_id("appid") - index.set_state(2) - definition = index.mutable_definition() - definition.set_entity_type("kind") - definition.set_ancestor(0) - prop1 = definition.add_property() - prop1.set_name("prop1") - prop1.set_direction(1) # ascending - prop2 = definition.add_property() - prop2.set_name("prop2") - prop1.set_direction(1) # ascending - - index_id = yield 
dd.create_composite_index("appid", index) - assert index_id > 0 - - @testing.gen_test - def test_insert_composite_indexes(self): - composite_index = entity_pb.CompositeIndex() - composite_index.set_id(123) - composite_index.set_app_id("appid") - - definition = composite_index.mutable_definition() - definition.set_entity_type("kind") - - prop1 = definition.add_property() - prop1.set_name("prop1") - prop1.set_direction(1) # ascending - prop2 = definition.add_property() - prop2.set_name("prop2") - prop1.set_direction(1) # ascending - - ent = self.get_new_entity_proto( - "appid", "kind", "entity_name", "prop1", "value", ns="") - - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_put_entity").and_return(ASYNC_NONE).once() - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - yield dd.insert_composite_indexes([ent], [composite_index]) - - @testing.gen_test - def test_fetch_keys(self): - entity_proto1 = self.get_new_entity_proto( - "test", "test_kind", "bob", "prop1name", "prop1val", ns="blah") - async_get_1 = gen.Future() - async_get_1.set_result({ - 'test\x00blah\x00test_kind:bob\x01': { - APP_ENTITY_SCHEMA[0]: entity_proto1.Encode(), - APP_ENTITY_SCHEMA[1]: 1 - } - }) - async_get_2 = gen.Future() - async_get_2.set_result({ - 'test\x00blah\x00test_kind:bob\x01\x000000000002': { - JOURNAL_SCHEMA[0]: entity_proto1.Encode() - }}) - - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_delete").and_return(ASYNC_NONE) - db_batch.should_receive("batch_put_entity").and_return(ASYNC_NONE) - db_batch.should_receive("batch_get_entity").\ - and_return(async_get_1).\ - and_return(async_get_2) - - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("acquire_lock").and_return(True) - zookeeper.should_receive("release_lock").and_return(True) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - - fetched = yield dd.fetch_keys([entity_proto1.key()]) - self.assertEquals(fetched[0], { - 'test\x00blah\x00test_kind:bob\x01': { - 'txnID': 1, 'entity': entity_proto1.Encode() - } - }) - self.assertEqual(fetched[1], ['test\x00blah\x00test_kind:bob\x01']) - - def test_rollback_transcation(self): - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("release_lock").and_return(True) - zookeeper.should_receive("notify_failed_transaction").and_return(True) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - commit_request = datastore_pb.Transaction() - commit_request.set_handle(123) - commit_request.set_app("aaa") - self.assertEquals( - dd.rollback_transaction("app_id", commit_request.handle()), None) - - @staticmethod - def get_new_entity_proto(app_id, kind, entity_name, prop_name, prop_value, ns=""): - entity_proto = datastore_pb.EntityProto() - - reference = entity_proto.mutable_key() - reference.set_app(app_id) - reference.set_name_space(ns) - - path = reference.mutable_path() - element = path.add_element() - element.set_type(kind) - element.set_name(entity_name) - - ent_group = entity_proto.mutable_entity_group() - eg_element = 
ent_group.add_element() - eg_element.set_type(kind) - eg_element.set_name(entity_name) - - prop = entity_proto.add_property() - prop.set_meaning(datastore_pb.Property.BYTESTRING) - prop.set_name(prop_name) - prop.set_multiple(1) - val = prop.mutable_value() - val.set_stringvalue(prop_value) - return entity_proto - - @testing.gen_test - def test_dynamic_put(self): - db_batch = flexmock(session=flexmock()) - db_batch.should_receive('valid_data_version_sync').and_return(True) - - entity_proto1 = self.get_new_entity_proto( - "test", "test_kind", "bob", "prop1name", "prop1val", ns="blah") - entity_key1 = 'test\x00blah\x00test_kind:bob\x01' - entity_proto2 = self.get_new_entity_proto( - "test", "test_kind", "nancy", "prop1name", "prop2val", ns="blah") - entity_key2 = 'test\x00blah\x00test_kind:nancy\x01' - async_result = gen.Future() - async_result.set_result({entity_key1: {}, entity_key2: {}}) - - db_batch.should_receive('batch_get_entity').and_return(async_result) - db_batch.should_receive('normal_batch').and_return(ASYNC_NONE) - transaction_manager = flexmock( - create_transaction_id=lambda project, xg: 1, - delete_transaction_id=lambda project, txid: None, - set_groups=lambda project, txid, groups: None) - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd.index_manager = flexmock( - projects={'test': flexmock(indexes_pb=[])}) - putreq_pb = datastore_pb.PutRequest() - putreq_pb.add_entity() - putreq_pb.mutable_entity(0).MergeFrom(entity_proto1) - putreq_pb.add_entity() - putreq_pb.mutable_entity(1).MergeFrom(entity_proto2) - - putresp_pb = datastore_pb.PutResponse() - - async_true = gen.Future() - async_true.set_result(True) - entity_lock = flexmock(EntityLock) - entity_lock.should_receive('acquire').and_return(async_true) - entity_lock.should_receive('release') - - flexmock(ScatteredAllocator).should_receive('next').\ - and_return(random.randint(1, 500)) - - yield dd.dynamic_put('test', putreq_pb, putresp_pb) - self.assertEquals(len(putresp_pb.key_list()), 2) - - @testing.gen_test - def test_put_entities(self): - app_id = 'test' - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - entity_proto1 = self.get_new_entity_proto( - app_id, "test_kind", "bob", "prop1name", "prop1val", ns="blah") - entity_key1 = 'test\x00blah\x00test_kind:bob\x01' - entity_proto2 = self.get_new_entity_proto( - app_id, "test_kind", "nancy", "prop1name", "prop2val", ns="blah") - entity_key2 = 'test\x00blah\x00test_kind:nancy\x01' - entity_list = [entity_proto1, entity_proto2] - async_result = gen.Future() - async_result.set_result({entity_key1: {}, entity_key2: {}}) - - db_batch.should_receive('batch_get_entity').and_return(async_result) - db_batch.should_receive('normal_batch').and_return(ASYNC_NONE) - transaction_manager = flexmock( - create_transaction_id=lambda project, xg: 1, - delete_transaction_id=lambda project, txid: None, - set_groups=lambda project, txid, groups: None) - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd.index_manager = flexmock( - projects={app_id: flexmock(indexes_pb=[])}) - - async_true = gen.Future() - async_true.set_result(True) - entity_lock = flexmock(EntityLock) - entity_lock.should_receive('acquire').and_return(async_true) - entity_lock.should_receive('release') - - yield dd.put_entities(app_id, entity_list) - - def test_acquire_locks_for_trans(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - 
db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - flexmock(dd).should_receive("is_instance_wrapper").and_return(False).once() - self.assertRaises(TypeError, dd.acquire_locks_for_trans, [1], 1) - - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - flexmock(dd).should_receive("is_instance_wrapper").and_return(True) \ - .and_return(False).and_return(False) - self.assertRaises(TypeError, dd.acquire_locks_for_trans, [1], 1) - - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - flexmock(dd).should_receive("is_instance_wrapper").and_return(True) \ - .and_return(True) - - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - flexmock(dd).should_receive("is_instance_wrapper").and_return(True) \ - .and_return(True).and_return(False) - flexmock(dd).should_receive("get_table_prefix").and_return("prefix").never() - flexmock(dd).should_receive("get_root_key_from_entity_key").and_return("rootkey").never() - self.assertEquals({}, dd.acquire_locks_for_trans([], 1)) - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("acquire_lock").once() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - entity = flexmock() - entity.should_receive("app").and_return("appid") - flexmock(dd).should_receive("is_instance_wrapper").and_return(True) \ - .and_return(True).and_return(True) - flexmock(dd).should_receive("get_root_key_from_entity_key").and_return("rootkey").once() - self.assertEquals({'rootkey':1}, dd.acquire_locks_for_trans([entity], 1)) - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("acquire_lock").once().and_raise(ZKTransactionException) - zookeeper.should_receive("notify_failed_transaction").once() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - entity = flexmock() - entity.should_receive("app").and_return("appid") - flexmock(dd).should_receive("is_instance_wrapper").and_return(True) \ - .and_return(True).and_return(True) - flexmock(dd).should_receive("get_root_key_from_entity_key").and_return("rootkey").once() - self.assertRaises(ZKTransactionException, dd.acquire_locks_for_trans, [entity], 1) - - @testing.gen_test - def test_delete_entities(self): - app_id = 'test' - entity_proto1 = self.get_new_entity_proto( - app_id, "test_kind", "bob", "prop1name", "prop1val", ns="blah") - row_key = "test\x00blah\x00test_kind:bob\x01" - row_values = {row_key: {APP_ENTITY_SCHEMA[0]: entity_proto1.Encode(), - APP_ENTITY_SCHEMA[1]: '1'}} - async_result = gen.Future() - async_result.set_result(row_values) - - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("get_valid_transaction_id").and_return(1) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_get_entity").and_return(async_result) - db_batch.should_receive('normal_batch').and_return(ASYNC_NONE) - - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - - row_keys = [entity_proto1.key()] - - yield dd.delete_entities(entity_proto1.key(), 1, row_keys) - - def test_release_put_locks_for_nontrans(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("get_valid_transaction_id").and_return(1) - 
zookeeper.should_receive("release_lock").and_return(True) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - entity_proto1 = self.get_new_entity_proto( - "test", "test_kind", "bob", "prop1name", "prop1val", ns="blah") - entity_proto2 = self.get_new_entity_proto( - "test", "test_kind", "nancy", "prop1name", "prop2val", ns="blah") - entities = [entity_proto1, entity_proto2] - dd.release_locks_for_nontrans( - "test", entities, { - 'test\x00blah\x00test_kind:bob\x01': 1, - 'test\x00blah\x00test_kind:nancy\x01': 2 - }) - - def test_root_key_from_entity_key(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - self.assertEquals( - "test\x00blah\x00test_kind:bob\x01", - dd.get_root_key_from_entity_key("test\x00blah\x00test_kind:bob\x01nancy\x01") - ) - entity_proto1 = self.get_new_entity_proto( - "test", "test_kind", "nancy", "prop1name", "prop2val", ns="blah") - self.assertEquals( - "test\x00blah\x00test_kind:nancy\x01", - dd.get_root_key_from_entity_key(entity_proto1.key()) - ) - - @testing.gen_test - def test_dynamic_get(self): - entity_proto1 = self.get_new_entity_proto( - "test", "test_kind", "nancy", "prop1name", "prop2val", ns="blah") - - zk_client = flexmock() - zk_client.should_receive('add_listener') - - async_result = gen.Future() - async_result.set_result({ - "test\x00blah\x00test_kind:nancy\x01": { - APP_ENTITY_SCHEMA[0]: entity_proto1.Encode(), - APP_ENTITY_SCHEMA[1]: 1 - } - }) - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("get_valid_transaction_id").and_return(1) - zookeeper.should_receive("acquire_lock").and_return(True) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_get_entity").and_return(async_result) - db_batch.should_receive('record_reads').and_return(ASYNC_NONE) - - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - - entity_key = entity_proto1.key() - get_req = datastore_pb.GetRequest() - key = get_req.add_key() - key.MergeFrom(entity_key) - get_resp = datastore_pb.GetResponse() - - yield dd.dynamic_get("test", get_req, get_resp) - self.assertEquals(get_resp.entity_size(), 1) - - # Now test while in a transaction - get_resp = datastore_pb.GetResponse() - get_req.mutable_transaction().set_handle(1) - yield dd.dynamic_get("test", get_req, get_resp) - self.assertEquals(get_resp.entity_size(), 1) - - @testing.gen_test - def test_ancestor_query(self): - query = datastore_pb.Query() - ancestor = query.mutable_ancestor() - entity_proto1 = self.get_new_entity_proto( - "test", "test_kind", "nancy", "prop1name", "prop1val", ns="blah") - entity_key = entity_proto1.key() - get_req = datastore_pb.GetRequest() - key = get_req.add_key() - key.MergeFrom(entity_key) - ancestor.MergeFrom(entity_key) - - async_result = gen.Future() - async_result.set_result({ - "test\x00blah\x00test_kind:nancy\x01": { - APP_ENTITY_SCHEMA[0]: entity_proto1.Encode(), - APP_ENTITY_SCHEMA[1]: 1 - } - }) - - filter_info = [] - tombstone1 = {'key': {APP_ENTITY_SCHEMA[0]:TOMBSTONE, APP_ENTITY_SCHEMA[1]: 1}} - db_batch = flexmock() - 
db_batch.should_receive('record_reads').and_return(ASYNC_NONE) - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_get_entity").and_return(async_result) - - entity_proto1 = { - 'test\x00blah\x00test_kind:nancy\x01': { - APP_ENTITY_SCHEMA[0]: entity_proto1.Encode(), - APP_ENTITY_SCHEMA[1]: 1 - } - } - async_result_1 = gen.Future() - async_result_1.set_result([entity_proto1, tombstone1]) - async_result_2 = gen.Future() - async_result_2.set_result([]) - - db_batch.should_receive("range_query").\ - and_return(async_result_1).\ - and_return(async_result_2) - - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("get_valid_transaction_id").and_return(1) - zookeeper.should_receive("acquire_lock").and_return(True) - zookeeper.should_receive("is_in_transaction").and_return(False) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - yield dd.ancestor_query(query, filter_info) - # Now with a transaction - transaction = query.mutable_transaction() - transaction.set_handle(2) - yield dd.ancestor_query(query, filter_info) - - @testing.gen_test - def test_kindless_query(self): - query = datastore_pb.Query() - entity_proto1 = self.get_new_entity_proto( - "test", "test_kind", "nancy", "prop1name", "prop1val", ns="blah") - entity_key = entity_proto1.key() - get_req = datastore_pb.GetRequest() - key = get_req.add_key() - key.MergeFrom(entity_key) - - async_result = gen.Future() - async_result.set_result({ - "test\x00blah\x00test_kind:nancy\x01": { - APP_ENTITY_SCHEMA[0]: entity_proto1.Encode(), - APP_ENTITY_SCHEMA[1]: 1 - } - }) - - tombstone1 = {'key': {APP_ENTITY_SCHEMA[0]:TOMBSTONE, APP_ENTITY_SCHEMA[1]: 1}} - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive("batch_get_entity").and_return(async_result) - - entity_proto1 = { - 'test\x00blah\x00test_kind:nancy\x01': { - APP_ENTITY_SCHEMA[0]:entity_proto1.Encode(), - APP_ENTITY_SCHEMA[1]: 1 - } - } - async_result_1 = gen.Future() - async_result_1.set_result([entity_proto1, tombstone1]) - async_result_2 = gen.Future() - async_result_2.set_result([]) - db_batch.should_receive("range_query").\ - and_return(async_result_1).\ - and_return(async_result_2) - - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - zookeeper.should_receive("get_valid_transaction_id").and_return(1) - zookeeper.should_receive("is_in_transaction").and_return(False) - zookeeper.should_receive("acquire_lock").and_return(True) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - filter_info = { - '__key__' : [[0, 0]] - } - yield dd.kindless_query(query, filter_info) - - @testing.gen_test - def test_dynamic_delete(self): - async_true = gen.Future() - async_true.set_result(True) - entity_lock = flexmock(EntityLock) - entity_lock.should_receive('acquire').and_return(async_true) - entity_lock.should_receive('release') - - del_request = flexmock() - del_request.should_receive("key_list") - del_request.should_receive("has_transaction").never() - del_request.should_receive("transaction").never() - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - transaction_manager = flexmock( - create_transaction_id=lambda project, xg: 1, - delete_transaction_id=lambda project, txid: None, - 
set_groups=lambda project_id, txid, groups: None) - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd.index_manager = flexmock( - projects={'guestbook': flexmock(indexes_pb=[])}) - yield dd.dynamic_delete("appid", del_request) - - fake_key = entity_pb.Reference() - fake_key.set_app('foo') - path = fake_key.mutable_path() - element = path.add_element() - element.set_type('bar') - element.set_id(1) - - del_request = flexmock() - del_request.should_receive("key_list").and_return([fake_key]) - del_request.should_receive("has_transaction").and_return(True) - transaction = flexmock() - transaction.should_receive("handle").and_return(1) - del_request.should_receive("transaction").and_return(transaction) - del_request.should_receive("has_mark_changes").and_return(False) - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd.index_manager = flexmock( - projects={'appid': flexmock(indexes_pb=[])}) - flexmock(utils).should_receive("get_entity_kind").and_return("kind") - db_batch.should_receive('delete_entities_tx').and_return(ASYNC_NONE) - yield dd.dynamic_delete("appid", del_request) - - del_request = flexmock() - del_request.should_receive("key_list").and_return([fake_key]) - del_request.should_receive("has_transaction").and_return(False) - del_request.should_receive("has_mark_changes").and_return(False) - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd.index_manager = flexmock( - projects={'appid': flexmock(indexes_pb=[])}) - flexmock(dd).should_receive("delete_entities").and_return(ASYNC_NONE).once() - yield dd.dynamic_delete("appid", del_request) - - def test_reverse_path(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - key = "Project:Synapse\x01Module:Core\x01" - self.assertEquals(dd.reverse_path(key), "Module:Core\x01Project:Synapse\x01") - - def test_remove_exists_filters(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - self.assertEquals(dd.remove_exists_filters({}), {}) - - filter_info = { - "prop1": [(datastore_pb.Query_Filter.EQUAL, "1")], - "prop2": [(datastore_pb.Query_Filter.EQUAL, "2")] - } - self.assertEquals(dd.remove_exists_filters(filter_info), filter_info) - - filter_info = { - "prop1": [(datastore_pb.Query_Filter.EXISTS, "1")], - "prop2": [(datastore_pb.Query_Filter.EXISTS, "2")] - } - self.assertEquals(dd.remove_exists_filters(filter_info), {}) - - def test_is_zigzag_merge_join(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - query = datastore_pb.Query() - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - self.assertEquals(dd.is_zigzag_merge_join(query, {}, {}), False) - filter_info = { - "prop1": [(datastore_pb.Query_Filter.EQUAL, "1")], - "prop2": [(datastore_pb.Query_Filter.EQUAL, "2")] - } - 
self.assertEquals(dd.is_zigzag_merge_join(query, filter_info, []), True) - - filter_info = { - "prop1": [(datastore_pb.Query_Filter.EQUAL, "1")] - } - self.assertEquals(dd.is_zigzag_merge_join(query, filter_info, []), False) - - @testing.gen_test - def test_zigzag_merge_join(self): - zk_client = flexmock() - zk_client.should_receive('add_listener') - - zookeeper = flexmock(handle=zk_client) - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - - query = datastore_pb.Query() - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, zookeeper) - flexmock(dd).should_receive("is_zigzag_merge_join").and_return(False) - result = yield dd.zigzag_merge_join(None, None, None) - self.assertEquals(result, (None, False)) - - filter_info = { - "prop1": [(datastore_pb.Query_Filter.EQUAL, "1")], - "prop2": [(datastore_pb.Query_Filter.EQUAL, "2")] - } - result = yield dd.zigzag_merge_join(query, filter_info, []) - self.assertEquals(result, (None, False)) - - def test_index_deletions(self): - old_entity = self.get_new_entity_proto(*self.BASIC_ENTITY) - - # No deletions should occur when the entity doesn't change. - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - self.assertListEqual([], index_deletions(old_entity, old_entity)) - - # When a property changes, the previous index entries should be deleted. - new_entity = entity_pb.EntityProto() - new_entity.MergeFrom(old_entity) - new_entity.property_list()[0].value().set_stringvalue('updated content') - - deletions = index_deletions(old_entity, new_entity) - self.assertEqual(len(deletions), 2) - self.assertEqual(deletions[0]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(deletions[1]['table'], dbconstants.DSC_PROPERTY_TABLE) - - prop = old_entity.add_property() - prop.set_name('author') - value = prop.mutable_value() - value.set_stringvalue('author1') - - prop = new_entity.add_property() - prop.set_name('author') - value = prop.mutable_value() - value.set_stringvalue('author1') - - # When given an index, an entry should be removed from the composite table. - composite_index = entity_pb.CompositeIndex() - composite_index.set_id(123) - composite_index.set_app_id('guestbook') - definition = composite_index.mutable_definition() - definition.set_entity_type('Greeting') - prop1 = definition.add_property() - prop1.set_name('content') - prop1.set_direction(datastore_pb.Query_Order.ASCENDING) - prop2 = definition.add_property() - prop2.set_name('author') - prop1.set_direction(datastore_pb.Query_Order.ASCENDING) - deletions = index_deletions(old_entity, new_entity, (composite_index,)) - self.assertEqual(len(deletions), 3) - self.assertEqual(deletions[0]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(deletions[1]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(deletions[2]['table'], dbconstants.COMPOSITE_TABLE) - - # No composite deletions should occur when the entity type differs. - definition.set_entity_type('TestEntity') - deletions = index_deletions(old_entity, new_entity, (composite_index,)) - self.assertEqual(len(deletions), 2) - - def test_deletions_for_entity(self): - entity = self.get_new_entity_proto(*self.BASIC_ENTITY) - - # Deleting an entity with one property should remove four entries. 
- db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - deletions = deletions_for_entity(entity) - self.assertEqual(len(deletions), 4) - self.assertEqual(deletions[0]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(deletions[1]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(deletions[2]['table'], dbconstants.APP_ENTITY_TABLE) - self.assertEqual(deletions[3]['table'], dbconstants.APP_KIND_TABLE) - - prop = entity.add_property() - prop.set_name('author') - value = prop.mutable_value() - value.set_stringvalue('author1') - - # Deleting an entity with two properties and one composite index should - # remove seven entries. - composite_index = entity_pb.CompositeIndex() - composite_index.set_id(123) - composite_index.set_app_id('guestbook') - definition = composite_index.mutable_definition() - definition.set_entity_type('Greeting') - prop1 = definition.add_property() - prop1.set_name('content') - prop1.set_direction(datastore_pb.Query_Order.ASCENDING) - prop2 = definition.add_property() - prop2.set_name('author') - prop1.set_direction(datastore_pb.Query_Order.ASCENDING) - deletions = deletions_for_entity(entity, (composite_index,)) - self.assertEqual(len(deletions), 7) - self.assertEqual(deletions[0]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(deletions[1]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(deletions[2]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(deletions[3]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(deletions[4]['table'], dbconstants.COMPOSITE_TABLE) - self.assertEqual(deletions[5]['table'], dbconstants.APP_ENTITY_TABLE) - self.assertEqual(deletions[6]['table'], dbconstants.APP_KIND_TABLE) - - def test_mutations_for_entity(self): - entity = self.get_new_entity_proto(*self.BASIC_ENTITY) - txn = 1 - - # Adding an entity with one property should add four entries. - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - mutations = mutations_for_entity(entity, txn) - self.assertEqual(len(mutations), 4) - self.assertEqual(mutations[0]['table'], dbconstants.APP_ENTITY_TABLE) - self.assertEqual(mutations[1]['table'], dbconstants.APP_KIND_TABLE) - self.assertEqual(mutations[2]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(mutations[3]['table'], dbconstants.DSC_PROPERTY_TABLE) - - # Updating an entity with one property should delete two entries and add - # four more. 
- new_entity = entity_pb.EntityProto() - new_entity.MergeFrom(entity) - new_entity.property_list()[0].value().set_stringvalue('updated content') - mutations = mutations_for_entity(entity, txn, new_entity) - self.assertEqual(len(mutations), 6) - self.assertEqual(mutations[0]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(mutations[0]['operation'], dbconstants.Operations.DELETE) - self.assertEqual(mutations[1]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(mutations[1]['operation'], dbconstants.Operations.DELETE) - self.assertEqual(mutations[2]['table'], dbconstants.APP_ENTITY_TABLE) - self.assertEqual(mutations[3]['table'], dbconstants.APP_KIND_TABLE) - self.assertEqual(mutations[4]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(mutations[5]['table'], dbconstants.DSC_PROPERTY_TABLE) - - prop = entity.add_property() - prop.set_name('author') - prop.set_multiple(0) - value = prop.mutable_value() - value.set_stringvalue('author1') - - prop = new_entity.add_property() - prop.set_name('author') - prop.set_multiple(0) - value = prop.mutable_value() - value.set_stringvalue('author1') - - # Updating one property of an entity with two properties and one composite - # index should remove three entries and add seven more. - composite_index = entity_pb.CompositeIndex() - composite_index.set_id(123) - composite_index.set_app_id('guestbook') - definition = composite_index.mutable_definition() - definition.set_entity_type('Greeting') - prop1 = definition.add_property() - prop1.set_name('content') - prop1.set_direction(datastore_pb.Query_Order.ASCENDING) - prop2 = definition.add_property() - prop2.set_name('author') - prop1.set_direction(datastore_pb.Query_Order.ASCENDING) - - mutations = mutations_for_entity(entity, txn, new_entity, - (composite_index,)) - self.assertEqual(len(mutations), 10) - self.assertEqual(mutations[0]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(mutations[0]['operation'], dbconstants.Operations.DELETE) - self.assertEqual(mutations[1]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(mutations[1]['operation'], dbconstants.Operations.DELETE) - self.assertEqual(mutations[2]['table'], dbconstants.COMPOSITE_TABLE) - self.assertEqual(mutations[2]['operation'], dbconstants.Operations.DELETE) - self.assertEqual(mutations[3]['table'], dbconstants.APP_ENTITY_TABLE) - self.assertEqual(mutations[4]['table'], dbconstants.APP_KIND_TABLE) - self.assertEqual(mutations[5]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(mutations[6]['table'], dbconstants.ASC_PROPERTY_TABLE) - self.assertEqual(mutations[7]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(mutations[8]['table'], dbconstants.DSC_PROPERTY_TABLE) - self.assertEqual(mutations[9]['table'], dbconstants.COMPOSITE_TABLE) - - @testing.gen_test - def test_apply_txn_changes(self): - app = 'guestbook' - txn = 1 - entity = self.get_new_entity_proto(app, *self.BASIC_ENTITY[1:]) - - async_metadata = gen.Future() - async_metadata.set_result({ - 'puts': {entity.key().Encode(): entity.Encode()}, - 'deletes': [], - 'tasks': [], - 'reads': set(), - 'start': datetime.datetime.utcnow(), - 'is_xg': False, - }) - - db_batch = flexmock() - db_batch.should_receive('get_transaction_metadata').\ - and_return(async_metadata) - db_batch.should_receive('valid_data_version_sync').and_return(True) - db_batch.should_receive('group_updates').and_return([]) - - transaction_manager = flexmock( - delete_transaction_id=lambda project_id, txid: None, - set_groups=lambda project_id, 
txid, groups: None) - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - dd.index_manager = flexmock( - projects={'guestbook': flexmock(indexes_pb=[])}) - prefix = dd.get_table_prefix(entity) - entity_key = get_entity_key(prefix, entity.key().path()) - - async_result = gen.Future() - async_result.set_result({entity_key: {}}) - - db_batch.should_receive('batch_get_entity').and_return(async_result) - db_batch.should_receive('normal_batch').and_return(ASYNC_NONE) - - async_true = gen.Future() - async_true.set_result(True) - entity_lock = flexmock(EntityLock) - entity_lock.should_receive('acquire').and_return(async_true) - entity_lock.should_receive('release') - - yield dd.apply_txn_changes(app, txn) - - def test_extract_entities_from_composite_indexes(self): - project_id = 'guestbook' - props = ['prop1', 'prop2'] - db_batch = flexmock() - db_batch.should_receive('valid_data_version_sync').and_return(True) - transaction_manager = flexmock() - dd = DatastoreDistributed(db_batch, transaction_manager, - self.get_zookeeper()) - query = datastore_pb.Query() - for prop_name in props: - query.add_property_name(prop_name) - - index = query.add_composite_index() - definition = index.mutable_definition() - for prop_name in props: - prop = definition.add_property() - prop.set_name(prop_name) - - entity_id = 1524699263329044 - val1 = entity_pb.PropertyValue() - val1.set_int64value(5) - val2 = entity_pb.PropertyValue() - val2.set_stringvalue('test') - index_key = '\x00'.join( - [project_id, 'namespace', 'index1', str(encode_index_pb(val1)), - str(encode_index_pb(val2)), 'Greeting:{}\x01'.format(entity_id)]) - - index_results = [{index_key: {'reference': 'ignored-ref'}}] - entities = dd._extract_entities_from_composite_indexes( - query, index_results, index) - self.assertEqual(len(entities), 1) - returned_entity = entity_pb.EntityProto(entities[0]) - self.assertEqual(returned_entity.property_size(), 2) - self.assertEqual(returned_entity.key().path().element(0).type(), 'Greeting') - self.assertEqual(returned_entity.key().path().element(0).id(), entity_id) - self.assertEqual(returned_entity.property(0).name(), 'prop1') - self.assertEqual(returned_entity.property(0).value().int64value(), 5) - self.assertEqual(returned_entity.property(1).name(), 'prop2') - self.assertEqual(returned_entity.property(1).value().stringvalue(), 'test') - -if __name__ == "__main__": - unittest.main() diff --git a/AppDB/test/unit/test_entity_utils.py b/AppDB/test/unit/test_entity_utils.py deleted file mode 100644 index 6b4ee32bc0..0000000000 --- a/AppDB/test/unit/test_entity_utils.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python - -""" Unit tests for entity_utils.py """ - -import sys -import unittest - -from tornado import gen, testing - -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.datastore import entity_utils -from flexmock import flexmock - -sys.path.append(APPSCALE_PYTHON_APPSERVER) - - -class FakeDatastore(object): - def __init__(self): - pass - - @gen.coroutine - def batch_get_entity(self, table, keys, schema): - raise gen.Return({}) - - @staticmethod - def batch_get_entity_sync(table, keys, schema): - return {} - -FAKE_SERIALIZED_ENTITY = \ - {'guestbook27\x00\x00Guestbook:default_guestbook\x01Greeting:1\x01': - { - 'txnID': '1', - 'entity': 'j@j\x0bguestbook27r1\x0b\x12\tGuestbook"\x11default_guestbook\x0c\x0b\x12\x08Greeting\x18\xaa\xe7\xfb\x18\x0cr=\x1a\x06author 
\x00*1CJ\x07a@a.comR\tgmail.com\x90\x01\x00\x9a\x01\x15120912168209190119424Dr\x15\x08\x07\x1a\x04date \x00*\t\x08\xf6\xfc\xd2\x92\xa4\xa3\xc3\x02z\x17\x08\x0f\x1a\x07content \x00*\x08\x1a\x06111111\x82\x01 \x0b\x12\tGuestbook"\x11default_guestbook\x0c' - } - } - - -class TestEntityUtils(testing.AsyncTestCase): - """ - A set of test cases for the datastore backup thread. - """ - def test_get_root_key_from_entity_key(self): - self.assertEquals("hi/bye\x01", entity_utils.\ - get_root_key_from_entity_key("hi/bye\x01otherstuff\x01moar")) - - self.assertEquals("hi/\x01", entity_utils.\ - get_root_key_from_entity_key("hi/\x01otherstuff\x01moar")) - - def test_get_kind_from_entity(self): - self.assertEquals("some", entity_utils.\ - get_kind_from_entity_key("hi\x00bye\x00some\x00other\x00stuff")) - - # Test empty namespace (very common). - self.assertEquals("some", entity_utils.\ - get_kind_from_entity_key("hi\x00\x00some\x00other\x00stuff")) - - def test_fetch_journal_entry(self): - future_response = gen.Future() - future_response.set_result({}) - flexmock(FakeDatastore()).should_receive('batch_get_entity').\ - and_return(future_response) - - result = entity_utils.fetch_journal_entry(FakeDatastore(), 'key') - self.assertEquals(result, None) - - -if __name__ == "__main__": - unittest.main() diff --git a/AppDB/test/unit/test_groomer.py b/AppDB/test/unit/test_groomer.py deleted file mode 100644 index 039f9713fc..0000000000 --- a/AppDB/test/unit/test_groomer.py +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env python -# Programmer: Navraj Chohan - -import datetime -import sys -import unittest - -from appscale.common.unpackaged import APPSCALE_PYTHON_APPSERVER -from appscale.datastore import appscale_datastore_batch -from appscale.datastore import dbconstants -from appscale.datastore import entity_utils -from appscale.datastore import groomer -from appscale.datastore import utils -from flexmock import flexmock - -sys.path.append(APPSCALE_PYTHON_APPSERVER) -from google.appengine.api import apiproxy_stub_map -from google.appengine.api import datastore_distributed -from google.appengine.api import datastore_errors -from google.appengine.ext import db -from google.appengine.datastore import entity_pb - - -class FakeQuery(): - def __init__(self): - pass - def run(self): - return [FakeEntity()] - def fetch(self, number): - return [FakeEntity()] - - -class FakeDatastore(): - def __init__(self): - pass - def range_query(self, table, schema, start, end, batch_size, - start_inclusive=True, end_inclusive=True): - return [] - def batch_delete(self, table, row_keys): - raise dbconstants.AppScaleDBConnectionError("Bad connection") - - -class FakeDistributedDB(): - def __init__(self): - pass - def Query(self, model_class="kind", namespace=''): - return FakeQuery() - - -class FakeReference(): - def __init__(self): - pass - def app(self): - return "app_id" - def name_space(self): - return "namespace" - - -class FakeEntity(): - def __init__(self): - pass - def ParseFromString(self, ent_str): - pass - def kind(self): - return 'kind' - def key(self): - return FakeReference() - def delete(self): - raise Exception() - def put(self): - raise Exception() - def key(self): - return FakeReference() - def query(self): - return FakeQuery() - - -class TestGroomer(unittest.TestCase): - """ - A set of test cases for the datastore groomer service. 
- """ - def test_init(self): - zookeeper = flexmock() - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - - def test_get_groomer_lock(self): - zookeeper = flexmock() - zookeeper.should_receive("get_lock_with_path").and_return(True) - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - self.assertEquals(True, dsg.get_groomer_lock()) - - def test_hard_delete_row(self): - zookeeper = flexmock() - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg = flexmock(dsg) - dsg.db_access = FakeDatastore() - self.assertEquals(False, dsg.hard_delete_row("some_key")) - - def test_get_root_key_from_entity_key(self): - self.assertEquals("hi/bye\x01", entity_utils.\ - get_root_key_from_entity_key("hi/bye\x01otherstuff\x01moar")) - - self.assertEquals("hi/\x01", entity_utils.\ - get_root_key_from_entity_key("hi/\x01otherstuff\x01moar")) - - def test_run_groomer(self): - zookeeper = flexmock() - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg = flexmock(dsg) - dsg.should_receive("get_entity_batch").and_return([]) - dsg.should_receive("process_entity") - dsg.should_receive("update_statistics").and_raise(Exception) - dsg.should_receive("remove_old_logs").and_return() - dsg.should_receive("remove_old_tasks_entities").and_return() - ds_factory = flexmock(appscale_datastore_batch.DatastoreFactory) - ds_factory.should_receive("getDatastore").and_return(FakeDatastore()) - self.assertRaises(Exception, dsg.run_groomer) - - def test_process_entity(self): - zookeeper = flexmock() - flexmock(entity_pb).should_receive('EntityProto').and_return(FakeEntity()) - - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg = flexmock(dsg) - dsg.should_receive('process_statistics') - self.assertEquals(True, - dsg.process_entity({'key':{dbconstants.APP_ENTITY_SCHEMA[0]:'ent', - dbconstants.APP_ENTITY_SCHEMA[1]:'version'}})) - - def test_process_statistics(self): - zookeeper = flexmock() - flexmock(utils).should_receive("get_entity_kind").and_return("kind") - - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg = flexmock(dsg) - dsg.stats['app_id'] = {'kind': {'size': 0, 'number': 0}} - - # This one gets ignored - dsg.should_receive("initialize_kind") - self.assertEquals(True, dsg.process_statistics("key", FakeEntity(), 1)) - self.assertEquals(dsg.stats, {'app_id':{'kind':{'size':1, 'number':1}}}) - self.assertEquals(True, dsg.process_statistics("key", FakeEntity(), 1)) - self.assertEquals(dsg.stats, {'app_id':{'kind':{'size':2, 'number':2}}}) - - def test_initialize_kind(self): - zookeeper = flexmock() - flexmock(entity_pb).should_receive('EntityProto').and_return(FakeEntity()) - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg = flexmock(dsg) - dsg.initialize_kind('app_id', 'kind') - self.assertEquals(dsg.stats, {'app_id': {'kind': {'size': 0, 'number': 0}}}) - - def test_txn_blacklist_cleanup(self): - #TODO - pass - - def test_stop(self): - #TODO - pass - - def test_remove_old_statistics(self): - zookeeper = flexmock() - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg = flexmock(dsg) - dsg.should_receive("register_db_accessor").and_return(FakeDistributedDB()) - dsg.stats['app_id'] = {'kind': {'size': 0, 'number': 0}} - dsg.stats['app_id1'] = {'kind': {'size': 0, 'number': 0}} - self.assertRaises(Exception, dsg.remove_old_statistics) - - def test_update_statistics(self): - zookeeper = flexmock() - dsg = 
groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg = flexmock(dsg) - dsg.should_receive("register_db_accessor").and_return(FakeDistributedDB()) - dsg.should_receive("create_global_stat_entry").and_return(True) - dsg.should_receive("create_kind_stat_entry") - dsg.stats['app_id'] = {'kind': {'size': 0, 'number': 0}} - dsg.stats['app_id1'] = {'kind': {'size': 0, 'number': 0}} - - dsg.update_statistics(datetime.datetime.now()) - dsg.should_receive("create_kind_stat_entry").\ - and_raise(datastore_errors.BadRequestError) - # BadRequestErrors should be ignored when generating stats because they are - # expected for undeployed projects and it's better to continue generating - # stats for other projects rather than stopping altogether. - dsg.update_statistics(datetime.datetime.now()) - - def test_reset_statistics(self): - zookeeper = flexmock() - flexmock(entity_pb).should_receive('EntityProto').and_return(FakeEntity()) - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - dsg.reset_statistics() - self.assertEquals(dsg.stats, {}) - - def test_register_db_accessor(self): - zookeeper = flexmock() - fake_ds = FakeDatastore() - flexmock(datastore_distributed).should_receive('DatastoreDistributed').\ - and_return(fake_ds) - flexmock(apiproxy_stub_map.apiproxy).should_receive('RegisterStub') - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - self.assertEquals(fake_ds, dsg.register_db_accessor("app_id")) - - def test_create_kind_stat_entry(self): - zookeeper = flexmock() - stats = flexmock(db.stats) - stats.should_receive("GlobalStat").and_return(FakeEntity()) - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - self.assertRaises(Exception, dsg.create_kind_stat_entry, "kind", 0, 0, 0) - - def test_create_global_stat_entry(self): - zookeeper = flexmock() - stats = flexmock(db.stats) - stats.should_receive("KindStat").and_return(FakeEntity()) - dsg = groomer.DatastoreGroomer(zookeeper, "cassandra", "localhost:8888") - self.assertRaises(Exception, dsg.create_kind_stat_entry, 0, 0, 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/AppDB/test/unit/test_transaction_manager.py b/AppDB/test/unit/test_transaction_manager.py deleted file mode 100644 index 5740b076c4..0000000000 --- a/AppDB/test/unit/test_transaction_manager.py +++ /dev/null @@ -1,107 +0,0 @@ -import unittest - -from mock import ANY -from mock import call -from mock import MagicMock - -from appscale.datastore.zkappscale.transaction_manager import ( - ProjectTransactionManager) - - -class TestDatastoreServer(unittest.TestCase): - def test_create_transaction_id(self): - project_id = 'guestbook' - project_node = '/appscale/apps/{}'.format(project_id) - - zk_client = MagicMock() - tx_manager = ProjectTransactionManager(project_id, zk_client) - - # Ensure the first created node is ignored. - created_nodes = ['{}/txids/tx0000000000'.format(project_node), - '{}/txids/tx0000000001'.format(project_node)] - zk_client.create = MagicMock(side_effect=created_nodes) - self.assertEqual(tx_manager.create_transaction_id(xg=False), 1) - calls = [ - call('{}/txids/tx'.format(project_node), value=ANY, sequence=True), - call('{}/txids/tx'.format(project_node), value=ANY, sequence=True)] - zk_client.create.assert_has_calls(calls) - - # Ensure the manual offset works. 
- tx_manager._txid_manual_offset = 10 - created_nodes = ['{}/txids/tx0000000015'.format(project_node)] - zk_client.create = MagicMock(side_effect=created_nodes) - self.assertEqual(tx_manager.create_transaction_id(xg=False), 25) - calls = [ - call('{}/txids/tx'.format(project_node), value=ANY, sequence=True)] - zk_client.create.assert_has_calls(calls) - tx_manager._txid_manual_offset = 0 - - # Ensure the automatic rollover works. - created_nodes = ['{}/txids/tx-2147483647'.format(project_node), - '{}/txids2'.format(project_node), - '{}/txids2/tx0000000000'.format(project_node)] - zk_client.create = MagicMock(side_effect=created_nodes) - zk_client.get_children = MagicMock(return_value=['txids', 'txids2']) - self.assertEqual(tx_manager.create_transaction_id(xg=False), 2147483648) - calls = [ - call('{}/txids/tx'.format(project_node), value=ANY, sequence=True), - call('{}/txids2'.format(project_node)), - call('{}/txids2/tx'.format(project_node), value=ANY, sequence=True)] - zk_client.create.assert_has_calls(calls) - - def test_delete_transaction_id(self): - project_id = 'guestbook' - project_node = '/appscale/apps/{}'.format(project_id) - - zk_client = MagicMock() - tx_manager = ProjectTransactionManager(project_id, zk_client) - - # A small transaction ID should be located in the first bucket. - tx_manager._delete_counter = MagicMock() - tx_manager.delete_transaction_id(5) - tx_manager._delete_counter.assert_called_with( - '{}/txids/tx0000000005'.format(project_node)) - - # Transactions above the max counter value should be in a different bucket. - tx_manager._delete_counter = MagicMock() - tx_manager.delete_transaction_id(2147483649) - tx_manager._delete_counter.assert_called_with( - '{}/txids2/tx0000000001'.format(project_node)) - - # Offset transactions should be corrected. - tx_manager._txid_manual_offset = 2 ** 31 - tx_manager._delete_counter = MagicMock() - tx_manager.delete_transaction_id(2 ** 31 * 2) - tx_manager._delete_counter.assert_called_with( - '{}/txids2/tx0000000000'.format(project_node)) - tx_manager._txid_manual_offset = 0 - - def test_get_open_transactions(self): - project_id = 'guestbook' - project_node = '/appscale/apps/{}'.format(project_id) - - zk_client = MagicMock() - tx_manager = ProjectTransactionManager(project_id, zk_client) - - # Counters in multiple active buckets should be used. - active_buckets = ('{}/txids'.format(project_node), - '{}/txids2'.format(project_node)) - tx_manager._active_containers = MagicMock(return_value=active_buckets) - zk_responses = [['{}/txids/tx2147483646'.format(project_node), - '{}/txids/tx2147483647'.format(project_node)], - ['{}/txids2/tx0000000000'.format(project_node), - '{}/txids2/tx0000000001'.format(project_node)]] - zk_client.get_children = MagicMock(side_effect=zk_responses) - open_txids = [2147483646, 2147483647, 2147483648, 2147483649] - self.assertListEqual(tx_manager.get_open_transactions(), open_txids) - - # A manual offset should affect the list of open transactions. 
- tx_manager._txid_manual_offset = 10 - active_buckets = ('{}/txids'.format(project_node),) - tx_manager._active_containers = MagicMock(return_value=active_buckets) - zk_response = ['{}/txids/tx0000000001'.format(project_node), - '{}/txids/tx0000000002'.format(project_node)] - zk_client.get_children = MagicMock(return_value=zk_response) - open_txids = [11, 12] - self.assertListEqual(tx_manager.get_open_transactions(), open_txids) - tx_manager._txid_manual_offset = 0 diff --git a/AppDB/test/unit/test_zookeeper.py b/AppDB/test/unit/test_zookeeper.py deleted file mode 100644 index 13912aba35..0000000000 --- a/AppDB/test/unit/test_zookeeper.py +++ /dev/null @@ -1,400 +0,0 @@ -#!/usr/bin/env python -# Programmer: Navraj Chohan - -import kazoo.client -import kazoo.exceptions -import kazoo.protocol -import kazoo.protocol.states -import unittest - -from appscale.datastore.dbconstants import MAX_GROUPS_FOR_XG -from appscale.datastore.zkappscale import zktransaction as zk -from appscale.datastore.zkappscale.zktransaction import ZKTransactionException -from appscale.datastore.zkappscale.inspectable_counter import \ - InspectableCounter -from flexmock import flexmock - - -class TestZookeeperTransaction(unittest.TestCase): - """ - """ - - def setUp(self): - self.appid = 'appid' - self.handle = None - - def test_increment_and_get_counter(self): - # mock out getTransactionRootPath - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('get_transaction_prefix_path').with_args( - self.appid).and_return('/rootpath') - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', create='create', - delete_async='delete_async', connected=lambda: True) - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').and_return(None) - - flexmock(InspectableCounter).should_receive('__add__').and_return(1) - - # assert, make sure we got back our id - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals((0, 1), transaction.increment_and_get_counter( - self.appid, 1)) - - def test_create_sequence_node(self): - # mock out getTransactionRootPath - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('get_transaction_prefix_path').with_args( - self.appid).and_return('/rootpath') - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', create='create', - delete='delete', connected=lambda: True) - fake_zookeeper.should_receive('start') - - # mock out zookeeper.create for txn id - path_to_create = "/rootpath/" + self.appid - zero_path = path_to_create + "/0" - nonzero_path = path_to_create + "/1" - - fake_zookeeper.should_receive('retry').with_args('create', str, value=str, - acl=None, makepath=bool, sequence=bool, ephemeral=bool).\ - and_return(zero_path).and_return(nonzero_path) - - # mock out deleting the zero id we get the first time around - fake_zookeeper.should_receive('retry').with_args('delete', zero_path) - - # assert, make sure we got back our id - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(1, transaction.create_sequence_node('/rootpath/' + \ - self.appid, 'now')) - - def test_create_node(self): - # mock out getTransactionRootPath - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('get_transaction_prefix_path').with_args( - self.appid).and_return('/rootpath') - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', create='create', - connected=lambda: True) - fake_zookeeper.should_receive('start') - 
fake_zookeeper.should_receive('retry').with_args('create', str, value=str, - acl=None, makepath=bool, sequence=bool, ephemeral=bool) - - # mock out zookeeper.create for txn id - path_to_create = "/rootpath/" + self.appid - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(None, transaction.create_node('/rootpath/' + self.appid, - 'now')) - - def test_get_txn_path_before_getting_id(self): - # mock out initializing a ZK connection - flexmock(zk.ZKTransaction) - - fake_zookeeper = flexmock(name='fake_zoo') - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry') - - zk.ZKTransaction.should_receive('get_app_root_path').and_return("app_root_path") - - expected = zk.PATH_SEPARATOR.join(["app_root_path", zk.APP_TX_PATH, zk.APP_TX_PREFIX]) - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(expected, - transaction.get_txn_path_before_getting_id(self.appid)) - - def test_get_xg_path(self): - # mock out initializing a ZK connection - flexmock(zk.ZKTransaction) - - fake_zookeeper = flexmock(name='fake_zoo') - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry') - - tx_id = 100 - tx_str = zk.APP_TX_PREFIX + "%010d" % tx_id - zk.ZKTransaction.should_receive('get_app_root_path') \ - .and_return("app_root_path") - - expected = zk.PATH_SEPARATOR.join(["app_root_path", zk.APP_TX_PATH, - tx_str, zk.XG_PREFIX]) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(expected, transaction.get_xg_path("xxx", 100)) - - def test_is_in_transaction(self): - # shared mocks - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('get_transaction_path') \ - .and_return('/transaction/path') - - fake_zookeeper = flexmock(name='fake_zoo', exists='exists', - connected=lambda: True) - fake_zookeeper.should_receive('start') - - # test when the transaction is running - zk.ZKTransaction.should_receive('is_blacklisted').and_return(False) - fake_zookeeper.should_receive('retry').with_args('exists', str) \ - .and_return(True) - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.is_in_transaction(self.appid, 1)) - - # and when it's not - zk.ZKTransaction.should_receive('is_blacklisted').and_return(False) - fake_zookeeper.should_receive('retry').with_args('exists', str) \ - .and_return(False) - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(False, transaction.is_in_transaction(self.appid, 1)) - - # and when it's blacklisted - zk.ZKTransaction.should_receive('is_blacklisted').and_return(True) - self.assertRaises(zk.ZKTransactionException, transaction.is_in_transaction, - self.appid, 1) - - def test_acquire_lock(self): - # mock out waitForConnect - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('get_lock_root_path').\ - and_return('/lock/root/path') - zk.ZKTransaction.should_receive('get_transaction_prefix_path').\ - and_return('/rootpath/' + self.appid) - fake_zookeeper = flexmock(name='fake_zoo', get='get', - connected=lambda: True) - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry') - - # first, test out getting a lock for a regular transaction, that we don't - # already have the lock for - zk.ZKTransaction.should_receive('is_in_transaction').and_return(False) - zk.ZKTransaction.should_receive('acquire_additional_lock').and_return(True) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.acquire_lock(self.appid, "txid", - 
"somekey")) - - # next, test when we're in a transaction and we already have the lock - zk.ZKTransaction.should_receive('is_in_transaction').and_return(True) - zk.ZKTransaction.should_receive('get_transaction_lock_list_path').\ - and_return('/rootpath/' + self.appid + "/tx1") - fake_zookeeper.should_receive('retry').with_args('get', str) \ - .and_return(['/lock/root/path']) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.acquire_lock(self.appid, "txid", - "somekey")) - - # next, test when we're in a non-XG transaction and we're not in the lock - # root path - zk.ZKTransaction.should_receive('is_in_transaction').and_return(True) - zk.ZKTransaction.should_receive('get_transaction_lock_list_path').\ - and_return('/rootpath/' + self.appid + "/tx1") - fake_zookeeper.should_receive('retry').with_args('get', str) \ - .and_return(['/lock/root/path2']) - zk.ZKTransaction.should_receive('is_xg').and_return(False) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertRaises(zk.ZKTransactionException, transaction.acquire_lock, - self.appid, "txid", "somekey") - - # next, test when we're in a XG transaction and we're not in the lock - # root path - zk.ZKTransaction.should_receive('is_in_transaction').and_return(True) - zk.ZKTransaction.should_receive('get_transaction_lock_list_path').\ - and_return('/rootpath/' + self.appid + "/tx1") - fake_zookeeper.should_receive('retry').with_args('get', str) \ - .and_return(['/lock/root/path2']) - zk.ZKTransaction.should_receive('is_xg').and_return(True) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.acquire_lock(self.appid, "txid", - "somekey")) - - def test_acquire_additional_lock(self): - # mock out waitForConnect - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('check_transaction') - zk.ZKTransaction.should_receive('get_transaction_path').\ - and_return('/txn/path') - zk.ZKTransaction.should_receive('get_lock_root_path').\ - and_return('/lock/root/path') - zk.ZKTransaction.should_receive('get_transaction_prefix_path').\ - and_return('/rootpath/' + self.appid) - - fake_zookeeper = flexmock(name='fake_zoo', create='create', - create_async='create_async', get='get', set_async='set_async', - connected=lambda: True) - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').with_args('create', str, makepath=bool, sequence=bool, - ephemeral=bool, value=str, acl=None).and_return("/some/lock/path") - fake_zookeeper.should_receive('retry').with_args('create_async', str, value=str, - acl=None, ephemeral=bool, makepath=bool, sequence=bool) - fake_zookeeper.should_receive('retry').with_args('create_async', str, value=str, - acl=str, ephemeral=bool, makepath=bool, sequence=bool) - lock_list = ['path1', 'path2', 'path3'] - lock_list_str = zk.LOCK_LIST_SEPARATOR.join(lock_list) - fake_zookeeper.should_receive('retry').with_args('get', str) \ - .and_return([lock_list_str]) - fake_zookeeper.should_receive('retry').with_args('set_async', str, str) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.acquire_additional_lock(self.appid, - "txid", "somekey", False)) - - # Test for when we want to create a new ZK node for the lock path - self.assertEquals(True, transaction.acquire_additional_lock(self.appid, - "txid", "somekey", True)) - - # Test for existing max groups - lock_list = ['path' + str(num+1) for num in range(MAX_GROUPS_FOR_XG)] - lock_list_str = 
zk.LOCK_LIST_SEPARATOR.join(lock_list) - fake_zookeeper.should_receive('retry').with_args('get', str) \ - .and_return([lock_list_str]) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertRaises(zk.ZKTransactionException, - transaction.acquire_additional_lock, self.appid, "txid", "somekey", False) - - # Test for when there is a node which already exists. - fake_zookeeper.should_receive('retry').with_args('create', str, str, None, - bool, bool, bool).and_raise(kazoo.exceptions.NodeExistsError) - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertRaises(zk.ZKTransactionException, - transaction.acquire_additional_lock, self.appid, "txid", "somekey", False) - - def test_check_transaction(self): - # mock out getTransactionRootPath - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('get_transaction_prefix_path').with_args( - self.appid).and_return('/rootpath') - zk.ZKTransaction.should_receive('is_blacklisted').and_return(False) - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', exists='exists', - connected=lambda: True) - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').with_args('exists', str) \ - .and_return(True) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.check_transaction(self.appid, 1)) - - # Check to make sure it raises exception for blacklisted transactions. - zk.ZKTransaction.should_receive('is_blacklisted').and_return(True) - self.assertRaises(zk.ZKTransactionException, transaction.check_transaction, - self.appid, 1) - - zk.ZKTransaction.should_receive('is_blacklisted').and_return(False) - fake_zookeeper.should_receive('retry').with_args('exists', str) \ - .and_return(False) - self.assertRaises(zk.ZKTransactionException, transaction.check_transaction, - self.appid, 1) - - def test_is_xg(self): - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', exists='exists', - connected=lambda: True) - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').with_args('exists', str) \ - .and_return(True) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.is_xg(self.appid, 1)) - - def test_release_lock(self): - # mock out getTransactionRootPath - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('check_transaction') - zk.ZKTransaction.should_receive('get_transaction_path').\ - and_return('/rootpath') - zk.ZKTransaction.should_receive('get_transaction_lock_list_path').\ - and_return('/rootpath') - zk.ZKTransaction.should_receive('is_xg').and_return(False) - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', exists='exists', get='get', - delete='delete', delete_async='delete_async', - get_children='get_children', connected=lambda: True) - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').with_args('exists', str) \ - .and_return(True) - fake_zookeeper.should_receive('retry').with_args('get', str) \ - .and_return(['/1/2/3']) - fake_zookeeper.should_receive('retry').with_args('delete_async', str) - fake_zookeeper.should_receive('retry').with_args('delete', str) - fake_zookeeper.should_receive('retry').with_args('get_children', str) \ - .and_return(['1','2']) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.release_lock(self.appid, 1)) - - zk.ZKTransaction.should_receive('is_xg').and_return(True) - 
self.assertEquals(True, transaction.release_lock(self.appid, 1)) - - # Check to make sure it raises exception for blacklisted transactions. - zk.ZKTransaction.should_receive('is_xg').and_return(False) - fake_zookeeper.should_receive('retry').with_args('get', str) \ - .and_raise(kazoo.exceptions.NoNodeError) - self.assertRaises(zk.ZKTransactionException, transaction.release_lock, - self.appid, 1) - - - def test_is_blacklisted(self): - # mock out getTransactionRootPath - flexmock(zk.ZKTransaction) - zk.ZKTransaction.should_receive('get_blacklist_root_path').\ - and_return("bl_root_path") - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', create='create', exists='exists', - get_children='get_children', connected=lambda: True) - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').with_args('create', str, str, None, - bool, bool, bool).and_return() - fake_zookeeper.should_receive('retry').with_args('exists', str) \ - .and_return(True) - fake_zookeeper.should_receive('retry').with_args('get_children', str) \ - .and_return(['1','2']) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.is_blacklisted(self.appid, 1)) - - def test_notify_failed_transaction(self): - pass - #TODO - - def test_get_lock_with_path(self): - flexmock(zk.ZKTransaction) - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', create='create') - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').with_args('create', str, value=str, - acl=None, ephemeral=bool).and_return(True) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.get_lock_with_path('path')) - - fake_zookeeper.should_receive('retry').with_args('create', str, value=str, - acl=None, ephemeral=bool).and_raise(kazoo.exceptions.NodeExistsError) - self.assertEquals(False, transaction.get_lock_with_path('some/path')) - - def test_release_lock_with_path(self): - flexmock(zk.ZKTransaction) - - # mock out initializing a ZK connection - fake_zookeeper = flexmock(name='fake_zoo', delete='delete') - fake_zookeeper.should_receive('start') - fake_zookeeper.should_receive('retry').with_args('delete', str) - - transaction = zk.ZKTransaction(zk_client=fake_zookeeper) - self.assertEquals(True, transaction.release_lock_with_path('some/path')) - - fake_zookeeper.should_receive('retry').with_args('delete', str). \ - and_raise(kazoo.exceptions.NoNodeError) - self.assertRaises(ZKTransactionException, - transaction.release_lock_with_path, 'some/path') - - -if __name__ == "__main__": - unittest.main() From 793ba756d02c3da8acaa98e7a5c5ba6895f8e239 Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 31 Jan 2020 16:20:38 -0800 Subject: [PATCH 217/221] AdminServer remove cassandra support --- AdminServer/appscale/admin/service_manager.py | 1 - AdminServer/tests/test_service_manager.py | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/AdminServer/appscale/admin/service_manager.py b/AdminServer/appscale/admin/service_manager.py index 52a4a36f23..126b3f9145 100644 --- a/AdminServer/appscale/admin/service_manager.py +++ b/AdminServer/appscale/admin/service_manager.py @@ -222,7 +222,6 @@ def datastore_start_cmd(port, assignment_options): A list of command line arguments. 
""" start_cmd = ['appscale-datastore', - '--type', assignment_options.get('backend', 'cassandra'), '--port', str(port)] if assignment_options.get('verbose'): start_cmd.append('--verbose') diff --git a/AdminServer/tests/test_service_manager.py b/AdminServer/tests/test_service_manager.py index 6339bb3f49..e65746f0d2 100644 --- a/AdminServer/tests/test_service_manager.py +++ b/AdminServer/tests/test_service_manager.py @@ -47,16 +47,14 @@ def test_start(self, popen_mock, http_client_mock): http_client_mock.return_value = MagicMock(fetch=fake_fetch) yield datastore_server.start() - cmd = ['appscale-datastore', - '--type', 'cassandra', '--port', '4000', '--verbose'] + cmd = ['appscale-datastore', '--port', '4000', '--verbose'] self.assertEqual(popen_mock.call_count, 1) self.assertEqual(popen_mock.call_args[0][0], cmd) @patch.object(psutil, 'Process') def test_from_pid(self, process_mock): # Test that the server attributes are parsed correctly. - cmd = ['python', 'appscale-datastore', - '--type', 'cassandra', '--port', '4000'] + cmd = ['python', 'appscale-datastore', '--port', '4000'] process_mock.return_value = MagicMock(cmdline=MagicMock(return_value=cmd)) server = ServerManager.from_pid(10000, datastore_service) From a0febe6bb9ca518226d099a2a821b3b22bf0875c Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Fri, 31 Jan 2020 16:21:06 -0800 Subject: [PATCH 218/221] Install / systemd remove cassandra support --- debian/appscale_install.sh | 4 +-- debian/appscale_install_functions.sh | 37 +------------------------ debian/makedeb_all.sh | 1 - debian/postinst.cassandra | 25 ----------------- system/units/appscale-cassandra.service | 26 ----------------- 5 files changed, 2 insertions(+), 91 deletions(-) delete mode 100644 debian/postinst.cassandra delete mode 100644 system/units/appscale-cassandra.service diff --git a/debian/appscale_install.sh b/debian/appscale_install.sh index 3552ec3402..99461c646d 100755 --- a/debian/appscale_install.sh +++ b/debian/appscale_install.sh @@ -28,7 +28,7 @@ fi case "$1" in # At this time we cannot simply install pieces of AppScale, and the # space saving is minimal. So we install all the components. - all|core|cassandra) + all|core) # Scratch install of appscale including post script. installappscaleprofile . /etc/profile.d/appscale.sh @@ -46,8 +46,6 @@ case "$1" in installsoappy installzookeeper postinstallzookeeper - installcassandra - postinstallcassandra postinstallrabbitmq installsolr installsolr7 diff --git a/debian/appscale_install_functions.sh b/debian/appscale_install_functions.sh index 3955e48d80..a9856b5d4f 100644 --- a/debian/appscale_install_functions.sh +++ b/debian/appscale_install_functions.sh @@ -333,42 +333,6 @@ installsolr7() update-rc.d solr disable } -installcassandra() -{ - CASSANDRA_VER=3.11.2 - - # The following is a Cassandra package built from source with the inclusion - # of https://issues.apache.org/jira/browse/CASSANDRA-12942. - CASSANDRA_PACKAGE="apache-cassandra-${CASSANDRA_VER}-w-12942-bin.tar.gz" - CASSANDRA_PACKAGE_MD5="25a9039dba8fe7ffe5e5e560e65c1f6f" - cachepackage ${CASSANDRA_PACKAGE} ${CASSANDRA_PACKAGE_MD5} - - # Remove old Cassandra environment directory. 
- rm -rf ${APPSCALE_HOME}/AppDB/cassandra - - CASSANDRA_DIR="/opt/cassandra" - CASSANDRA_DATA_DIR="/opt/appscale/cassandra" - mkdir -p ${CASSANDRA_DIR} - mkdir -p ${CASSANDRA_DATA_DIR} - rm -rf ${CASSANDRA_DIR}/cassandra - tar xzf "${PACKAGE_CACHE}/${CASSANDRA_PACKAGE}" -C ${CASSANDRA_DIR} - mv -v ${CASSANDRA_DIR}/apache-cassandra-${CASSANDRA_VER} \ - ${CASSANDRA_DIR}/cassandra - - if ! id -u cassandra &> /dev/null ; then - useradd cassandra - fi - chown -R cassandra ${CASSANDRA_DIR} - chown -R cassandra ${CASSANDRA_DATA_DIR} -} - -postinstallcassandra() -{ - mkdir -p ${CONFIG_DIR}/${APPSCALE_VERSION} - touch ${CONFIG_DIR}/${APPSCALE_VERSION}/cassandra -} - - installservice() { # This must be absolute path of runtime. @@ -469,6 +433,7 @@ installVersion() mv ${CONFIG_DIR}/VERSION ${CONFIG_DIR}/VERSION-$(date --rfc-3339=date) fi cp ${APPSCALE_HOME}/VERSION ${CONFIG_DIR} + mkdir -p ${CONFIG_DIR}/${APPSCALE_VERSION} } postinstallrsyslog() diff --git a/debian/makedeb_all.sh b/debian/makedeb_all.sh index 6bae5dbbce..faaf5db394 100755 --- a/debian/makedeb_all.sh +++ b/debian/makedeb_all.sh @@ -9,7 +9,6 @@ fi for comp in\ core all\ - cassandra\ do ./debian/makedeb.sh $comp $RELEASE diff --git a/debian/postinst.cassandra b/debian/postinst.cassandra deleted file mode 100644 index b2cf2a5923..0000000000 --- a/debian/postinst.cassandra +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh - -DESTDIR= -. /etc/profile.d/appscale.sh - -case "$1" in - configure) - - postinstallcassandra - ;; - abort-upgrade|abort-remove|abort-deconfigure) - ;; - - *) - echo "postinst called with unknown argument \`$1'" >&2 - exit 1 - ;; -esac - -# dh_installdeb will replace this with shell code automatically -# generated by other debhelper scripts. - -#DEBHELPER# - -exit 0 diff --git a/system/units/appscale-cassandra.service b/system/units/appscale-cassandra.service deleted file mode 100644 index a21d031c39..0000000000 --- a/system/units/appscale-cassandra.service +++ /dev/null @@ -1,26 +0,0 @@ -[Unit] -Description=AppScale Cassandra -Before=appscale-backend.target -PartOf=appscale-backend.target -After=network-online.target time-sync.target - -[Service] -Type=forking -User=cassandra -PermissionsStartOnly=true -ExecStartPre=/bin/mkdir -p /opt/appscale/cassandra -ExecStartPre=/bin/chown -R cassandra /opt/appscale/cassandra -ExecStart=/opt/cassandra/cassandra/bin/cassandra -GuessMainPID=yes -SuccessExitStatus=0 143 -KillMode=mixed -TimeoutStartSec=60 -TimeoutStopSec=60 -SyslogIdentifier=cassandra -# Limits -LimitAS=infinity -LimitMEMLOCK=infinity -LimitNPROC=32768 - -[Install] -WantedBy=appscale-backend.target \ No newline at end of file From de92ff47c5d2801517a1184d3c4fedae9aae38ff Mon Sep 17 00:00:00 2001 From: Steve Jones Date: Mon, 3 Feb 2020 18:27:43 -0800 Subject: [PATCH 219/221] AppController remove image database support check --- AppController/djinn.rb | 1 - AppController/lib/helperfunctions.rb | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/AppController/djinn.rb b/AppController/djinn.rb index 3eee49a9bd..07db29c8e4 100644 --- a/AppController/djinn.rb +++ b/AppController/djinn.rb @@ -3579,7 +3579,6 @@ def validate_image(node) key = node.ssh_key HelperFunctions.ensure_image_is_appscale(ip, key) HelperFunctions.ensure_version_is_supported(ip, key) - HelperFunctions.ensure_db_is_supported(ip, @options['table'], key) end def copy_encryption_keys(dest_node) diff --git a/AppController/lib/helperfunctions.rb b/AppController/lib/helperfunctions.rb index 9392be8fdf..d0fade2de8 100644 --- 
a/AppController/lib/helperfunctions.rb
+++ b/AppController/lib/helperfunctions.rb
@@ -871,18 +871,6 @@ def self.ensure_version_is_supported(ip, key)
        " #{version} on it and try again.")
   end
 
-  def self.ensure_db_is_supported(ip, db, key)
-    version = get_appscale_version
-    if does_image_have_location?(ip, "/etc/appscale/#{version}/#{db}", key)
-      Djinn.log_debug("Image at #{ip} supports #{db}.")
-    else
-      fail_msg = "The image at #{ip} does not have support for #{db}." \
-        ' Please install support for this database and try again.'
-      Djinn.log_debug(fail_msg)
-      log_and_crash(fail_msg)
-    end
-  end
-
   # Examines the configuration file for the given version to see if it is
   # thread safe.
   #

From 7528be24487cb56498dd472f19bf80c1a45b695c Mon Sep 17 00:00:00 2001
From: Nicolas Denutte <36203052+NicolasDenutte@users.noreply.github.com>
Date: Fri, 22 May 2020 13:34:29 -0700
Subject: [PATCH 220/221] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 6250a05e90..d855956964 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# AppScale
+# AppScale GTS
 
 [![GitHub version](https://badge.fury.io/gh/AppScale%2Fappscale.svg)](http://badge.fury.io/gh/AppScale%2Fappscale)
 [![AppScale license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://github.com/AppScale/appscale/blob/master/LICENSE)

From 113d3c921b02dc0c469355ddffc55f0a980a9844 Mon Sep 17 00:00:00 2001
From: Nicolas Denutte <36203052+NicolasDenutte@users.noreply.github.com>
Date: Fri, 22 May 2020 14:13:51 -0700
Subject: [PATCH 221/221] Update README.md

- Changed AppScale to AppScale GTS
- Added a clarification that AppScale GTS is no longer supported, although
  its source code remains available in this GitHub repo
- Deleted all links to the appscale.com domain, keeping only links to the
  GitHub Wiki and the Google Group
---
 README.md | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/README.md b/README.md
index d855956964..9d145e5ec9 100644
--- a/README.md
+++ b/README.md
@@ -3,33 +3,22 @@
 [![GitHub version](https://badge.fury.io/gh/AppScale%2Fappscale.svg)](http://badge.fury.io/gh/AppScale%2Fappscale)
 [![AppScale license](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://github.com/AppScale/appscale/blob/master/LICENSE)
 
-AppScale is an easy-to-manage serverless platform for building and running scalable web and mobile applications on any infrastructure.
+AppScale GTS is an open source serverless platform for building and running scalable web and mobile applications on any infrastructure. The platform enables developers to focus solely on business logic in order to rapidly build scalable apps, cleanly separating it from deployment and scaling logic. It allows operations to provide a consistent, tunable environment that can simplify running and maintaining apps on multiple infrastructures. The business will benefit from faster time-to-market, reduced operational costs, maximized application lifetime, and the flexibility to integrate with new or existing technologies.
 
-AppScale is open source and modeled on Google App Engine APIs, allowing developers to automatically deploy and scale unmodified Google App Engine applications over public and private cloud systems and on-premise clusters. It currently supports Python, Go, PHP and Java applications. The software is developed and maintained by AppScale Systems, Inc., based in Santa Barbara, California, and Google.
-
-
-## Why Use AppScale?
-The goal of AppScale is to provide developers with a rapid, API-driven development platform that can run applications on any cloud infrastructure. AppScale decouples application logic from its service ecosystem to give developers and cloud administrators control over application deployment, data storage, resource use, backup, and migration.
+AppScale GTS is open source and modeled on Google App Engine APIs, allowing developers to automatically deploy and scale unmodified Google App Engine applications over public and private cloud systems and on-premise clusters. It currently supports Python, Go, PHP and Java applications. The software was developed by AppScale Systems, Inc. (based in Santa Barbara, California) and Google. In 2019, the company ended commercial support for AppScale GTS; however, the source code remains available in this GitHub repo.
+
+## Why Use AppScale GTS?
+The goal of AppScale GTS is to provide developers with a rapid, API-driven development platform that can run applications on any cloud infrastructure. AppScale GTS decouples application logic from its service ecosystem to give developers and cloud administrators control over application deployment, data storage, resource use, backup, and migration.
 
 ## I Want ...
 * to [contribute](https://github.com/AppScale/appscale/wiki/Contribute%21)
-* to [try AppScale](https://www.appscale.com/get-started/)
-* to see [how other people use AppScale](https://www.appscale.com/why-appscale/)
-* [customer support](https://www.appscale.com/products/appscale-customer-success/)
 * to [learn more](https://github.com/AppScale/appscale/wiki)
 
 ## Documentation
-* Getting Started
-  * [AppScale FastStart](https://www.appscale.com/get-started/)
-  * [Deployment types](https://www.appscale.com/get-started/deployment-types/)
-  * [Deploy AppScale](https://www.appscale.com/get-started/deploy-appscale/)
 * Users
-  * [Managing Apps and AppScale](https://www.appscale.com/get-started/management/)
-  * [Automated Data Persistence](https://github.com/AppScale/appscale/wiki/Automated-Data-Persistence)
+  * [Automated Data Persistence](https://github.com/AppScale/appscale/wiki/Automated-Data-Persistence)
   * [Multinode Deployments](https://github.com/AppScale/appscale/wiki/Distributed-Deployment)
   * [Making AppScale Scale](https://github.com/AppScale/appscale/wiki#making-appscale-scale)
 * Developers
@@ -41,5 +30,3 @@
 
 ## Community and Support
 Join the [Community Google Group](http://groups.google.com/group/appscale_community) for announcements, help, and to discuss cloud research.
-
-Also, join us on [#appscale on freenode](http://webchat.freenode.net/?channels=appscale&uio=d4) if you have questions, suggestions, comments, or just want to say hi!