#!/usr/local/sbin/charm-env python3

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import subprocess
import sys

from path import Path
from time import time

from charmhelpers.core import hookenv
from charms.benchmark import Benchmark
from charms.reactive import is_state
from jujubigdata import utils


def fail(msg):
    """Mark the running action as failed with ``msg`` and exit the script.

    Records ``outcome: failure`` in the action results, reports ``msg``
    through ``hookenv.action_fail`` and then calls ``sys.exit()`` with no
    argument, so this function does not return to its caller.
    """
    failure_result = {'outcome': 'failure'}
    hookenv.action_set(failure_result)
    hookenv.action_fail(msg)
    sys.exit()


def _find_example_jar(spark_home, jar_name):
    """Return the path of ``jar_name`` found under ``spark_home``, or None.

    The whole tree is walked; if the file name occurs more than once, the
    match visited last by ``os.walk`` is returned.
    """
    jar_path = None
    for root, _dirs, files in os.walk(spark_home):
        if jar_name in files:
            jar_path = os.path.join(root, jar_name)
    return jar_path


def _log_mentions_rank(log_path):
    """Return True if any line of the file at ``log_path`` contains 'rank'."""
    with open(log_path) as log:
        # Iterate the file lazily instead of loading every line up front.
        return any('rank' in line for line in log)


def main():
    """Run the SparkPageRank example as a benchmark and record the outcome.

    Steps:
      * require the 'spark.started' state;
      * read and validate the 'iterations' action parameter;
      * make sure /opt/sparkpagerank-results exists and is owned by ubuntu;
      * in yarn execution mode, copy the sample data into HDFS;
      * locate spark-examples.jar under /usr/lib/spark;
      * run spark-submit, sending stdout and stderr to a per-run log file;
      * report the elapsed whole seconds as the benchmark composite score;
      * publish a 'juju scp' command for fetching the log and set the
        action outcome.

    Every error path goes through fail(), which marks the action as failed
    and exits the script.
    """
    bench = Benchmark()

    if not is_state('spark.started'):
        msg = 'Spark is not started yet'
        fail(msg)

    # gather params and create dir to store results
    raw_iter = hookenv.action_get('iterations')
    try:
        # The action parameter may arrive as an int or a str (or be missing).
        # subprocess only accepts string arguments, so validate it once here
        # and keep the normalized decimal string for the command line.
        num_iter = str(int(raw_iter))
    except (TypeError, ValueError):
        fail('Invalid iterations parameter: {!r}'.format(raw_iter))

    run = int(time())
    result_dir = Path('/opt/sparkpagerank-results')
    result_log = result_dir / '{}.log'.format(run)
    if not result_dir.exists():
        result_dir.mkdir()
    result_dir.chown('ubuntu', 'ubuntu')
    hookenv.log("pagerank ({} iteration) log: {}".format(num_iter, result_log))

    sample_dir = "/home/ubuntu/sample-data"
    sample_pr = "/home/ubuntu/sample-data/pagerank/web-Google.txt"
    if not os.path.isfile(sample_pr):
        msg = 'Could not find pagerank sample data'
        fail('{}: {}'.format(msg, sample_pr))

    # Benchmark input data is packed into a sample-data resource, which makes
    # it available on all spark units. In yarn mode, the nodemanagers
    # act as the spark workers and will not have access to this local data.
    # In yarn mode, copy our input data to hdfs so nodemanagers can access it.
    mode = hookenv.config()['spark_execution_mode']
    if mode.startswith('yarn'):
        if is_state('hadoop.hdfs.ready'):
            try:
                utils.run_as('ubuntu',
                             'hdfs', 'dfs', '-copyFromLocal', '-f',
                             sample_dir, '/user/ubuntu',
                             capture_output=True)
            except subprocess.CalledProcessError as e:
                msg = 'Unable to copy pagerank sample data to hdfs'
                fail('{}: {}'.format(msg, e))
            else:
                # From here on, point spark at the copy that lives in HDFS.
                sample_pr = "/user/ubuntu/sample-data/pagerank/web-Google.txt"
        else:
            msg = 'Spark is configured for yarn mode, but HDFS is not ready yet'
            fail(msg)

    # find jar location
    spark_home = "/usr/lib/spark"
    example_jar_name = "spark-examples.jar"
    example_jar_path = _find_example_jar(spark_home, example_jar_name)

    if not example_jar_path:
        msg = 'Could not find {}'.format(example_jar_name)
        fail(msg)

    # Every element is a string, which both subprocess and the ' '.join()
    # in the failure message below require.
    arg_list = [
        'spark-submit',
        '--class',
        'org.apache.spark.examples.SparkPageRank',
        example_jar_path,
        sample_pr,
        num_iter,
    ]

    print('Calculating PageRank')
    bench.start()
    start = int(time())

    with open(result_log, 'w') as log_file:
        try:
            # stderr is folded into stdout so the log holds all job output.
            subprocess.check_call(arg_list, stdout=log_file,
                                  stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            msg = 'SparkPageRank command failed: {}'.format(' '.join(arg_list))
            fail('{}: {}'.format(msg, e))

    stop = int(time())
    bench.finish()

    # Elapsed wall-clock time in whole seconds.
    duration = stop - start
    bench.set_composite_score(duration, units='secs', direction='asc')

    # Tell the user how they can get the full result log
    scp_log_msg = "juju scp {}:{} .".format(hookenv.local_unit(), result_log)
    hookenv.action_set({'meta.fetchcmd': scp_log_msg})

    # NOTE(review): this is a loose heuristic -- the input path itself
    # contains 'pagerank', so any log line that echoes that path would also
    # match. Confirm against real spark-submit output before tightening it.
    if not _log_mentions_rank(result_log):
        msg = 'Spark-submit failed to calculate pagerank'
        fail(msg)

    hookenv.action_set({'outcome': 'success'})


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
