# Maintainer: Erich Eckner <arch at eckner dot net>
# Contributor: François Garillot ("huitseeker") <francois [at] garillot.net>
# Contributor: Christian Krause ("wookietreiber") <kizkizzbangbang@gmail.com>

# Package identity: name, upstream Spark version being packaged, and the
# packaging revision for that version.
pkgname=apache-spark
pkgver=3.2.1
pkgrel=1
pkgdesc="fast and general engine for large-scale data processing"
# The package ships a pre-built upstream tree, so it is declared
# architecture-independent.
arch=('any')
url="http://spark.apache.org"
license=('APACHE')
# NOTE(review): the ">=6" floor looks stale for Spark ${pkgver}; upstream
# documentation for the 3.x line presumably requires Java 8 or newer -- confirm.
depends=('java-environment>=6')
# Optional integrations; each entry is "package: reason".
# NOTE(review): the python2/ipython2 entries may no longer apply to Spark 3.x
# (upstream presumably dropped Python 2 support) -- confirm before keeping them.
optdepends=('python2: python2 support for pyspark'
            'ipython2: ipython2 support for pyspark'
            'python: python3 support for pyspark'
            'ipython: ipython3 support for pyspark'
            'r: support for sparkR'
            'rsync: support rsync hadoop binaries from master'
            'hadoop: support for running on YARN')
# Install scriptlet file that accompanies this PKGBUILD (not shown here).
install=apache-spark.install
# Upstream "without-hadoop" binary tarball followed by the local helper files
# (systemd units, environment file and wrapper scripts) used by package().
source=("https://archive.apache.org/dist/spark/spark-${pkgver}/spark-${pkgver}-bin-without-hadoop.tgz"
        'apache-spark-master.service'
        'apache-spark-slave@.service'
        'spark-env.sh'
        'spark-daemon-run.sh'
        'run-master.sh'
        'run-slave.sh')
# One SHA-512 digest per source=() entry, in the same order (seven entries).
# The first digest must be refreshed whenever pkgver changes.
sha512sums=('0923b887bffe9ce984b41e730a0059d563d0ee429f4e8c74be2df98d0b441919eff4cc3c43d79b131d3b914139df4833aee75280889643690e8c14a287552b40'
            'cf080327efb022e93b7267ab03ca62a0ec1c77d4ea48a7903a8a6d3c99f9b2c7d81ec02b01a9c92f999ecf97b4d8ff98864c711b1034cfec206ba966cdd82218'
            '90fa91198acb209d8362af781c2d630e3d0c47987bd69c9a14c5a9edd45dd45040a253c919224cc687f938daac5255e584b22d1755c6faa0d495308bbd1905f3'
            'd88452fe250d4153859d79fbeb1110d05d2eaca6690c23367aa373c6e67581b055e0648a2ebc7bea685edf102a455fc85dff7019242b79fbba6efa528ae873b2'
            '432b56f708cf67af12be7997104a0ba4e5f29830542ddf13f93a575e2b11a2d205598f3724ad2791ef1da57d6cf5d5597e52876ef84796cbfb60ce310fb6383f'
            '81b4886ed30dcca6d6f8d9b88f20b9fc2b728808494e1df2f088a261adf85d2269ec03f9cd3b88b74f0e6d54610ae7f3a81ca190ba321f9e5c9cedadf184ead3'
            '736a9ce60247803f6fef5caac699b3c5ea1c8ed9ccdd6c842acd3afff08a6cc2776148aff5fc5b1d8f74c76320ad0c0db0836dbcd0d55c1f78385254d8878614')
# Path (relative to the filesystem root, no leading slash) of the configuration
# file that package() installs as etc/apache-spark/spark-env.sh.
backup=('etc/apache-spark/spark-env.sh')

# Use an xz-compressed package archive unless PKGEXT is already set to a
# non-empty value in the environment.
PKGEXT=${PKGEXT:-'.pkg.tar.xz'}

# Change into the unpacked upstream source tree below $srcdir.
# Globals read: srcdir, pkgver.
# Returns the exit status of the directory change; nothing else is done here.
prepare() {
  local unpacked_tree="${srcdir}/spark-${pkgver}-bin-without-hadoop"
  cd "${unpacked_tree}"
}

# Stage the unpacked Spark tree and the helper files from source=() into $pkgdir.
#
# Globals read: srcdir, pkgdir, pkgver.
# Layout produced below $pkgdir:
#   opt/apache-spark/               copy of the upstream tree
#   opt/apache-spark/conf-templates the tree's original conf directory, renamed
#   opt/apache-spark/conf           symlink -> /etc/apache-spark
#   opt/apache-spark/work           symlink -> /var/lib/apache-spark/work
#   opt/apache-spark/sbin/          run-master.sh, run-slave.sh, spark-daemon-run.sh
#   usr/bin/<launcher>              symlinks -> /opt/apache-spark/bin/<launcher>
#   etc/profile.d/apache-spark.sh   sets and exports SPARK_HOME
#   etc/apache-spark/               spark-env.sh plus the upstream conf files
#   usr/lib/systemd/system/         master and slave@ service units
#   var/log/apache-spark, var/lib/apache-spark/work   (mode 2775)
#
# Every expansion is quoted so that a build directory containing whitespace
# does not split paths or break the redirections.
package() {
        local spark_src="$srcdir/spark-${pkgver}-bin-without-hadoop"
        local spark_home="/opt/apache-spark"
        local profile_script="$pkgdir/etc/profile.d/apache-spark.sh"
        local binary binpath script

        install -d "$pkgdir/usr/bin" "$pkgdir/opt" "$pkgdir/var/log/apache-spark" "$pkgdir/var/lib/apache-spark/work"
        chmod 2775 "$pkgdir/var/log/apache-spark" "$pkgdir/var/lib/apache-spark/work"

        # The destination does not exist yet, so the tree itself becomes
        # $pkgdir/opt/apache-spark rather than a subdirectory of it.
        cp -r "$spark_src" "$pkgdir$spark_home"

        # Expose the launchers in /usr/bin and pin SPARK_HOME inside each of
        # them wherever the script has an "export SPARK_HOME=" line.
        cd "$pkgdir/usr/bin" || return
        for binary in beeline pyspark sparkR spark-class spark-shell spark-sql spark-submit load-spark-env.sh; do
                binpath="$spark_home/bin/$binary"
                ln -s "$binpath" "$binary"
                sed -i 's|^export SPARK_HOME=.*$|export SPARK_HOME=/opt/apache-spark|' "$pkgdir$binpath"
        done

        # Login-shell snippet that sets and exports SPARK_HOME.
        mkdir -p "$pkgdir/etc/profile.d"
        printf '%s\n' \
                '#!/bin/sh' \
                'SPARK_HOME=/opt/apache-spark' \
                'export SPARK_HOME' > "$profile_script"
        chmod 755 "$profile_script"

        install -Dm644 "$srcdir/apache-spark-master.service" "$pkgdir/usr/lib/systemd/system/apache-spark-master.service"
        install -Dm644 "$srcdir/apache-spark-slave@.service" "$pkgdir/usr/lib/systemd/system/apache-spark-slave@.service"
        install -Dm644 "$srcdir/spark-env.sh" "$pkgdir/etc/apache-spark/spark-env.sh"
        for script in run-master.sh run-slave.sh spark-daemon-run.sh; do
                install -Dm755 "$srcdir/$script" "$pkgdir$spark_home/sbin/$script"
        done
        # -t names the target directory explicitly (and -D creates it if
        # needed), so this step does not rely on an earlier install having
        # created etc/apache-spark.
        install -Dm644 -t "$pkgdir/etc/apache-spark" "$spark_src/conf"/*

        # Keep the shipped conf directory as conf-templates and point conf and
        # work at their locations under /etc and /var.
        cd "$pkgdir$spark_home" || return
        mv conf conf-templates
        ln -sf "/etc/apache-spark" conf
        ln -sf "/var/lib/apache-spark/work" .
}