# Maintainer: Erich Eckner
# Contributor: François Garillot ("huitseeker")
# Contributor: Christian Krause ("wookietreiber")

# PKGBUILD for the pre-built "without-hadoop" Apache Spark binary distribution.
# The upstream tree is installed under /opt/apache-spark, configuration lives
# in /etc/apache-spark, and runtime state in /var/{lib,log}/apache-spark.

pkgname=apache-spark
pkgver=3.5.3
pkgrel=1
pkgdesc="fast and general engine for large-scale data processing"
arch=('any')
url="http://spark.apache.org"
license=('APACHE')
# Spark 3.5.x requires Java 8 or newer; the former ">=6" bound was stale.
depends=('java-environment>=8')
optdepends=('python: python support for pyspark'
            'ipython: ipython support for pyspark'
            'r: support for sparkR'
            'rsync: support rsync hadoop binaries from master'
            'hadoop: support for running on YARN')
install=apache-spark.install
source=("https://archive.apache.org/dist/spark/spark-${pkgver}/spark-${pkgver}-bin-without-hadoop.tgz"
        'apache-spark-master.service'
        'apache-spark-slave@.service'
        'spark-env.sh'
        'spark-daemon-run.sh'
        'run-master.sh'
        'run-slave.sh')
# One checksum per entry of source=(), in the same order.
sha512sums=('76d6edc1050629c0be0f82543d7da36f7c27ef2e7978e28c20bb5b9e7677c7c8b485af3c6a34319f402706592a1b381ad8ec96458104b23814c871e2b4870eab'
            'cf080327efb022e93b7267ab03ca62a0ec1c77d4ea48a7903a8a6d3c99f9b2c7d81ec02b01a9c92f999ecf97b4d8ff98864c711b1034cfec206ba966cdd82218'
            '90fa91198acb209d8362af781c2d630e3d0c47987bd69c9a14c5a9edd45dd45040a253c919224cc687f938daac5255e584b22d1755c6faa0d495308bbd1905f3'
            'd88452fe250d4153859d79fbeb1110d05d2eaca6690c23367aa373c6e67581b055e0648a2ebc7bea685edf102a455fc85dff7019242b79fbba6efa528ae873b2'
            '432b56f708cf67af12be7997104a0ba4e5f29830542ddf13f93a575e2b11a2d205598f3724ad2791ef1da57d6cf5d5597e52876ef84796cbfb60ce310fb6383f'
            '81b4886ed30dcca6d6f8d9b88f20b9fc2b728808494e1df2f088a261adf85d2269ec03f9cd3b88b74f0e6d54610ae7f3a81ca190ba321f9e5c9cedadf184ead3'
            '736a9ce60247803f6fef5caac699b3c5ea1c8ed9ccdd6c842acd3afff08a6cc2776148aff5fc5b1d8f74c76320ad0c0db0836dbcd0d55c1f78385254d8878614')
# Keep local edits to the environment file across upgrades.
backup=('etc/apache-spark/spark-env.sh')

# Default to xz-compressed packages unless the caller already chose a format.
PKGEXT=${PKGEXT:-'.pkg.tar.xz'}

# Nothing is patched; this only enters the extracted upstream directory.
prepare() {
  cd "$srcdir/spark-${pkgver}-bin-without-hadoop"
}

# Stage the Spark tree, wrapper symlinks, systemd units and configuration
# into "$pkgdir". Reads $srcdir, $pkgdir and $pkgver.
package() {
  local src_root="$srcdir/spark-${pkgver}-bin-without-hadoop"
  local prefix="$pkgdir/opt/apache-spark"
  local confdir="$pkgdir/etc/apache-spark"
  local binary binpath script

  install -d "$pkgdir/usr/bin" "$pkgdir/opt"
  # Log and work directories are group-writable and setgid (mode 2775).
  install -d "$pkgdir/var/log/apache-spark" "$pkgdir/var/lib/apache-spark/work"
  chmod 2775 "$pkgdir/var/log/apache-spark" "$pkgdir/var/lib/apache-spark/work"

  # The destination does not exist yet, so this copies the tree *as*
  # /opt/apache-spark rather than into it.
  cp -r "$src_root" "$prefix"

  # Expose the launchers in /usr/bin and pin SPARK_HOME inside each script
  # that exports it.
  for binary in beeline pyspark sparkR spark-class spark-shell spark-sql \
                spark-submit load-spark-env.sh; do
    binpath="/opt/apache-spark/bin/$binary"
    ln -s "$binpath" "$pkgdir/usr/bin/$binary"
    sed -i 's|^export SPARK_HOME=.*$|export SPARK_HOME=/opt/apache-spark|' \
      "$pkgdir$binpath"
  done

  # Login-shell snippet exporting SPARK_HOME.
  install -d "$pkgdir/etc/profile.d"
  printf '%s\n' \
    '#!/bin/sh' \
    'SPARK_HOME=/opt/apache-spark' \
    'export SPARK_HOME' \
    > "$pkgdir/etc/profile.d/apache-spark.sh"
  chmod 755 "$pkgdir/etc/profile.d/apache-spark.sh"

  install -Dm644 "$srcdir/apache-spark-master.service" \
    "$pkgdir/usr/lib/systemd/system/apache-spark-master.service"
  install -Dm644 "$srcdir/apache-spark-slave@.service" \
    "$pkgdir/usr/lib/systemd/system/apache-spark-slave@.service"
  install -Dm644 "$srcdir/spark-env.sh" "$confdir/spark-env.sh"

  for script in run-master.sh run-slave.sh spark-daemon-run.sh; do
    install -Dm755 "$srcdir/$script" "$prefix/sbin/$script"
  done

  # Ship the upstream conf files in /etc/apache-spark. An explicit -t makes
  # the target a directory regardless of what earlier steps created.
  install -Dm644 -t "$confdir" "$src_root/conf"/*

  # Keep the upstream conf files as conf-templates and point conf at /etc;
  # route the work directory to /var/lib.
  mv "$prefix/conf" "$prefix/conf-templates"
  ln -sf "/etc/apache-spark" "$prefix/conf"
  ln -sf "/var/lib/apache-spark/work" "$prefix/work"
}