#!/bin/sh

# check for packages that need to be built, and build a list in the proper build order
# Details:
#  https://github.com/archlinux32/builder/wiki/Build-system#get-package-updates

# shellcheck source=conf/default.conf
. "${0%/*}/../conf/default.conf"

# TODO: Find out, why sometimes package updates are missed.

# TODO: read information from database

# TODO: correctly handle if pkgbase of a split package is renamed, e.g.:
# $a -> ($a,$b)  ==>  $b -> ($a,$b)

# TODO: keep database clean in case of abort

# TODO: It seems, updating present build assignments in the database is broken
# ... they seem to get deleted

# usage [exit_code]
# Print the help text to stderr and terminate the script.
# The script exits with $exit_code; without (or with an empty) argument
# it exits with 1.
# shellcheck disable=SC2016
usage() {
  # one argument per output line - the empty strings yield the blank lines
  >&2 printf '%s\n' \
    '' \
    'get-package-updates: check for packages that need to be built,' \
    ' and build a list in the proper build order' \
    '' \
    'possible options:' \
    '  -b|--block:   If necessary, wait for lock blocking.' \
    '  -h|--help:    Show this help and exit.' \
    '  -n|--no-pull: Do not pull git repos, merely reorder build list.' \
    '  -x|--test-exclusion $package:' \
    '                Print additionally deleted/excluded packages if' \
    '                "$package" would be black listed.'
  exit "${1:-1}"
}

# defaults - the command line options below may override them
pull=true
test_exclusion=''
block_flag='-n'

# Let getopt normalize the command line and make its output the new
# positional parameters. If getopt fails, the word "usage" is added to
# the evaluated text.
# NOTE(review): presumably "usage" ends up on a line of its own, so that
# eval calls usage() - confirm against the installed getopt.
eval set -- "$(
  getopt -o bhnx: \
    -l block,help,no-pull,test-exclusion: \
    -n "$(basename "$0")" -- "$@" || \
    echo usage
)"

while :; do
  case "$1" in
    --)
      # end of options
      shift
      break
    ;;
    -h|--help)
      usage 0
    ;;
    -b|--block)
      # an empty flag makes flock wait for the lock
      block_flag=''
    ;;
    -n|--no-pull)
      pull=false
    ;;
    -x|--test-exclusion)
      if [ -n "${test_exclusion}" ]; then
        >&2 printf 'I already have --test-exclusion=%s and you gave me another one.\n' "${test_exclusion}"
        >&2 printf 'But I can only handle one exclusion test at a time.\n'
        exit 2
      fi
      # consume the option's argument, too
      test_exclusion="$2"
      shift
    ;;
    *)
      >&2 echo 'Whoops, forgot to implement option "'"$1"'" internally.'
      exit 42
    ;;
  esac
  shift
done

# positional arguments are not supported
[ $# -eq 0 ] || {
  >&2 echo 'Too many arguments.'
  usage
}

# a non-empty "build-master-sanity" file blocks any update
if [ -s "${work_dir}/build-master-sanity" ]; then
  >&2 echo 'Build master is not sane.'
  exit
fi

# TODO: How should new deletion-list packages be handled?
# - packages deleted upstream should be marked as to-be-deleted if
#   existent and otherwise be ignored
# - packages deleted due to black listing should be marked as black
#   listed - and thus as to-be-deleted, but they should remain in the
#   database after deletion!

# delete_package package
# Mark $package for deletion: append it to deletion-list.new, drop its
# line from build-list.new, remove its broken/blocked/locked markers from
# package-states and - unless an exclusion test is running - record the
# deletion in the database.
delete_package() {
  # Keep the derived values in the function's own positional parameters:
  #  $2 = $1 as regular expression, $3 = $1 base64 encoded (for SQL)
  set -- \
    "$1" \
    "$(str_to_regex "$1")" \
    "$(printf '%s' "$1" | base64 -w0)"

  echo "$1" >> "${work_dir}/deletion-list.new"
  sed -i "/^$2 /d" "${work_dir}/build-list.new"
  find "${work_dir}/package-states" -maxdepth 1 -mindepth 1 -regextype grep \
    -regex '.*/'"$2"'\(\.[^.]\+\)\{3\}\.\(broken\|blocked\|locked\)' \
    -delete

  # TODO: Once we want to rely on the database for test_exclusion, we
  # need to run the command below unconditionally, but with some
  # changes, so we can easily revert.
  [ -z "${test_exclusion}" ] || return 0

  # shellcheck disable=SC2016
  {
    # packages from the build-list/to-be-decided go straight to the deletion-list
    printf 'UPDATE `binary_packages`'
    mysql_join_binary_packages_repositories
    mysql_join_binary_packages_build_assignments
    mysql_join_build_assignments_package_sources
    printf '%s' \
      ' SET `binary_packages`.`repository`=(SELECT `repositories`.`id` FROM `repositories` WHERE `repositories`.`name`="deletion-list")' \
      ' WHERE `repositories`.`name` in ("build-list","to-be-decided")'
    printf ' AND `package_sources`.`pkgbase`=from_base64("%s");\n' "$3"
    # other packages are marked as `is_to_be_deleted`
    printf 'UPDATE `binary_packages`'
    mysql_join_binary_packages_repositories
    mysql_join_binary_packages_build_assignments
    mysql_join_build_assignments_package_sources
    printf '%s' \
      ' SET `binary_packages`.`is_to_be_deleted`=1'
    printf ' WHERE `package_sources`.`pkgbase`=from_base64("%s");' "$3"
  } | \
    mysql_run_query
}

# clean_up
# EXIT handler: remove ${tmp_dir} and delete the binary packages which are
# still in the "to-be-decided" repository from the database.
# The database part only runs once this instance holds the build list
# lock: without the lock, any "to-be-decided" rows belong to another
# instance which is still running, and deleting them would make that
# instance lose its package updates.
clean_up() {
  rm -rf --one-file-system "${tmp_dir}"
  if ! ${have_build_list_lock}; then
    return
  fi
  # shellcheck disable=SC2016
  {
    printf 'DELETE FROM `binary_packages`'
    printf ' WHERE `binary_packages`.`repository`=('
      printf 'SELECT `repositories`.`id` FROM `repositories`'
      printf ' WHERE `repositories`.`name`="to-be-decided"'
    printf ');\n'
  } | \
    mysql_run_query
}
# the flag must exist before the trap is armed
have_build_list_lock=false
# create tmp_dir and trap
tmp_dir=$(mktemp -d 'tmp.get-package-updates.XXXXXXXXXX' --tmpdir)
trap 'clean_up' EXIT

# Update git repositories (official packages, community packages and the
# repository of package customizations) - unless --no-pull was requested.

if ${pull}; then
  for repo in ${repo_names}; do
    eval repo_path='"${repo_paths__'"${repo}"'}"'
    git -C "${repo_path}" fetch origin master:master
  done
fi

# Create a lock file for build list.
# The locks are taken before any "*.revision.new" file gets written, so an
# instance which cannot get the lock never overwrites files the lock
# holder is still working with.

exec 9> "${build_list_lock_file}"
# ${block_flag} is intentionally unquoted: it is either '-n' or empty
if ! flock ${block_flag} 9; then
  >&2 echo 'come back (shortly) later - I cannot lock build list.'
  exit
fi
have_build_list_lock=true

exec 8> "${sanity_check_lock_file}"
if ! flock -s ${block_flag} 8; then
  >&2 echo 'come back (shortly) later - sanity-check running.'
  exit
fi

# Read previous git revision numbers from files.

something_new=false

for repo in ${repo_names}; do
  # old revision: content of "${repo}.revision" or NONE if unreadable
  eval "old_repo_revisions__${repo}='$(
    cat "${work_dir}/${repo}.revision" 2> /dev/null || \
      echo NONE
  )'"
  eval repo_path='"${repo_paths__'"${repo}"'}"'
  # new revision: current HEAD (or the old revision again with --no-pull);
  # it is also saved to "${repo}.revision.new"
  eval "new_repo_revisions__${repo}='$(
    if ${pull}; then
      git -C "${repo_path}" rev-parse HEAD
    else
      cat "${work_dir}/${repo}.revision"
    fi | \
      tee "${work_dir}/${repo}.revision.new"
  )'"
  if ! eval '[ "${new_repo_revisions__'"${repo}"'}" = "${old_repo_revisions__'"${repo}"'}" ]'; then
    something_new=true
  fi
done

if ${pull} && \
  ! ${something_new}; then
  >&2 echo 'Nothing changed.'
  exit
fi

# shellcheck disable=SC2119
mysql_cleanup

echo 'Check modified packages from the last update, and put them to the build list.'

# Check modified packages from the last update, and put them to the build list.
# If a package is updated, but already on the rebuild list, then just update the git revision number.
# If a package is deleted, remove from the rebuild list, and add it to the deletion list.
# If a new package is added, then ensure that it's not on the deletion list.

cp \
  "${work_dir}/build-list" \
  "${work_dir}/build-list.new"
cp \
  "${work_dir}/deletion-list" \
  "${work_dir}/deletion-list.new"

# The for loop emits one line "$mode $package $git_revision $repository"
# per changed PKGBUILD; the stages after it order these lines (deletions
# first) and the final while loop applies them one by one.
for repo in ${repo_names}; do
  eval repo_path='"${repo_paths__'"${repo}"'}"'
  eval old_repo_revision='"${old_repo_revisions__'"${repo}"'}"'
  eval new_repo_revision='"${new_repo_revisions__'"${repo}"'}"'
  {
    # if old revision unknown, mimic "git diff"-output
    # shellcheck disable=SC2154
    if [ "${old_repo_revision}" = "NONE" ]; then
      git -C "${repo_path}" archive --format=tar HEAD | \
        tar -t | \
        sed 's|^|A\t|'
    else
      git -C "${repo_path}" diff --no-renames --name-status "${old_repo_revision}" "${new_repo_revision}"
    fi
  } | \
    # Packages which are already on the build list should receive a git_revision bump if _any_ file changed.
    # Thus, we rename any file "PKGBUILD" to trigger the successive logic.
    if [ "${repo}" = 'archlinux32' ]; then
      sed "$(
        sed '
          s/ .*$//
          s|^|\\@^.\\t[^/]\\+/|
          s|$|/@ s@/[^/]*$@/PKGBUILD@|
        ' "${work_dir}/build-list"
      )"
    else
      sed "$(
        sed '
          s/ .*$//
          s|^|\\@^.\\t|
          s|$|/@ s@/[^/]*$@/PKGBUILD@|
        ' "${work_dir}/build-list"
      )"
    fi | \
    # only track changes in PKGBUILDs
    grep '/PKGBUILD$' | \
    if [ "${repo}" = "archlinux32" ]; then
    # modify the directory structure from the modification-repository
    # to the one of an original source repository
      # shellcheck disable=SC2016
      sed 's|^\(.\t\)\([^/]\+\)/\([^/]\+\)/\(.\+\)$|\2 \1\3/repos/\2-x86_64/\4|' | \
        while read -r pkg_repo rest; do
          eval 'printf '"'"'%s %s\n'"'" \
            "$(printf '"${new_repo_revisions__%s}"' "$(find_git_repository_to_package_repository "${pkg_repo}")")" \
            "'${rest}'"
        done
    else
      sed "s|^|${new_repo_revision} |"
    fi | \
    grep '^\S\+ .\s[^/]\+/repos/[^/]\+/PKGBUILD$' | \
    # ignore i686
    grep -v -- '-i686/PKGBUILD$' | \
    # ignore staging and testing
    grep -v -- '[-/]\(staging\|testing\|unstable\)-[^/]\+/PKGBUILD$' | \
    sed 's|^\(\S\+\) \(.\)\t\([^/]\+\)/repos/\([^/]\+\)-[^/-]\+/PKGBUILD$|\2 \3 \1 \4|'
done | \
  sort -u | \
  sed '
    s|^D\s|0 \0|
    t
    s|^[AM]\s|1 \0|
    t
    s|^|2 |
  ' | \
  sort -k1,1 | \
  sed 's|^[012] ||' | \
  while read -r mode package git_revision repository; do
    if [ "${mode}" = 'D' ]; then
      # deleted PKGBUILD
      git_revision=$(cat "${work_dir}/archlinux32.revision.new")
      found_package=false
      # the PKGBUILD may still exist in one of the (other) repositories
      for repository in ${repo_names}; do
        eval 'repo_path="${repo_paths__'"${repository}"'}"'
        if [ "${repository}" = "archlinux32" ]; then
          if git -C "${repo_path}" archive "$(cat "${work_dir}/${repository}.revision.new")" 2> /dev/null | \
            tar -t 2> /dev/null | \
            grep -q "/$(str_to_regex "${package}")/PKGBUILD$"; then
            found_package=true
            break;
          fi
        else
          if git -C "${repo_path}" archive "$(cat "${work_dir}/${repository}.revision.new")" -- "${package}/repos" 2> /dev/null | \
            tar -t --wildcards "${package}/repos/*/PKGBUILD" 2> /dev/null | \
            cut -d/ -f3 | \
            grep -v 'staging\|testing\|-unstable' | \
            grep -vq -- '-i686$'; then
            git_revision=$(cat "${work_dir}/${repository}.revision.new")
            found_package=true
            break;
          fi
        fi
      done
      if ${found_package}; then
# TODO: moving packages between community64 and packages64 is currently broken (?)
        # still present somewhere: treat the deletion as a modification
        mode='M'
        repository=$(
          find_package_repository_to_package "${package}" "${repository}" "${git_revision}"
        )
      else
        delete_package "${package}"
        continue
      fi
    fi
    if [ "${mode}" = 'A' ] || [ "${mode}" = 'M' ]; then
      # new or modified PKGBUILD
      sed -i "/^$(str_to_regex "${package}") /d" "${work_dir}/build-list.new"
      # shellcheck disable=SC2154
      echo "${package} ${git_revision} ${new_repo_revisions__archlinux32} ${repository}" >> \
        "${work_dir}/build-list.new"
      sed -i "/^$(str_to_regex "${package}")\$/d" "${work_dir}/deletion-list.new"
      # shellcheck disable=SC2016
      {
        # delete old build assignment and associated binary packages
        # which are not yet built or on the deletion list
        printf 'DELETE `build_assignments`,`binary_packages`'
        printf ' FROM `binary_packages`'
        mysql_join_binary_packages_build_assignments
        mysql_join_build_assignments_package_sources
        mysql_join_binary_packages_repositories
        printf ' WHERE `package_sources`.`pkgbase`=from_base64("%s")' \
          "$(
            printf '%s' "${package}" | \
              base64 -w0
          )"
        printf ' AND (`repositories`.`name`="build-list" OR `repositories`.`name`="deletion-list");\n'
        # remove is-to-be-deleted marker from old binary packages
        printf 'UPDATE `binary_packages`'
        mysql_join_binary_packages_build_assignments
        mysql_join_build_assignments_package_sources
        mysql_join_binary_packages_repositories
        printf ' SET `is_to_be_deleted`=0'
        printf ' WHERE `package_sources`.`pkgbase`=from_base64("%s");\n' \
          "$(
            printf '%s' "${package}" | \
              base64 -w0
          )"
      } | \
        mysql_run_query
      mysql_generate_package_metadata 'to-be-decided' "${package}" "${git_revision}" "${new_repo_revisions__archlinux32}" "${repository}"
      generate_package_metadata "${package}" "${git_revision}" "${new_repo_revisions__archlinux32}" "${repository}"
      continue
    fi

    # This loop is the last stage of a pipeline and thus runs in a
    # subshell: the exit below leaves only that subshell. The "|| exit"
    # behind "done" hands the failure on to the script itself, so a
    # partially processed update never gets finalized.
    >&2 echo "unknown git diff mode '${mode}'"
    exit 1
  done || \
  exit "$?"

sort -u "${work_dir}/deletion-list.new" | \
  sponge "${work_dir}/deletion-list.new"

echo 'apply blacklisting'
# ignore blacklisted packages and dependent packages
# this is the first time when all the information is available and up to date

# ${black_listed} accumulates the base64 encoded pkgbases found so far,
# ${black_listed_new} holds the ones found in the latest round.
black_listed=''
# Seed: the entries of the file "blacklist" in the archlinux32 repository
# (comments and empty lines stripped), the package given via
# --test-exclusion (if any) and every pkgbase in the database which starts
# with "lib32-".
black_listed_new=$(
  {
    {
      git -C "${repo_paths__archlinux32}" archive "${new_repo_revisions__archlinux32}" -- 'blacklist' | \
        tar -Ox 'blacklist' | \
        sed '
          s/\s*#.*$//
          /^\s*$/d
        '
      if [ -n "${test_exclusion}" ]; then
        echo "${test_exclusion}"
      fi
    } | \
      base64_encode_each
    # shellcheck disable=SC2016
    {
      printf 'SELECT DISTINCT replace(to_base64(`package_sources`.`pkgbase`),"\\n","")'
      printf ' FROM `package_sources`'
      printf ' WHERE `package_sources`.`pkgbase` LIKE "lib32-%%"'
    } | \
      mysql_run_query
  } | \
    sort -u
)

# Repeat until a round does not find any additional package.
while [ -n "${black_listed_new}" ]; do
  # merge the newly found packages into the accumulated list
  black_listed=$(
    printf '%s\n' "${black_listed}" "${black_listed_new}" | \
      grep -vxF '' | \
      sort -u
  )
  black_listed_new=$(
    {
      # The known packages are printed twice, the query result once:
      # "uniq -u" below thus keeps only packages which are not known yet.
      printf '%s\n' "${black_listed}" "${black_listed}"

      # Ask the database for packages with a dependency which has
      # providers, but none outside of the black listed packages.
      # NOTE(review): this reading relies on what the mysql_join_* helpers
      # emit - they are defined elsewhere, verify against them.
      # shellcheck disable=SC2016
      {
        printf 'CREATE TEMPORARY TABLE `bl` (`pkgbase` VARCHAR(64));\n'
        printf 'INSERT INTO `bl` (`pkgbase`) VALUES '
        printf '%s\n' "${black_listed}" | \
          sort -u | \
          sed '
            s/^/(from_base64("/
            s/$/")),/
            $ s/,$/;/
          '
        printf 'SELECT replace(to_base64(`a_ps`.`pkgbase`),"\\n","")'
        printf ' FROM `package_sources` AS `a_ps`'
        mysql_join_package_sources_build_assignments 'a_ps' 'a_ba'
        mysql_join_build_assignments_binary_packages 'a_ba' 'a_bp'
        mysql_join_binary_packages_dependencies 'a_bp'
        printf ' WHERE NOT EXISTS ('
          printf ' SELECT *'
          printf ' FROM `install_target_providers`'
          printf ' WHERE NOT EXISTS ('
            printf ' SELECT *'
            printf ' FROM `bl`'
            printf ' JOIN `package_sources` AS `b_ps` ON `bl`.`pkgbase`=`b_ps`.`pkgbase`'
            mysql_join_package_sources_build_assignments 'b_ps' 'b_ba'
            mysql_join_build_assignments_binary_packages 'b_ba' 'b_bp'
            printf ' WHERE `install_target_providers`.`package`=`b_bp`.`id`'
          printf ')'
          printf ' AND `install_target_providers`.`install_target`=`dependencies`.`depending_on`'
        printf ') AND EXISTS ('
        # TODO: This should be corrected at the root: automatic install targets, which are bogus should
        #       not be added in the first place - but how do we detect that?
          printf ' SELECT *'
          printf ' FROM `install_target_providers`'
          printf ' WHERE `install_target_providers`.`install_target`=`dependencies`.`depending_on`'
        printf ');\n'
      } | \
        mysql_run_query | \
        sort -u
    } | \
      grep -vxF '' | \
      sort | \
      uniq -u
  )
done

# Decode the accumulated list: one plain pkgbase per line.
black_listed=$(
  printf '%s\n' "${black_listed}" | \
  while read -r line; do
    printf '%s' \
      "${line}" | \
       base64 -d
    printf '\n'
  done
)

# Safety net: refuse to continue if more than 1000 packages would get
# black listed. The count is the number of lines printf produces for the
# word-split ${black_listed}.
deletion_list_count=$(
  # shellcheck disable=SC2086
  printf '%s\n' ${black_listed} | \
    wc -l
)
if [ "${deletion_list_count}" -gt 1000 ]; then
  >&2 printf 'There are %s > 1000 packages on the deletion-list. This does not seem right.\n' \
    "${deletion_list_count}"
  # The marker file is non-empty once the message below has been sent, so
  # the message is not repeated on every run.
  if [ ! -s "${work_dir}/told-irc-about-too-many-deletion-list-packages" ]; then
    {
      printf '\001ACTION refuses to blacklist %s packages.\001\n' "${deletion_list_count}"
      # list the repositories whose revision changed
      for repo_name in ${repo_names}; do
        if ! diff -q "${work_dir}/${repo_name}.revision" "${work_dir}/${repo_name}.revision.new" >/dev/null; then
          printf '%s: %s -> %s\n' \
            "${repo_name}" \
            "$(cat "${work_dir}/${repo_name}.revision")" \
            "$(cat "${work_dir}/${repo_name}.revision.new")"
        fi
      done
    } | \
      tee "${work_dir}/told-irc-about-too-many-deletion-list-packages" | \
      sponge "${irc_dir}/#archlinux-ports/in"
  fi
  exit 3
fi
# the count is fine: allow the message to be sent again next time
rm -f "${work_dir}/told-irc-about-too-many-deletion-list-packages"

# Put every black listed package onto the deletion list.
echo "${black_listed}" | \
  while read -r package; do
    # skip empty lines
    [ -n "${package}" ] || continue
    delete_package "${package}"
  done

# deduplicate the deletion list in place
sort -u "${work_dir}/deletion-list.new" | \
  sponge "${work_dir}/deletion-list.new"


if [ -n "${test_exclusion}" ]; then
  # we should not actually update the build-list et. al, but solely print this difference:
  diff --color -u "${work_dir}/deletion-list" "${work_dir}/deletion-list.new" && \
    printf 'If you put "%s" on the blacklist, no additional packages will end up on the deletion list.\n' "${test_exclusion}"
  exit 0
fi

# Now we create the partial order.
# Every line of "build-order" is a pair "$before $after" for tsort.

while read -r package git_revision mod_git_revision repository; do
  # add "$pkgname -> $build-target" to build-order list
  sed "s|^|${package} |" "${work_dir}/package-infos/${package}.${git_revision}.${mod_git_revision}.${repository}.builds"
  # add "$dependency -> $pkgname" to build-order list
  sed "s|\$| ${package}|" "${work_dir}/package-infos/${package}.${git_revision}.${mod_git_revision}.${repository}.build-depends"
  # add "base/base-devel -> $pkgname" to build-order list
  # (${package} is passed as argument - it must not be part of the format)
  printf '%s %s\n' \
    'base' "${package}" \
    'base-devel' "${package}"
done \
  < "${work_dir}/build-list.new" \
  > "${work_dir}/build-order"

# each line must consist of exactly two words
if grep -vq '^\S\+ \S\+$' "${work_dir}/build-order"; then
  >&2 echo 'Created partial order file "build-order" is invalid.'
  exit 2
fi

echo 'Now actually sort it.'

# Bring "build-list.new" into the order computed by tsort and save the
# result as "build-list.new.new". Both inputs are turned into lines of
# five fields with the package name in field 5, so they can be matched:
#   tsort output:   "$position not-git also-not-git whatever $package"
#   build-list.new: "0 $git_revision $mod_git_revision $repository $package"
# Messages of tsort (e.g. about loops) are kept in "tsort.error".
{
  # this part will have the correct build order, but all the infos are missing
  tsort "${work_dir}/build-order" 2> "${work_dir}/tsort.error" | \
    nl -ba | \
    awk '{print $1 " not-git also-not-git whatever " $2}'
  # this part has all the infos, but possibly the wrong order
  awk '{print "0 " $2 " " $3 " " $4 " " $1}' "${work_dir}/build-list.new"
} | \
  sort -k5,5 -k1nr | \
  # now, we have the correct order and the infos, but in adjacent lines
  uniq -f4 -D | \
  sed '/^0 /d;N;s|\n| |' | \
  # now in one line, each
  sort -k1n,1 | \
  awk '{print $5 " " $7 " " $8 " " $9}' > \
  "${work_dir}/build-list.new.new"

# Start with an empty directory "build-list.loops.new"; it receives one
# file per dependency loop reported by tsort.
rm --one-file-system -rf "${work_dir}/build-list.loops.new"
mkdir "${work_dir}/build-list.loops.new"

# a non-empty "tsort.error" means tsort complained
if [ -s "${work_dir}/tsort.error" ]; then
  >&2 echo 'WARNING: There is a dependency cycle!'
  >&2 cat "${work_dir}/tsort.error"
  >&2 echo
  >&2 echo 'I will continue anyway.'
  # save loops in separate files each, so breaking them is easier
  # (each "input contains a loop" line starts the next file loop_$n, the
  # second field of the other lines is written to the current file)
  awk '
    /^tsort: \S+: input contains a loop:$/{
      n++;
      getline
    }
    {
      print $2 >"'"${work_dir}"'/build-list.loops.new/loop_" n
    }
    ' "${work_dir}/tsort.error"

  # remove lines from loop files which are no packages
  # ("uniq -d" keeps only names which are in the loop file and in the
  # first column of "build-list.new.new")
  find "${work_dir}/build-list.loops.new" -maxdepth 1 -regextype grep \
    -regex '.*/loop_[0-9]\+' \
    -printf '%p\n' | \
    while read -r loop; do
      {
        sort -u "${loop}"
        cut -d' ' -f1 "${work_dir}/build-list.new.new" | \
          sort -u
      } | \
        sort | \
        uniq -d | \
        sponge "${loop}"
    done
else
  # nothing to report: do not keep the empty file
  rm "${work_dir}/tsort.error"
fi

# Finalize the database: remember the new head of every git repository
# and turn all "to-be-decided" binary packages into "build-list" ones.
# shellcheck disable=SC2016
{
  # update hashes of repositories in mysql database
  for repo in ${repo_names}; do
    printf 'UPDATE `git_repositories` SET `git_repositories`.`head`=from_base64("%s")' \
      "$(eval 'printf '"'"'%s'"'"' "${new_repo_revisions__'"${repo}"'}" | base64 -w0')"
    printf ' WHERE `git_repositories`.`name`=from_base64("%s");\n' \
      "$(printf '%s' "${repo}" | base64 -w0)"
  done
  # move binary_packages from "to-be-decided" to "build-list"
  printf 'UPDATE `binary_packages`'
  mysql_join_binary_packages_repositories '' 'from_repo'
  printf '%s' \
    ' SET `repository`=(' \
    'SELECT `to_repo`.`id`' \
    ' FROM `repositories` AS `to_repo`' \
    ' WHERE `to_repo`.`name`="build-list"' \
    ')'
  printf ' WHERE `from_repo`.`name`="to-be-decided";\n'
} | \
  mysql_run_query

# update loop list in database (beware, the packages are expected to be in "build-list",
# not "to-be-decided", so we need to run this after moving the packages from "to-be-decided" to the "build-list".
mysql_find_build_assignment_loops

# Move the .new-files to the actual files
# The order of the names matters: "build-list.new" first receives the
# sorted "build-list.new.new" and is then itself moved to "build-list".

rm -rf --one-file-system "${work_dir}/build-list.loops"
{
  printf '%s\n' "build-list.loops" "build-list.new" "build-list" "deletion-list"
  # one "$repo.revision" per git repository
  # shellcheck disable=SC2086
  printf '%s.revision\n' ${repo_names}
} | \
  while read -r file; do
    mv "${work_dir}/${file}.new" "${work_dir}/${file}"
  done

# Remove the lock file
# NOTE(review): the file is unlinked while fd 9 still holds the lock on
# it; a process waiting on the old file and one creating a new lock file
# could presumably both get "the" lock afterwards - confirm whether the
# file should rather be kept.

rm -f "${build_list_lock_file}"