File: //proc/self/root/usr/sbin/popularity-contest
#!/usr/bin/perl -w
#
# Copyright (C) 2003 by Bill Allombert <ballombe@debian.org>
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
# based on a design and a bash/gawk script
#
# Copyright (C) 1998,2000 by Avery Pennarun, for the Debian Project.
# Use, modify, and redistribute modified or unmodified versions in any
# way you wish.
use strict;
use 5.6.0;
my $dpkg_db="/var/lib/dpkg/info";
my $dpkg_origin="/etc/dpkg/origins/default";
my $popcon_conf="/etc/popularity-contest.conf";
# $popcon_conf is in shell-script format
my $HOSTID = qx(unset MY_HOSTID; . $popcon_conf; echo \$MY_HOSTID );
chomp $HOSTID;
if ( $HOSTID eq "")
{
  print STDERR "You must set MY_HOSTID in $popcon_conf!\n";
  exit 1;
}
if ( $HOSTID eq "d41d8cd98f00b204e9800998ecf8427e")
{
  print STDERR "Warning: MY_HOSTID is the md5sum of the empty file!\n";
  print STDERR "Please change it to the md5sum of a random file in $popcon_conf!\n";
}
if ( $HOSTID !~ /^([a-f0-9]{32})$/)
{
  print STDERR "Error: MY_HOSTID does not match ^([a-f0-9]{32})\$\n";
  print STDERR "Please edit $popcon_conf to use a valid md5sum value\n";
  exit 1;
}
# Architecture.
my $debarch = `dpkg --print-architecture`;
chomp $debarch;
# Popcon release
my $popconver=`dpkg-query --showformat='\${version}' --show popularity-contest`;
# Vendor
my $vendor="unknown";
if (open(VENDOR, "<", $dpkg_origin))
{
  while (<VENDOR>)
  {
    m/^Vendor: *(.*)/ and $vendor=$1;
  }
  close(VENDOR);
}
# Initialise time computations
my $now = time;
my $halfdaylen = 12 * 60 *60;
my $daylen = 2 * $halfdaylen;
my $monthlen = $daylen * 30;
my $lastmonth = $now - $monthlen;
sub trunc_time {
    return $halfdaylen * int($_[0] / $halfdaylen);
}
my %popcon=();
# List all mapped files
my %mapped;
if (opendir(PROC, "/proc"))
{
  my @procfiles = readdir(PROC);
  closedir(PROC);
  foreach (@procfiles)
  {
    -d "/proc/$_" or next;
    m{^[0-9]+$} or next;
    open MAPS, "/proc/$_/maps" or next;
    while (<MAPS>)
    {
      m{(/.*)} or next;
      $mapped{$1} = 1;
    }
    close MAPS;
  }
}
# List files diverted by dpkg
my %diverted;
if (open DIVERSIONS, "env LC_ALL=C dpkg-divert --list|")
{
  while (<DIVERSIONS>)
  {
    next unless /^(?:local )?diversion of (\S+) to (?:\S+)(?: by (\S+))?\s*$/;
    $diverted{$1} = defined $2 ? $2 : ""
  }
  close DIVERSIONS;
}
my %pkgs_files = ();
if (opendir(my $DPKG_DB, $dpkg_db))
{
    for my $e (readdir($DPKG_DB)) {
	if ($e =~ m/^([^:]+) .*? \. list$/x) {
	    $pkgs_files{$1} ||= [];
	    push @{$pkgs_files{$1}}, "$dpkg_db/$e";
	}
    }
    closedir($DPKG_DB);
}
# Read dpkg database of installed packages
open PACKAGES, "dpkg-query --show --showformat='\${status} \${package}\\n'|";
while (<PACKAGES>)
{
  /^.*installed *(.+)$/ or next;
  my $pkg=$1;
  my $bestatime = undef;
  my $list;
  # dpkg-query reports multiple times the same package for diff archs
  next if $popcon{$pkg};
  $popcon{$pkg}=[0,0,$pkg,"<NOFILES>"];
  foreach (@{$pkgs_files{$pkg}})
  {
    open FILES, $_ or next;
    while (<FILES>)
    {
      chop;
      next unless (
          ( m{/bin/|/sbin/|/lib/.+/|^/usr/games/|\.[ah]$|\.pm$|\.php$|^/boot/System\.map-}
            && ! m{/lib/.+-.+-.+/} && ! m{^/usr/lib/mime/packages/}
            || defined $mapped{$_} )
          && -f $_);
      next if defined $diverted{$_} and $diverted{$_} ne $pkg;
      my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
          $atime,$mtime,$ctime,$blksize,$blocks)
        = stat;
      if (defined $mapped{$_}) {
# It's currently being accessed by a process
        $atime = time();
      }
      if (!defined($bestatime) || $atime >= $bestatime)
      {
# Truncate time to reduce informaton leak.
        my $tatime = &trunc_time($atime);
        my $tctime = &trunc_time($ctime);
        $bestatime=$atime;
        if ($atime < $lastmonth)
        {
# Not accessed since more than 30 days.
          $popcon{$pkg}=[$tatime,$tctime,$pkg,$_,"<OLD>"];
        }
        elsif ($ctime > $lastmonth && $atime-$ctime < $daylen)
        {
# Installed/upgraded less than a month ago and not used after
# install/upgrade day.
          $popcon{$pkg}=[$tatime,$tctime,$pkg,$_,"<RECENT-CTIME>"];
        }
        else
        {
# Else we `vote' for the package.
          $popcon{$pkg}=[$tatime,$tctime,$pkg,$_];
        }
      }
    }
    close FILES;
  }
}
close PACKAGES;
# We're not done yet.  Sort the output in reverse by atime, and
# add a header/footer.
print "POPULARITY-CONTEST-0 TIME:",time," ID:$HOSTID ".
    "ARCH:$debarch POPCONVER:$popconver VENDOR:$vendor\n";
for (sort { $popcon{$b}[0] <=> $popcon{$a}[0] } keys %popcon)
{
  print join(' ',@{$popcon{$_}}),"\n";
}
print "END-POPULARITY-CONTEST-0 TIME:",time,"\n";