Malicious Code Finder

This is a Perl script for searching malicious content in your website files. Currently it searches only *.php and .htaccess files. Also it matches some suspicious directory names.

The script has been extensively tested against false positives on more than 100 000 source files (Joomla, WordPress and almost any popular software). It finds the most popular exploits.

The idea is as follows: there are patterns with positive and negative values. Once the score for a file reaches 100, the file is considered malicious. If the score falls below 0, the file is considered legitimate and is skipped. The latter has only one purpose - to make the script run faster by skipping the rest of the checks. If you scan a lot of files, this is a must. Otherwise, it is better to comment out the line with the Copyright rule. 

#!/usr/bin/perl

# Version 2.1 Beta Sept 29 2010
# Copyright (C) 2010 Anatoliy Dimitrov, website-security.info

#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program.  If not, see www.gnu.org.

use warnings;
use strict;

use Cwd;
use File::Find;
use File::stat;

# Root of the scan: the directory the script was started from.
my $path = cwd();

# State shared between the File::Find callback and the final report:
#   $count     - counter behind the periodic status message
#   %bad_files - file name => score for every file judged malicious
#   @bad_dirs  - directories whose name matched a suspicious pattern
my ( $count, %bad_files, @bad_dirs );

# Regex => score table applied to every line of a *.php file.  Scores of
# all matching patterns are summed; 100 or more marks the file as bad,
# a negative total marks it as legitimate.
my %suspicious_files_pattern = (

    # Exploits usually do not carry a copyright notice.
    '(Copyright)+?' => -1,

    # The easiest ones: well-known shell / bot markers.
    '(shellbot|c99shell|bot_list|______)' => 100,

    '(eval\(gzinflate\(base64_decode|eval\(gzinflate\(base64|eval\(base64)' => 100,

    # One-line hacks.
    '<\?php.*(urldecode|\$[a-z]{1}=@)' => 100,

    # Why would a script care about the remote address?
    'if \(\$_SERVER\[\'REMOTE_ADDR' => 90,

    '\$_(POST|GET)\[\'(cwd|port|exe|cmd)' => 50,

    # Rarely needed by a regular script.
    '(/etc/passwd|rand\(1,65000\)|netstat)' => 50,
);

# Directory names worth reporting (PayPal scam kits and other oddities),
# kept as a plain alternation string that is interpolated into a regex.
my $suspicious_dirname_patterns = join '|',
    'webscrcmd',
    '\.\.\.',
    '__',
    '\s\s';

# .htaccess rules that single out Google referrers - why would anyone
# redirect Google traffic?
my $suspicious_htaccess_patterns = 'HTTP_REFERER.*google';

#$/ = ''; #bugs the re sometimes but is much faster

# File::Find "wanted" callback: inspects the entry File::Find is currently
# visiting and records anything suspicious.
#
# Reads:   $File::Find::name, $File::Find::dir, the pattern tables
#          (%suspicious_files_pattern, $suspicious_dirname_patterns,
#          $suspicious_htaccess_patterns).
# Writes:  @bad_dirs  - the containing directory, once, if its name matches
#          %bad_files - file name => score for malicious files
#          $count     - incremented for every entry visited
# Returns: nothing useful (File::Find ignores the return value).
sub matchPattern {
    my $file = $File::Find::name;
    my $dir  = $File::Find::dir;

    if ( $dir =~ /$suspicious_dirname_patterns|\s\s/i ) {

        # Compare literally: the directory name is data, not a regex, so
        # names containing metacharacters must neither be duplicated nor
        # abort the scan with a regex compilation error.
        push @bad_dirs, $dir unless grep { $_ eq $dir } @bad_dirs;
    }

    if ( $file =~ m"\.htaccess$" ) {
        _scan_htaccess($file);
    }
    elsif ( $file =~ m"\.php$" ) {
        _scan_php($file);
    }

    # Show some progress for every 5000 entries processed.  The counter is
    # advanced for every entry, including the ones that were flagged or
    # skipped by the scanners above.
    print "! Status update: $count files processed\n" if !( ++$count % 5000 );

    return;
}

# Flags $file in %bad_files with a score of 100 as soon as one of its lines
# matches $suspicious_htaccess_patterns.
sub _scan_htaccess {
    my ($file) = @_;

    open my $fh, '<', $file or do {
        warn "Unable to open file: $file!\n";
        return;
    };

    while ( my $row = <$fh> ) {
        if ( $row =~ /$suspicious_htaccess_patterns/ ) {
            $bad_files{$file} = 100;
            last;
        }
    }

    close $fh;
    return;
}

# Scores $file line by line against %suspicious_files_pattern.  Scanning
# stops as soon as the running total drops below 0 (legitimate file) or
# exceeds 99 (malicious file, recorded in %bad_files with its score).
sub _scan_php {
    my ($file) = @_;

    # stat() can fail (e.g. dangling symlink); skip the entry instead of
    # dying on a method call on undef.
    my $info = stat($file) or do {
        warn "Unable to stat file: $file!\n";
        return;
    };

    # Protection against too big files.
    return if $info->size > 200000;

    open my $fh, '<', $file or do {
        warn "Unable to open file: $file!\n";
        return;
    };

    # A plain list of keys instead of each(): leaving an each() loop early
    # keeps the hash iterator half-way, so the first line of the next file
    # would only be tested against the remaining patterns.  Sorting makes
    # the evaluation order, and therefore the verdict, reproducible.
    my @patterns = sort keys %suspicious_files_pattern;

    # It is important that each file starts from 0.
    my $probability = 0;

    ROW:
    while ( my $row = <$fh> ) {
        for my $pattern (@patterns) {
            next if $row !~ m/$pattern/;

            $probability += $suspicious_files_pattern{$pattern};

            # Below 0: something that is usually not found in exploits.
            # Above 99: enough evidence, no need to read any further.
            last ROW if $probability < 0 || $probability > 99;
        }
    }

    close $fh;

    $bad_files{$file} = $probability if $probability > 99;

    return;
}

# Walk the whole tree below $path; matchPattern collects the findings in
# %bad_files and @bad_dirs.
find( \&matchPattern, $path );

if ( !%bad_files && !@bad_dirs ) {
    print "Nothing suspicious found.\n";
}
else {
    print "\n Results in descending order:\n";

    # Highest score first.
    my @by_score = sort { $bad_files{$b} <=> $bad_files{$a} } keys %bad_files;

    for my $key (@by_score) {
        print "- $key\t$bad_files{$key}\n";
    }

    for my $dir (@bad_dirs) {
        print "- $dir - suspicious dir name \n";
    }
}

# $^T holds the time at which the script started.
my $elapsed = time() - $^T;

print "\n\nIt took $elapsed seconds.\n";

# Remove the script itself so that it is not forgotten on the server :)
unlink($0) or warn "Please, remove file manually.";

You can find the script on GitHub too.

If you have any questions or comments please contact us.

blog comments powered by Disqus