#!/usr/bin/perl
###########
# getcraig.pl
#
# A messy script to crawl craigslist for certain keywords and
# subsequently send notifications via e-mail of any matching
# finds. Two levels are used: digest and notify. Notify is
# for really hot stuff you definitely want to know about.
#
# I wrote this for keeping an eye out for deals on cars I'm
# interested in.
#
# Requires wget and mail
#
# Offered to public domain, since it's pretty silly.
#
# -JSK
# www.generalcriticism.com
# kutani at projectkutani com
###########

use strict;
use warnings;

# Enter your craigslist location here
my $location = "austin";

# Put the area of craigslist you want to search here
my $section = "cta";

# Digest Addy is for somewhat-interested finds.
# Several recipients may be given, separated by whitespace.
my $digestaddy = "YOUR ADDRESS HERE";

# Notify Addy is for Hot Stuff
my $notifyaddy = $digestaddy;

# Regexp for the digest search.
# Compiled with qr// so that backslash escapes reach the regex engine intact
# (inside a double-quoted string "\." silently degrades to ".").
my $digregexp = qr/is(| )(2|3)(0|5)0|300z|lotus|lancer evo|evolution|miata|porsche|alfa|romeo|240( |)sx/i;

# Regexp for the notify search
my $notregexp = qr/nsx| m3 |mr2|280z|RX(|-)7| 944|2\.5RS|mazdaspeed/i;

# wget invocation: quiet, page body written to stdout.
my @wget    = qw(wget -q -O -);
my $urlbase = "http://$location.craigslist.org/$section";

# File remembering the newest listing URL seen by the previous run.
my $state_file = "craig.last";

# Index pages are requested as index100.html, index200.html, ... up to
# index10000.html.
my $page_step  = 100;
my $max_offset = 10000;

# Fetch one URL with wget and return its output as a list of lines.
# Returns an empty list when wget cannot be started or prints nothing.
sub fetch_page {
    my ($url) = @_;

    # List-form pipe open: the URL never passes through a shell.
    my $pid = open my $wget_out, '-|', @wget, $url;
    if ( !$pid ) {
        warn "getcraig: cannot run wget: $!\n";
        return;
    }
    my @lines = <$wget_out>;

    # A non-zero wget exit status (e.g. a missing page) is not reported;
    # the caller only looks at whether any output came back.
    close $wget_out;
    return @lines;
}

# Return the URL stored by the previous run, or "NULL" when there is no
# readable state file.
sub read_last_url {
    my ($file) = @_;
    my $url = "NULL";

    return $url unless -e $file;

    if ( open my $in, '<', $file ) {
        my $content = do { local $/; <$in> };
        close $in;
        $url = defined $content ? $content : '';
        chomp $url;
    }
    else {
        warn "getcraig: cannot read $file: $!\n";
    }
    return $url;
}

# Store the newest listing URL so the next run knows where to stop.
sub save_last_url {
    my ( $file, $url ) = @_;

    return unless defined $url;

    open my $out, '>', $file
        or do { warn "getcraig: cannot write $file: $!\n"; return; };
    print {$out} $url;
    close $out or warn "getcraig: error closing $file: $!\n";
    return;
}

# Pipe the given lines, one per line, to mail(1) with the given subject.
# The address string is split on whitespace so several recipients still
# work, as they did when the command line went through the shell.
sub send_mail {
    my ( $subject, $address, @lines ) = @_;

    my @recipients = split ' ', $address;
    my $pid = open my $mail, '|-', 'mail', '-s', $subject, @recipients;
    if ( !$pid ) {
        warn "getcraig: cannot run mail: $!\n";
        return;
    }
    print {$mail} "$_\n" for @lines;
    close $mail or warn "getcraig: mail exited abnormally\n";
    return;
}

my $last = read_last_url($state_file);

# Parser state. It is deliberately kept across pages, exactly as the
# original goto-based loop did.
my $row   = 0;    # inside a class="row" element
my $ahref = 0;    # the link of the current row has already been handled
my $wrap  = 0;    # link text is expected on the following line

my @entries = (); # listing titles
my @urls    = (); # listing URLs, index-aligned with @entries

my $pg   = 0;
my @page = fetch_page("$urlbase/");

# Walk the index pages until the previously seen listing shows up, the page
# limit is reached, or a page comes back empty (past the last page, or wget
# failed), instead of blindly issuing every remaining request.
PAGE:
while (@page) {
    LINE:
    foreach my $line (@page) {
        if ( !$row && $line =~ /class="row"/ ) {
            $row = 1;
        }

        if ( $row && $line =~ /<\/p>/ ) {
            $row   = 0;
            $ahref = 0;
            next LINE;
        }

        # Title wrapped onto the line after its <a href="..."> tag.
        if ( $wrap && $row ) {
            $line =~ s/^\s+//;
            my @prs = split /<\/a/, $line;
            push @entries, defined $prs[0] ? $prs[0] : '';
            $wrap = 0;
            next LINE;
        }

        next LINE unless $row;
        next LINE if $ahref;

        # NOTE(review): the next three statements are a reconstruction. The
        # copy of this script under review had lost the text between
        # '$line =~ /' and '/, $line;' (it looks like HTML-tag stripping ate
        # everything from a '<' to the next '>'). The surviving code needs
        # $prsa[0] to hold the tag up to the href value and $prsa[1] to still
        # contain '</a>', which a split on '">' provides. $ahref is tested
        # and reset elsewhere but never set in the surviving text, so it is
        # presumably set here. Confirm against a pristine copy.
        next LINE unless $line =~ /<a href/;
        $ahref = 1;
        my @prsa = split /">/, $line;

        my @prsb = split /"/, $prsa[0];
        my $url  = $prsb[1];
        next LINE unless defined $url;    # no quoted href value on this line

        # Everything from this listing on was handled by the previous run.
        last PAGE if $url eq $last;

        my $rest = defined $prsa[1] ? $prsa[1] : '';
        my ($title) = split /<\/a>/, $rest;

        if ( defined $title && $title =~ /\w/ ) {
            push @entries, $title;
        }
        else {
            # No link text on this line; pick it up from the next one.
            $wrap = 1;
        }
        push @urls, $url;
    }

    last PAGE if $pg >= $max_offset;
    $pg += $page_step;
    #print STDERR "Getting $urlbase/index$pg.html\n";
    @page = fetch_page("$urlbase/index$pg.html");
}

#print STDERR scalar @entries . " entries parsed.\n";

# The first URL collected is the newest listing; remember it for next time.
save_last_url( $state_file, $urls[0] ) if @entries;

my @digest = ();
my @notify = ();

for my $i ( 0 .. $#entries ) {
    my $hit = "$entries[$i] -- $urls[$i]";
    #print "$entries[$i] --- $urls[$i]\n";
    push @digest, $hit if $entries[$i] =~ $digregexp;
    push @notify, $hit if $entries[$i] =~ $notregexp;
}

send_mail( "Craigslist Digest",    $digestaddy, @digest ) if @digest;
send_mail( "Craigslist Hot Stuff", $notifyaddy, @notify ) if @notify;