File Coverage

blib/lib/Perlanet.pm
Criterion Covered Total %
statement 120 132 90.9
branch 7 36 19.4
condition 6 18 33.3
subroutine 25 26 96.2
pod 9 9 100.0
total 167 221 75.6


line stmt bran cond sub pod time code
1             package Perlanet;
2              
3 6     6   177765 use strict;
  6         8  
  6         120  
4 6     6   17 use warnings;
  6         6  
  6         122  
5              
6 6     6   235 use Moose;
  6         274151  
  6         49  
7 6     6   21712 use namespace::autoclean;
  6         4311  
  6         37  
8              
9 6     6   246 use Carp;
  6         22  
  6         207  
10 6     6   1170 use DateTime::Duration;
  6         617420  
  6         94  
11 6     6   25 use DateTime;
  6         5  
  6         63  
12 6     6   880 use Perlanet::Entry;
  6         15  
  6         139  
13 6     6   978 use Perlanet::Feed;
  6         15  
  6         133  
14 6     6   1302 use TryCatch;
  6         1540565  
  6         41  
15 6     6   1741 use URI::Fetch;
  6         14  
  6         105  
16 6     6   20 use XML::Feed;
  6         7  
  6         106  
17              
18 6     6   20 use vars qw{$VERSION};
  6         7  
  6         230  
19              
20             BEGIN {
21 6     6   2225   $VERSION = '0.54';
22             }
23              
24             with 'MooseX::Traits';
25              
26             $XML::Atom::ForceUnicode = 1;
27              
28             has 'ua' => (
29               is => 'rw',
30               isa => 'LWP::UserAgent',
31               lazy_build => 1
32             );
33              
34             sub _build_ua {
35 4     4   5   my $self = shift;
36 4         33   my $ua = LWP::UserAgent->new(
37                 agent => "Perlanet/$VERSION"
38               );
39 4 50       6553   $ua->show_progress(1) if -t STDOUT;
40 4         21   $ua->env_proxy;
41              
42 4         446   return $ua;
43             }
44              
45             has 'cutoff' => (
46               isa => 'DateTime',
47               is => 'ro',
48               default => sub {
49                 DateTime->now + DateTime::Duration->new(weeks => 1);
50               }
51             );
52              
53             has 'entries' => (
54               isa => 'Int',
55               is => 'rw',
56               default => 10,
57             );
58              
59             has 'entries_per_feed' => (
60               isa => 'Int',
61               is => 'rw',
62               default => 5,
63             );
64              
65             has 'feeds' => (
66               isa => 'ArrayRef',
67               is => 'ro',
68               default => sub { [] }
69             );
70              
71             has 'author' => (
72               isa => 'HashRef',
73               is => 'ro',
74             );
75              
76             has $_ => (
77               isa => 'Str',
78               is => 'ro',
79             ) for qw( self_link title description url agent );
80              
81             =head1 NAME
82            
83             Perlanet - A program for creating programs that aggregate web feeds (both
84             RSS and Atom).
85            
86             =head1 SYNOPSIS
87            
88             my $perlanet = Perlanet->new;
89             $perlanet->run;
90            
91             =head1 DESCRIPTION
92            
93             Perlanet is a program for creating programs that aggregate web feeds (both
94             RSS and Atom). Web pages like this are often called "Planets" after the Python
95             software which originally popularised them. Perlanet is a planet builder
96             written in Perl - hence "Perlanet".
97            
98             You are probably interested in L<Perlanet::Simple> to get started straight
99             out of the box, batteries included style.
100            
101             Perlanet itself is the driving force behind everything, however. Perlanet
102             reads a series of web feeds (filtering only those that are valid), sorts
103             and selects entries from these web feeds, and then creates a new aggregate
104             feed and renders this aggregate feed. Perlanet allows the user to customize
105             all of these steps through subclassing and roles.
106            
107             For most uses, you probably don't want to use the Perlanet module. The
108             L<perlanet> command line program is far more likely to be useful.
109            
110             =head1 CONSTRUCTOR
111            
112             =head2 new
113            
114             my $perlanet = Perlanet->new
115            
116             The constructor method. Can be passed a hashref of initialisers.
117            
118             See L</ATTRIBUTES> below for details of the key/value pairs to pass in.
119            
120             =head1 ATTRIBUTES
121            
122             =over
123            
124             =item ua
125            
126             An instance of L<LWP::UserAgent>. Defaults to a simple agent using C<<
127             $cfg->{agent} >> as the user agent name, or C< Perlanet/$VERSION >.
128            
129             =item cutoff
130            
131             An instance of L<DateTime> which represents the earliest date for
132             which feed posts will be fetched/shown.
133            
134             =item feeds
135            
136             An arrayref of L<Perlanet::Feed> objects representing the feeds to
137             collect data from.
138            
139             =back
140            
141             =head1 METHODS
142            
143             =head2 fetch_page
144            
145             Attempt to fetch a web page and a returns a L<URI::Fetch::Response> object.
146            
147             =cut
148              
149             sub fetch_page {
150 0     0 1 0   my ($self, $url) = @_;
151 0         0   return URI::Fetch->fetch(
152                 $url,
153                 UserAgent => $self->ua,
154                 ForceResponse => 1,
155               );
156             }
157              
158             =head2 fetch_feeds
159            
160             Called internally by L</run> and passed the list of feeds in L</feeds>.
161            
162             Attempt to download all given feeds, as specified in the C<feeds> attribute.
163             Returns a list of L<Perlanet::Feed> objects, with the actual feed data
164             loaded.
165            
166             NB: This method also modifies the contents of L</feeds>.
167            
168             =cut
169              
170             sub fetch_feeds {
171 5     5 1 11   my ($self, @feeds) = @_;
172              
173 5         6   my @valid_feeds;
174 5         8   for my $feed (@feeds) {
175 6         38     my $response = $self->fetch_page($feed->url);
176              
177 6 50       240098     if ($response->is_error) {
178 0         0       carp 'Error retrieving ' . $feed->url;
179 0         0       carp $response->http_response->status_line;
180 0         0       next;
181                 }
182              
183 6 0   6   1571     unless (length $response->content) {
  6         158  
  6         345  
  6         32  
  6         6  
  6         18  
184 0         0       carp 'No data returned from ' . $feed->url;
  6         35  
185 0         0       next;
  6         108  
186                 }
187 6         449873  
188 6 50       155     try {
189                   my $data = $response->content;
190 6         54       my $xml_feed = XML::Feed->parse(\$data);
191              
192 6 50   6   1980515       $feed->_xml_feed($xml_feed);
  6         11  
  6         12  
  0         0  
  0         0  
193 0         0       $feed->title($xml_feed->title) unless $feed->title;
194 0 0       0  
195                   push @valid_feeds, $feed;
196 6         152     }
197                 catch ($e) {
198 5         82       carp 'Errors parsing ' . $feed->url;
199                   carp $e if defined $e;
200                 }
201               }
202              
203               return @valid_feeds;
204             }
205              
206             =head2 select_entries
207            
208             Called internally by L</run> and passed the list of feeds from
209             L</fetch_feeds>.
210            
211 5     5 1 549 Returns a combined list of L<Perlanet::Entry> objects from all given feeds.
212            
213 5         7 =cut
214 5         10  
215 6         81 sub select_entries {
216               my ($self, @feeds) = @_;
217 6 50 33     7792  
218 6         27   my @feed_entries;
219               for my $feed (@feeds) {
220                 my @entries = $feed->_xml_feed->entries;
221 14         325  
222                 if ($self->entries_per_feed and @entries > $self->entries_per_feed) {
223 6         326       $#entries = $self->entries_per_feed - 1;
224                 }
225              
226 14 50 100     2086     push @feed_entries,
227 3         57240       map {
228                     $_->title($feed->title . ': ' . $_->title);
229              
230             # Problem with XML::Feed's conversion of RSS to Atom
231 14         9961         if ($_->issued && ! $_->modified) {
232                       $_->modified($_->issued);
233                     }
234              
235                     Perlanet::Entry->new(
236                       _entry => $_,
237 5         242           feed => $feed
238                     );
239                   } @entries;
240               }
241              
242               return @feed_entries;
243             }
244              
245             =head2 sort_entries
246            
247             Called internally by L</run> and passed the list of entries from
248             L</select_entries>.
249            
250             Sort the given list of entries into created/modified order for aggregation,
251             and filters them if necessary.
252            
253 3     3 1 503 Takes a list of L<Perlanet::Entry>s, and returns an ordered list.
254 3         24
255             =cut
256 4   0     1100  
257 1   33     7 sub sort_entries {
      0        
258               my ($self, @entries) = @_;
259 3         795   my $day_zero = DateTime->from_epoch(epoch => 0);
260              
261               @entries = grep {
262                   ($_->issued || $_->modified || $day_zero) < $self->cutoff
263               } sort {
264                   ($b->modified || $b->issued || $day_zero)
265 3 50 33     1819           <=>
266 0         0       ($a->modified || $a->issued || $day_zero)
267               } @entries;
268              
269 3         31 # Only need so many entries
270               if ($self->entries && @entries > $self->entries) {
271                 $#entries = $self->entries - 1;
272               }
273              
274               return @entries;
275             }
276              
277             =head2 build_feed
278            
279             Called internally by L</run> and passed the list of entries from
280             L</sort_entries>.
281            
282             Takes a list of L<Perlanet::Entry>s, and returns a L<Perlanet::Feed>
283 3     3 1 581 that is the actual feed for the planet.
284            
285 3         45 =cut
286              
287 3         58 sub build_feed {
288 3 0       121   my ($self, @entries) = @_;
289 3 0       121  
290 3 0       123   my $self_url = $self->self_link;
291 3 0       86  
292 3 0       76   my $f = Perlanet::Feed->new( modified => DateTime->now );
293 3 0       73   $f->title($self->title) if defined $self->title;
294 3 0       76   $f->url($self->url) if defined $self->url;
295               $f->description($self->description) if defined $self->description;
296 3         57   $f->author($self->author->{name}) if defined $self->author->{name};
  3         30  
297               $f->email($self->author->{email}) if defined $self->author->{email};
298 3         65   $f->self_link($self->url) if defined $self->url;
299               $f->id($self->url) if defined $self->url;
300              
301               $f->add_entry($_) for @entries;
302              
303               return $f;
304             }
305              
306             =head2 clean_html
307            
308             Clean a HTML string so it is suitable for display.
309            
310 6     6 1 12 Takes a HTML string and returns a "cleaned" HTML string.
311 6         64
312             =cut
313              
314             sub clean_html {
315               my ($self, $entry) = @_;
316               return $entry;
317             }
318              
319             =head2 clean_entries
320            
321             Clean all entries for the planet.
322            
323             Takes a list of entries, runs them through C<clean> and returns a list of
324 3     3 1 472 cleaned entries.
325            
326 3         5 =cut
327              
328 3         7 sub clean_entries {
329 4 0       27   my ($self, @entries) = @_;
330 3         646  
331 3         22   my @clean_entries;
332              
333               foreach (@entries) {
334 4 0       1251     if (my $body = $_->content->body) {
335 3         370       my $cleaned = $self->clean_html($body);
336 3         23       $_->content->body($cleaned);
337                 }
338              
339 4         511     if (my $summary = $_->summary->body) {
340                   my $cleaned = $self->clean_html($summary);
341                   $_->summary->body($cleaned);
342 3         39     }
343              
344                 push @clean_entries, $_;
345               }
346              
347               return @clean_entries;
348             }
349              
350             =head2 render
351            
352             Called internally by L</run> and passed the feed from L</build_feed>.
353            
354             This is the hook where you generate some type of page to display the result
355             of aggregating feeds together (ie, inserting the posts into a database,
356             running a HTML templating library, etc)
357            
358 1     1 1 24 Takes a L<Perlanet::Feed> as input (as generated by L<build_feed>.
359            
360             =cut
361              
362             sub render {
363               my ($self, $feed) = @_;
364             }
365              
366             =head2 run
367            
368 1     1 1 3067 The main method which runs the perlanet process.
369            
370 1         2 =cut
  1         8  
371 1         6  
372 1         5 sub run {
373 1         4   my $self = shift;
374 1         12  
375               my @feeds = $self->fetch_feeds(@{$self->feeds});
376 1         14   my @selected = $self->select_entries(@feeds);
377               my @sorted = $self->sort_entries(@selected);
378               my @cleaned = $self->clean_entries(@sorted);
379               my $feed = $self->build_feed(@cleaned);
380              
381               $self->render($feed);
382             }
383              
384             =head1 TO DO
385            
386             See http://wiki.github.com/davorg/perlanet
387            
388             =head1 SUPPORT
389            
390             There is a mailing list which acts as both a place for developers to talk
391             about maintaining and improving Perlanet and also for users to get support.
392             You can sign up to this list at
393             L<http://lists.mag-sol.com/mailman/listinfo/perlanet>
394            
395             To report bugs in Perlanet, please use the CPAN request tracker. You can
396             either use the web page at
397             L<http://rt.cpan.org/Public/Bug/Report.html?Queue=Perlanet> or send an email
398             to bug-Perlanet@rt.cpan.org.
399            
400             =head1 SEE ALSO
401            
402             =over 4
403            
404             =item *
405            
406             L<perlanet>
407            
408             =item *
409            
410             L<Plagger>
411            
412             =back
413            
414             =head1 AUTHOR
415            
416             Dave Cross, <dave@mag-sol.com>
417            
418             =head1 COPYRIGHT AND LICENSE
419            
420             Copyright (c) 2010 by Magnum Solutions Ltd.
421            
422             This library is free software; you can redistribute it and/or modify
423             it under the same terms as Perl itself, either Perl version 5.10.0 or,
424             at your option, any later version of Perl 5 you may have available.
425            
426             =cut
427              
428             1;
429