| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Perlanet::Trait::Scrubber; |
|
2
|
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
2190
|
use strict; |
|
|
6
|
|
|
|
|
9
|
|
|
|
6
|
|
|
|
|
155
|
|
|
4
|
6
|
|
|
6
|
|
23
|
use warnings; |
|
|
6
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
195
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
6
|
|
|
6
|
|
23
|
use Moose::Role; |
|
|
6
|
|
|
|
|
7
|
|
|
|
6
|
|
|
|
|
57
|
|
|
7
|
6
|
|
|
6
|
|
16081
|
use namespace::autoclean; |
|
|
6
|
|
|
|
|
13
|
|
|
|
6
|
|
|
|
|
47
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
6
|
|
|
6
|
|
2016
|
use HTML::Scrubber; |
|
|
6
|
|
|
|
|
12470
|
|
|
|
6
|
|
|
|
|
1726
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Perlanet::Trait::Scrubber - clean posts with HTML::Scrubber before aggregating |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Before adding a post to the aggregated feed, it will first be cleaned with |
|
18
|
|
|
|
|
|
|
L<HTML::Scrubber>. |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 scrubber |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
An instance of L<HTML::Scrubber> used to remove unwanted content from |
|
25
|
|
|
|
|
|
|
the feed entries. For default settings see source of Perlanet.pm. |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=cut |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
has 'scrubber' => ( |
|
30
|
|
|
|
|
|
|
is => 'rw', |
|
31
|
|
|
|
|
|
|
lazy_build => 1 |
|
32
|
|
|
|
|
|
|
); |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub _build_scrubber { |
|
35
|
3
|
|
|
3
|
|
36
|
my $self = shift; |
|
36
|
|
|
|
|
|
|
|
|
37
|
3
|
|
|
|
|
31
|
my %scrub_rules = ( |
|
38
|
|
|
|
|
|
|
img => { |
|
39
|
|
|
|
|
|
|
src => qr{^http://}, |
|
40
|
|
|
|
|
|
|
alt => 1, |
|
41
|
|
|
|
|
|
|
align => 1, |
|
42
|
|
|
|
|
|
|
style => 1, |
|
43
|
|
|
|
|
|
|
'*' => 0, |
|
44
|
|
|
|
|
|
|
}, |
|
45
|
|
|
|
|
|
|
style => 0, |
|
46
|
|
|
|
|
|
|
script => 0, |
|
47
|
|
|
|
|
|
|
span => { |
|
48
|
|
|
|
|
|
|
id => 0, |
|
49
|
|
|
|
|
|
|
}, |
|
50
|
|
|
|
|
|
|
a => { |
|
51
|
|
|
|
|
|
|
href => 1, |
|
52
|
|
|
|
|
|
|
'*' => 0, |
|
53
|
|
|
|
|
|
|
}, |
|
54
|
|
|
|
|
|
|
); |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
|
|
57
|
3
|
|
|
|
|
39
|
my %scrub_def = ( |
|
58
|
|
|
|
|
|
|
'*' => 1, |
|
59
|
|
|
|
|
|
|
'href' => qr{^(?!(?:java)?script)}i, |
|
60
|
|
|
|
|
|
|
'src' => qr{^(?!(?:java)?script)}i, |
|
61
|
|
|
|
|
|
|
'cite' => '(?i-xsm:^(?!(?:java)?script))', |
|
62
|
|
|
|
|
|
|
'language' => 0, |
|
63
|
|
|
|
|
|
|
'name' => 1, |
|
64
|
|
|
|
|
|
|
'value' => 1, |
|
65
|
|
|
|
|
|
|
'onblur' => 0, |
|
66
|
|
|
|
|
|
|
'onchange' => 0, |
|
67
|
|
|
|
|
|
|
'onclick' => 0, |
|
68
|
|
|
|
|
|
|
'ondblclick' => 0, |
|
69
|
|
|
|
|
|
|
'onerror' => 0, |
|
70
|
|
|
|
|
|
|
'onfocus' => 0, |
|
71
|
|
|
|
|
|
|
'onkeydown' => 0, |
|
72
|
|
|
|
|
|
|
'onkeypress' => 0, |
|
73
|
|
|
|
|
|
|
'onkeyup' => 0, |
|
74
|
|
|
|
|
|
|
'onload' => 0, |
|
75
|
|
|
|
|
|
|
'onmousedown' => 0, |
|
76
|
|
|
|
|
|
|
'onmousemove' => 0, |
|
77
|
|
|
|
|
|
|
'onmouseout' => 0, |
|
78
|
|
|
|
|
|
|
'onmouseover' => 0, |
|
79
|
|
|
|
|
|
|
'onmouseup' => 0, |
|
80
|
|
|
|
|
|
|
'onreset' => 0, |
|
81
|
|
|
|
|
|
|
'onselect' => 0, |
|
82
|
|
|
|
|
|
|
'onsubmit' => 0, |
|
83
|
|
|
|
|
|
|
'onunload' => 0, |
|
84
|
|
|
|
|
|
|
'src' => 1, |
|
85
|
|
|
|
|
|
|
'type' => 1, |
|
86
|
|
|
|
|
|
|
'style' => 1, |
|
87
|
|
|
|
|
|
|
'class' => 0, |
|
88
|
|
|
|
|
|
|
'id' => 0, |
|
89
|
|
|
|
|
|
|
); |
|
90
|
|
|
|
|
|
|
|
|
91
|
3
|
|
|
|
|
23
|
my $scrub = HTML::Scrubber->new; |
|
92
|
3
|
|
|
|
|
281
|
$scrub->rules(%scrub_rules); |
|
93
|
3
|
|
|
|
|
59
|
$scrub->default(1, \%scrub_def); |
|
94
|
|
|
|
|
|
|
|
|
95
|
3
|
|
|
|
|
37
|
return $scrub; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
around 'clean_html' => sub { |
|
99
|
|
|
|
|
|
|
my $orig = shift; |
|
100
|
|
|
|
|
|
|
my ($self, $html) = @_; |
|
101
|
|
|
|
|
|
|
$html = $self->$orig($html); |
|
102
|
|
|
|
|
|
|
my $scrubbed = $self->scrubber->scrub($html); |
|
103
|
|
|
|
|
|
|
return $html; |
|
104
|
|
|
|
|
|
|
}; |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 AUTHOR |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Dave Cross, <dave@mag-sol.com> |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Copyright (c) 2010 by Magnum Solutions Ltd. |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
115
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.0 or, |
|
116
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
1; |
|
121
|
|
|
|
|
|
|
|