File: //proc/thread-self/root/usr/share/perl5/HTML/FormatText.pm
package HTML::FormatText;
# ABSTRACT: Format HTML as plaintext
use 5.006_001;
use strict;
use warnings;
# We now use Smart::Comments in place of the old DEBUG framework.
# this should be commented out in release versions....
##use Smart::Comments;
use base 'HTML::Formatter';
our $VERSION = '2.12'; # VERSION
our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
# ------------------------------------------------------------------------
sub default_values {
    (   shift->SUPER::default_values(),
        lm => 3,     # left margin
        rm => 72,    # right margin (actually, maximum text width)
    );
}
# ------------------------------------------------------------------------
sub configure {
    my ( $self, $hash ) = @_;
    my $lm = $self->{lm};
    my $rm = $self->{rm};
    $lm = delete $hash->{lm}          if exists $hash->{lm};
    $lm = delete $hash->{leftmargin}  if exists $hash->{leftmargin};
    $rm = delete $hash->{rm}          if exists $hash->{rm};
    $rm = delete $hash->{rightmargin} if exists $hash->{rightmargin};
    my $width = $rm - $lm;
    if ( $width < 1 ) {
        warn "Bad margins, ignored" if $^W;
        return;
    }
    if ( $width < 20 ) {
        warn "Page probably too narrow" if $^W;
    }
    for ( keys %$hash ) {
        warn "Unknown configure option '$_'" if $^W;
    }
    $self->{lm} = $lm;
    $self->{rm} = $rm;
    $self;
}
# ------------------------------------------------------------------------
sub begin {
    my $self = shift;
    $self->SUPER::begin;
    $self->{curpos} = 0;    # current output position.
    $self->{maxpos} = 0;    # highest value of $pos (used by header underliner)
    $self->{hspace} = 0;    # horizontal space pending flag
}
# ------------------------------------------------------------------------
sub end {
    shift->collect("\n");
}
# ------------------------------------------------------------------------
sub header_start {
    my ( $self, $level ) = @_;
    $self->vspace( 1 + ( 6 - $level ) * 0.4 );
    $self->{maxpos} = 0;
    1;
}
# ------------------------------------------------------------------------
sub header_end {
    my ( $self, $level ) = @_;
    if ( $level <= 2 ) {
        my $line;
        $line = '=' if $level == 1;
        $line = '-' if $level == 2;
        $self->vspace(0);
        $self->out( $line x ( $self->{maxpos} - $self->{lm} ) );
    }
    $self->vspace(1);
    1;
}
# ------------------------------------------------------------------------
sub bullet {
    my $self = shift;
    $self->SUPER::bullet( $_[0] . ' ' );
}
# ------------------------------------------------------------------------
sub hr_start {
    my $self = shift;
    $self->vspace(1);
    $self->out( '-' x ( $self->{rm} - $self->{lm} ) );
    $self->vspace(1);
}
# ------------------------------------------------------------------------
sub pre_out {
    my $self = shift;
    # should really handle bold/italic etc.
    if ( defined $self->{vspace} ) {
        if ( $self->{out} ) {
            $self->nl() while $self->{vspace}-- >= 0;
            $self->{vspace} = undef;
        }
    }
    my $indent = ' ' x $self->{lm};
    my $pre    = shift;
    $pre =~ s/^/$indent/mg;
    $self->collect($pre);
    $self->{out}++;
}
# ------------------------------------------------------------------------
sub out {
    my $self = shift;
    my $text = shift;
    $text =~ tr/\xA0\xAD/ /d;
    if ( $text =~ /^\s*$/ ) {
        $self->{hspace} = 1;
        return;
    }
    if ( defined $self->{vspace} ) {
        if ( $self->{out} ) {
            $self->nl while $self->{vspace}-- >= 0;
        }
        $self->goto_lm;
        $self->{vspace} = undef;
        $self->{hspace} = 0;
    }
    if ( $self->{hspace} ) {
        if ( $self->{curpos} + length($text) > $self->{rm} ) {
            # word will not fit on line; do a line break
            $self->nl;
            $self->goto_lm;
        }
        else {
            # word fits on line; use a space
            $self->collect(' ');
            ++$self->{curpos};
        }
        $self->{hspace} = 0;
    }
    $self->collect($text);
    my $pos = $self->{curpos} += length $text;
    $self->{maxpos} = $pos if $self->{maxpos} < $pos;
    $self->{'out'}++;
}
# ------------------------------------------------------------------------
sub goto_lm {
    my $self = shift;
    my $pos = $self->{curpos};
    my $lm  = $self->{lm};
    if ( $pos < $lm ) {
        $self->{curpos} = $lm;
        $self->collect( " " x ( $lm - $pos ) );
    }
}
# ------------------------------------------------------------------------
sub nl {
    my $self = shift;
    $self->{'out'}++;
    $self->{curpos} = 0;
    $self->collect("\n");
}
# ------------------------------------------------------------------------
sub adjust_lm {
    my $self = shift;
    $self->{lm} += $_[0];
    $self->goto_lm;
}
# ------------------------------------------------------------------------
sub adjust_rm {
    shift->{rm} += $_[0];
}
1;
__END__
=pod
=for test_synopsis 1;
__END__
=for stopwords latin1 leftmargin lm plaintext rightmargin rm CPAN homepage
=head1 NAME
HTML::FormatText - Format HTML as plaintext
=head1 VERSION
version 2.12
=head1 SYNOPSIS
    use HTML::TreeBuilder;
    $tree = HTML::TreeBuilder->new->parse_file("test.html");
    use HTML::FormatText;
    $formatter = HTML::FormatText->new(leftmargin => 0, rightmargin => 50);
    print $formatter->format($tree);
or, more simply:
    use HTML::FormatText;
    my $string = HTML::FormatText->format_file(
        'test.html',
        leftmargin => 0, rightmargin => 50
        );
=head1 DESCRIPTION
HTML::FormatText is a formatter that outputs plain latin1 text. All character
attributes (bold/italic/underline) are ignored. Formatting of HTML tables and
forms is not implemented.
HTML::FormatText is built on L<HTML::Formatter> and documentation for that
module applies to this - especially L<HTML::Formatter/new>,
L<HTML::Formatter/format_file> and L<HTML::Formatter/format_string>.
You might specify the following parameters when constructing the formatter:
=over 4
=item I<leftmargin> (alias I<lm>)
The column of the left margin. The default is 3.
=item I<rightmargin> (alias I<rm>)
The column of the right margin. The default is 72.
=back
=head1 SEE ALSO
L<HTML::Formatter>
=head1 INSTALLATION
See perlmodinstall for information and options on installing Perl modules.
=head1 BUGS AND LIMITATIONS
You can make new bug reports, and view existing ones, through the
web interface at L<http://rt.cpan.org/Public/Dist/Display.html?Name=HTML-Format>.
=head1 AVAILABILITY
The project homepage is L<https://metacpan.org/release/HTML-Format>.
The latest version of this module is available from the Comprehensive Perl
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
site near you, or see L<https://metacpan.org/module/HTML::Format/>.
=head1 AUTHORS
=over 4
=item *
Nigel Metheringham <nigelm@cpan.org>
=item *
Sean M Burke <sburke@cpan.org>
=item *
Gisle Aas <gisle@ActiveState.com>
=back
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2015 by Nigel Metheringham, 2002-2005 Sean M Burke, 1999-2002 Gisle Aas.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut