<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;"># -*- coding: utf-8 -*-

# Author: Franck Sajous
# Copyright: CNRS/CLLE-ERSS
# Last update: 2018-09-04
# See info and licence at: http://redac.univ-tlse2.fr/lexicons/glawi/tools/

use strict;
use warnings;
use utf8;

binmode (STDOUT, ":encoding(utf8)");
binmode (STDERR, ":encoding(utf8)");

# Command line: GLAWIfile labelValueRegexp [outFile].
# The whole usage text goes to STDERR (via die) so that it never ends up
# mixed with extracted data when STDOUT is redirected.
if ( (@ARGV &lt; 2) || (@ARGV &gt; 3))
{
    die ("Usage: $0 GLAWIfile labelValueRegexp [outFile]\n"
       . "\tExtracts article's gloss having a definition including a label whose value matches the specified one.\n");
} # if ( (@ARGV &lt; 2) || (@ARGV &gt; 3))

my $glawiFile  = $ARGV[0];
my $labelValue = $ARGV[1];
my $outFile    = $ARGV[2];

# The input is decoded to characters (see the :encoding layer below), but
# command-line arguments arrive as raw bytes.  Decode the regexp so that
# non-ASCII patterns can match the decoded label values.  utf8::decode leaves
# the string untouched when it is not well-formed UTF-8 (e.g. when it has
# already been decoded by perl -CA).  File names are deliberately kept as
# bytes.
utf8::decode ($labelValue);

# Compile the user-supplied regexp once, up front, so that a syntax error is
# reported immediately instead of in the middle of the extraction.
my $compiledLabelValue = eval { qr/$labelValue/i };
if (! defined ($compiledLabelValue))
{
    die ("Invalid labelValueRegexp '$labelValue': $@");
} # if (! defined ($compiledLabelValue))

open (my $inFile, "&lt;:encoding(utf8)", $glawiFile)
    or die ("Unable to read: $glawiFile: $!\n");

# OUTFILE stays a bareword handle: parseLine () prints to it by name.
if (defined ($outFile))
{
    open (OUTFILE, "&gt;:encoding(utf8)", $outFile)
	or die ("Unable to write: $outFile: $!\n");
} # if (defined ($outFile))

# State shared with parseLine ().
my ($matches, $currentTitle);
my $currentBuffer = "";
my ($inArticle, $inGloss, $inText) = (0, 0, 0);

# Only the text located between the article opening and closing tags is
# handed over to parseLine (); everything else is skipped.
while (my $line = &lt;$inFile&gt;)
{
    if ($line =~ /&lt;article&gt;/)
    {
	# Keep only what follows the opening tag.
	my $afterOpening = $';
	$inArticle = 1;
	$line = $afterOpening;
    } # if ($line =~ /&lt;article&gt;/)

    next unless ($inArticle);

    # Also tested on the remainder of an opening line, so that an article
    # opened and closed on the same line does not leave $inArticle set.
    if ($line =~ /&lt;\/article&gt;/)
    {
	# Keep only what precedes the closing tag.
	my $beforeClosing = $`;
	$inArticle = 0;
	$line = $beforeClosing;
    } # if ($line =~ /&lt;\/article&gt;/)

    &amp;parseLine ($line);
} # while (my $line = &lt;$inFile&gt;)

close ($inFile);

# Buffered write errors only show up when the handle is closed.
if (defined ($outFile))
{
    close (OUTFILE) or die ("Error while writing: $outFile: $!\n");
} # if (defined ($outFile))

# parseLine ($fragment)
#
# Processes one fragment of article text (a whole line or a piece of a line)
# and updates the file-scoped parsing state:
#   - a title element sets $currentTitle;
#   - a gloss opening tag sets $inGloss and resets $matches, the closing tag
#     clears $inGloss;
#   - inside a gloss, every label value found on the fragment is tested
#     against the user-supplied regexp $labelValue (case-insensitive);
#   - inside a gloss, the text between the txt tags is accumulated in
#     $currentBuffer and, when the closing tag is reached and one label of
#     the gloss matched, "title TAB text" is printed to OUTFILE if an output
#     file was given, to STDOUT otherwise.
# When a tag splits the fragment, the parts located before and after the tag
# are processed recursively.
# Returns nothing meaningful.
#
# No prototype is declared: the sub takes one argument, and an empty
# prototype would reject any call not written with the &amp; sigil.
sub parseLine
{
    my ($currentLine) = @_;

    if ($currentLine =~ /&lt;title&gt;(.*?)&lt;\/title&gt;/)
    {
	$currentTitle = $1;
    } # if ($currentLine =~ /&lt;title&gt;(.*?)&lt;\/title&gt;/)
    elsif ($currentLine =~ /&lt;gloss&gt;/)
    {
	# Saved right away: the recursive calls overwrite $` and $'.
	my ($before, $after) = ($`, $');
	parseLine ($before);
	$inGloss = 1;
	$matches = 0;
	parseLine ($after);
    } # elsif ($currentLine =~ /&lt;gloss&gt;/)
    elsif ($currentLine =~ /&lt;\/gloss&gt;/)
    {
	my ($before, $after) = ($`, $');
	parseLine ($before);
	$inGloss = 0;
	parseLine ($after);
    } # elsif ($currentLine =~ /&lt;\/gloss&gt;/)
    elsif ($inGloss)
    {
	# Collect the value of every label of the fragment, not only the
	# first one, so that several labels on one line are all tested.
	my @labelValues = ($currentLine =~ /&lt;label .*?value="([^"]*?)"/g);

	if (@labelValues)
	{
	    foreach my $currentValue (@labelValues)
	    {
		last if ($matches);
		# $labelValue is a regexp given by the user: it is
		# interpolated as a pattern on purpose.
		$matches = ($currentValue =~ /$labelValue/i);
	    } # foreach my $currentValue (@labelValues)
	} # if (@labelValues)
	elsif ($currentLine =~ /&lt;txt&gt;/)
	{
	    my ($before, $after) = ($`, $');
	    parseLine ($before);
	    $inText = 1;
	    $currentBuffer = "";
	    parseLine ($after);
	} # elsif ($currentLine =~ /&lt;txt&gt;/)
	elsif ($inText)
	{
	    if ($currentLine =~ /&lt;\/txt&gt;/)
	    {
		my ($before, $after) = ($`, $');
		# Appends the text preceding the closing tag to the buffer.
		parseLine ($before);
		$inText = 0;

		if ($matches)
		{
		    my $out = defined ($outFile) ? \*OUTFILE : \*STDOUT;
		    print {$out} $currentTitle . "\t" . $currentBuffer . "\n";
		} # if ($matches)

		parseLine ($after);
	    } # if ($currentLine =~ /&lt;\/txt&gt;/)
	    else
	    {
		$currentBuffer .= $currentLine;
	    } # if ($currentLine =~ /&lt;\/txt&gt;/) else {
	} # elsif ($inText)
    } # elsif ($inGloss)

    return;
} # parseLine
</pre></body></html>