<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;"># -*- coding: utf-8 -*-

# Author: Franck Sajous
# Copyright: CNRS/CLLE-ERSS
# Last update: 2018-03-21
# See info and licence at: http://redac.univ-tlse2.fr/lexicons/glawi/tools/

use strict;
use warnings;
use utf8;

binmode (STDOUT, ":encoding(utf8)");
binmode (STDERR, ":encoding(utf8)");

# Two or three arguments are expected: the GLAWI file, the label value
# regexp and, optionally, an output file. The whole usage text is sent
# through a single die so that it all goes to STDERR.
if ((@ARGV &lt; 2) || (@ARGV &gt; 3))
{
    die ("Usage: $0 GLAWIfile labelValueRegexp [outFile]\n"
       . "\tExtracts titles:\n"
       . "\t  - either from monosemic entries whose definition (gloss) includes a label matching the specified value.\n"
       . "\t  - or from polysemic entries whose all definitions (glosses) include a label matching the specified value.\n");
} # if ((@ARGV &lt; 2) || (@ARGV &gt; 3))

my $glawiFile  = $ARGV[0];
my $labelValue = $ARGV[1];   # used as a case-insensitive regexp by parseLine
my $outFile    = $ARGV[2];   # undef when titles must go to STDOUT

# Report the system error ($!) so that the cause of a failure is visible.
open (INFILE, "&lt;", $glawiFile) or die ("Unable to read: $glawiFile: $!\n");
binmode (INFILE, ":encoding(utf8)");

if (defined ($outFile))
{
    open (OUTFILE, "&gt;", $outFile) or die ("Unable to write: $outFile: $!\n");
    binmode (OUTFILE, ":encoding(utf8)");
} # if (defined ($outFile))

# Parsing state shared with parseLine.
my ($line, $currentTitle);
# $inDefinitions is kept for compatibility; it is not read in this file.
my ($inArticle, $inDefinitions, $inGloss) = (0, 0, 0);
# Start the counters at 0 so that a closing definitions tag met before any
# opening one does not trigger "uninitialized value" warnings.
my ($nbMatchingLabels, $nbSenses) = (0, 0);

# Only the text located between the article opening and closing tags is
# handed over to parseLine.
while ($line = &lt;INFILE&gt;)
{
    if ($line =~ /&lt;article&gt;/)
    {
	# Parse what follows the opening tag on the same line.
	$inArticle = 1;
	&amp;parseLine ($'); #'
    } # if ($line =~ /&lt;article&gt;/)
    elsif ($inArticle)
    {
	if ($line =~ /&lt;\/article&gt;/)
	{
	    # Parse what precedes the closing tag, then leave the article.
	    &amp;parseLine ($`);
	    $inArticle = 0;
	} # if ($line =~ /&lt;\/article&gt;/)
	else
	{
	    &amp;parseLine ($line);
	} # if ($line =~ /&lt;\/article&gt;/) else {
    } # elsif ($inArticle)
} # while ($line = &lt;INFILE&gt;)

# Buffered write errors only show up when the handle is closed.
if (defined ($outFile))
{
    close (OUTFILE) or die ("Unable to write: $outFile: $!\n");
} # if (defined ($outFile))
close (INFILE);

# Analyses one fragment of an article line and updates the parsing state kept
# in the file-level variables $currentTitle, $inGloss, $nbSenses and
# $nbMatchingLabels. Only the first applicable case below is handled:
#   - title element: its content becomes the current title;
#   - opening definitions tag: both counters are reset to 0;
#   - closing definitions tag: the current title is printed (to OUTFILE when
#     an output file was given, to STDOUT otherwise) if at least one sense
#     was counted and there are at least as many matching labels as senses;
#   - closing / opening gloss tag: the fragment is split around the tag, both
#     sides are parsed recursively and $inGloss is switched off / on (an
#     opening tag also counts one more sense);
#   - any other fragment inside a gloss: the value of the first label element
#     is compared with $labelValue (case-insensitive regexp) and counted when
#     it matches.
#
# Parameter: the text fragment to analyse.
# Returns nothing useful; callers ignore the result.
#
# NOTE(review): labels are counted per fragment, so a gloss holding several
# matching labels on separate lines is counted several times and may make up
# for another gloss without any -- confirm whether this is intended.
sub parseLine
{
    my $currentLine = shift;

    if ($currentLine =~ /&lt;title&gt;(.*?)&lt;\/title&gt;/)
    {
        $currentTitle = $1;
    } # if ($currentLine =~ /&lt;title&gt;(.*?)&lt;\/title&gt;/)
    elsif ($currentLine =~ /&lt;definitions&gt;/)
    {
        $nbSenses = 0;
        $nbMatchingLabels = 0;
    } # elsif ($currentLine =~ /&lt;definitions&gt;/)
    elsif ($currentLine =~ /&lt;\/definitions&gt;/)
    {
        if (($nbSenses &gt; 0) &amp;&amp; ($nbMatchingLabels &gt;= $nbSenses))
        {
            if (defined ($outFile))
            {
                print OUTFILE $currentTitle . "\n";
            } # if (defined ($outFile))
            else
            {
                print $currentTitle . "\n";
            } # if (defined ($outFile)) else {
        } # if (($nbSenses &gt; 0) &amp;&amp; ($nbMatchingLabels &gt;= $nbSenses))
    } # elsif ($currentLine =~ /&lt;\/definitions&gt;/)
    elsif ($currentLine =~ /&lt;\/gloss&gt;/)
    {
        # Save both sides of the match at once: the recursive calls run
        # other regexps and would overwrite $` and $'.
        my ($before, $after) = ($`, $'); #'
        parseLine ($before);
        $inGloss = 0;
        parseLine ($after);
    } # elsif ($currentLine =~ /&lt;\/gloss&gt;/)
    elsif ($currentLine =~ /&lt;gloss&gt;/)
    {
        my ($before, $after) = ($`, $'); #'
        parseLine ($before);
        $inGloss = 1;
        $nbSenses++;
        parseLine ($after);
    } # elsif ($currentLine =~ /&lt;gloss&gt;/)
    elsif ($inGloss)
    {
        # Look at the fragment itself, not at the whole input line:
        # otherwise a label belonging to one gloss would also be counted
        # for every other gloss found on the same line.
        if ($currentLine =~ /&lt;label .*?value="([^"]*?)"/)
        {
            my $currentValue = $1;
            $nbMatchingLabels++ if ($currentValue =~ /$labelValue/i);
        } # if ($currentLine =~ /&lt;label .*?value="([^"]*?)"/)
    } # elsif ($inGloss)

    return;
} # parseLine
</pre></body></html>