de.dfki.lt.mary.dbselection
Class FeatureMakerMaryServer

java.lang.Object
  extended by de.dfki.lt.mary.dbselection.FeatureMakerMaryServer

public class FeatureMakerMaryServer
extends java.lang.Object

Takes text and converts it to features. Needs a running Mary server.

Author:
Anna Hunecke

Field Summary
protected static java.lang.String basenamesOutFile
           
protected static java.lang.String doneFileName
           
protected static java.io.PrintWriter doneOut
           
protected static FeatureDefinition featDef
           
protected static java.lang.String featOutDirName
           
protected static java.util.Map index2sentences
           
protected static MaryClient mary
           
protected static java.lang.String maryHost
           
protected static java.lang.String maryPort
           
protected static int outDirIndex
           
protected static java.lang.StringBuffer sentence
           
protected static java.lang.String sentOutDirName
           
protected static boolean strictCredibility
           
protected static java.lang.String textFiles
           
protected static int timeOutAfter
           
protected static java.io.PrintWriter unreliableLog
           
protected static java.lang.String unreliableLogFile
           
protected static boolean usefulSentence
           
 
Constructor Summary
FeatureMakerMaryServer()
           
 
Method Summary
protected static boolean checkCredibility(org.w3c.dom.Element t)
          Phonemise the given document with the help of JPhonemiser
protected static void collectTokens(org.w3c.dom.Node nextToken)
          Collect the tokens of a sentence
protected static void dumpSentence(java.lang.String filename, java.lang.String sentence)
          Print the given sentence to the given file
protected static void getFeatures(java.lang.String filename, MaryData d)
          Process the target features and print them to the given file
protected static void getXMLAsString(org.w3c.dom.Node motherNode, java.lang.StringBuffer ppText)
          Convert the given xml-node and its subnodes to Strings and collect them in the given StringBuffer
static void main(java.lang.String[] args)
           
protected static org.w3c.dom.Document phonemiseText(java.lang.String textString)
          Process the given text with the MaryClient from Text to Chunked
protected static void printUsage()
          Print usage of this program
protected static MaryData processSentence(java.lang.String nextSentence, java.lang.String filename)
          Process one sentence from text to target features
protected static boolean readArgs(java.lang.String[] args)
          Read and parse the command line args
protected static java.util.List readInDoneFiles(java.lang.String doneFilesTextName)
          Read the list of already processed files
protected static boolean splitIntoSentences(java.lang.String text, java.lang.String filename)
          Split the content of the file into separate sentences
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

mary

protected static MaryClient mary

sentence

protected static java.lang.StringBuffer sentence

usefulSentence

protected static boolean usefulSentence

index2sentences

protected static java.util.Map index2sentences

featDef

protected static FeatureDefinition featDef

doneOut

protected static java.io.PrintWriter doneOut

unreliableLog

protected static java.io.PrintWriter unreliableLog

textFiles

protected static java.lang.String textFiles

featOutDirName

protected static java.lang.String featOutDirName

sentOutDirName

protected static java.lang.String sentOutDirName

doneFileName

protected static java.lang.String doneFileName

maryHost

protected static java.lang.String maryHost

maryPort

protected static java.lang.String maryPort

timeOutAfter

protected static int timeOutAfter

unreliableLogFile

protected static java.lang.String unreliableLogFile

strictCredibility

protected static boolean strictCredibility

basenamesOutFile

protected static java.lang.String basenamesOutFile

outDirIndex

protected static int outDirIndex
Constructor Detail

FeatureMakerMaryServer

public FeatureMakerMaryServer()
Method Detail

main

public static void main(java.lang.String[] args)
                 throws java.lang.Exception
Throws:
java.lang.Exception

printUsage

protected static void printUsage()
Print usage of this program


readArgs

protected static boolean readArgs(java.lang.String[] args)
Read and parse the command line args

Parameters:
args - the args
Returns:
true, if successful, false otherwise

processSentence

protected static MaryData processSentence(java.lang.String nextSentence,
                                          java.lang.String filename)
Process one sentence from text to target features

Parameters:
nextSentence - the sentence
filename - the file containing the sentence
Returns:
the result of the processing as MaryData object

checkCredibility

protected static boolean checkCredibility(org.w3c.dom.Element t)
Phonemise the given document with the help of JPhonemiser

Parameters:
t - the element to check
Returns:

getFeatures

protected static void getFeatures(java.lang.String filename,
                                  MaryData d)
                           throws java.lang.Exception
Process the target features and print them to the given file

Parameters:
filename - the file to print the features to
d - the target features as MaryData object
Throws:
java.lang.Exception

dumpSentence

protected static void dumpSentence(java.lang.String filename,
                                   java.lang.String sentence)
                            throws java.lang.Exception
Print the given sentence to the given file

Parameters:
filename - the file
sentence - the sentence
Throws:
java.lang.Exception

readInDoneFiles

protected static java.util.List readInDoneFiles(java.lang.String doneFilesTextName)
                                         throws java.lang.Exception
Read the list of already processed files

Parameters:
doneFilesTextName - the file to read from
Returns:
the list of already processed files
Throws:
java.lang.Exception

splitIntoSentences

protected static boolean splitIntoSentences(java.lang.String text,
                                            java.lang.String filename)
                                     throws java.lang.Exception
Split the content of the file into separate sentences

Parameters:
text - the content of the file
filename - the file
Returns:
true, if successful
Throws:
java.lang.Exception

phonemiseText

protected static org.w3c.dom.Document phonemiseText(java.lang.String textString)
                                             throws java.lang.Exception
Process the given text with the MaryClient from Text to Chunked

Parameters:
textString - the text to process
Returns:
the resulting XML-Document
Throws:
java.lang.Exception

collectTokens

protected static void collectTokens(org.w3c.dom.Node nextToken)
Collect the tokens of a sentence

Parameters:
nextToken - the Node to start from

getXMLAsString

protected static void getXMLAsString(org.w3c.dom.Node motherNode,
                                     java.lang.StringBuffer ppText)
Convert the given xml-node and its subnodes to Strings and collect them in the given StringBuffer

Parameters:
motherNode - the xml-node
ppText - the StringBuffer