001    // This is the file IngredientAmountParser.java.
002    // Copyright 2001, 2002 by Douglas Squirrel.
003    // This file is part of LargoRecipes.
004    // You can write to the author, Douglas Squirrel, at this email address:
005    // dsquirrel@excite.com
006    //
007    // LargoRecipes is free software; you can redistribute it and/or modify
008    // it under the terms of the GNU General Public License as published by
009    // the Free Software Foundation; either version 2 of the License, or
010    // (at your option) any later version.
011    //
012    // LargoRecipes is distributed in the hope that it will be useful,
013    // but WITHOUT ANY WARRANTY; without even the implied warranty of
014    // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
015    // GNU General Public License for more details.
016    //
017    // You should have received a copy of the GNU General Public License
018    // along with LargoRecipes; if not, write to the Free Software
019    // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
020    //
021    // LargoRecipes uses RecipeML, a public XML format for describing recipes.
022    // You can read about RecipeML on the website of its copyright holder, 
023    // FormatData: www.formatdata.com. LargoRecipes is not endorsed by 
024    // FormatData. Distribution of LargoRecipes is governed by the RecipeML 
025    // Public License, which you can read in the file license.txt (or by 
026    // choosing About from the LargoRecipes menu when you run LargoRecipes). The
027    // RecipeML Public License requires, among other things, that LargoRecipes
028    // source files bear the notices below; please see the license for details. 
029    // Modified versions of LargoRecipes that you distribute are required to 
030    // bear these notices if they use RecipeML.
031    //
032    // This product is RecipeML compatible. It is RecipeML Processing Software 
033    // as defined by the RecipeML Public License. RecipeML is copyright (c) 
034    // FormatData, all rights reserved. Distribution of RecipeML Processing 
035    // Software in source and/or binary forms is permitted provided that the 
036    // following conditions are met:
037    //
038    // 1. Distributions in source code must retain the above copyright notice 
039    // and this list of conditions.
040    // 2. Distributions in binary form must reproduce the above copyright notice 
041    // and this list of conditions in the documentation and/or other materials 
042    // provided with the distribution.
043    // 3. All advertising materials and documentation for RecipeML Processing 
044    // Software must display the following acknowledgment: "This product is 
045    // RecipeML compatible."
046    // 4. Names associated with RecipeML or FormatData must not be used to 
047    // endorse or promote RecipeML Processing Software without prior written 
048    // permission from FormatData. For written permission, please contact 
049    // RecipeML@formatdata.com. 
050    //
051    import com.largoguidedog.Exceptions.UnrecoverableException;
052    import com.largoguidedog.RecipeEngine.Measure.Measure;
053    import java.util.Stack;
054    import java.util.StringTokenizer;
055    import java.util.regex.Matcher;
056    import java.util.regex.Pattern;
057    /**
058     *  Parses an input string into components of an <code>IngredientAmount</code>.
059     *  Holds six components, which appear in this order in the string.
060     *  <UL>
061     *    <LI>number
062     *    <LI>measure adjective [optional]
063     *    <LI>measure [optional]
064     *    <LI>ingredient adjective [optional]
065     *    <LI>ingredient
066     *    <LI>end adjective [optional]
067     *  </UL>
068     *  All are held and returned as strings. Client methods are responsible for converting
069     *  quantity, measure, and ingredient strings into numeric values, 
070     *  <code>Measures</code>, and <code>Ingredients</code> respectively. The class does 
071     *  <strong>not</strong> guarantee that these strings will in fact be convertible into
072     *  the respective objects - if any are not, this indicates an error in parsing. 
073     *  
074     *  @since LargoRecipes 0.8
075     *  @author Douglas Squirrel
076     */
077    public class IngredientAmountParser {
078        /**
079         *  <code>Parser</code> that processes the quantity.
080         *  
081         *  @since LargoRecipes 0.8
082         */
083        private ElementParser quantityParser;
084    
085        /**
086         *  <code>Parser</code> that processes the measure adjective.
087         *  
088         *  @since LargoRecipes 0.8
089         */
090        private ElementParser measureAdjectiveParser;
091    
092        /**
093         *  <code>Parser</code> that processes the measure.
094         *  
095         *  @since LargoRecipes 0.8
096         */
097        private ElementParser measureParser;
098    
099        /**
100         *  <code>Parser</code> that processes the ingredient adjective.
101         *  
102         *  @since LargoRecipes 0.8
103         */
104        private ElementParser ingredientAdjectiveParser;
105    
106        /**
107         *  <code>Parser</code> that processes the ingredient.
108         *  
109         *  @since LargoRecipes 0.8
110         */
111        private ElementParser ingredientParser;
112    
113        /**
114         *  <code>Parser</code> that processes the end adjective.
115         *  
116         *  @since LargoRecipes 0.8
117         */
118        private ElementParser endAdjectiveParser;
119        
120        /**
121         *  The <code>Parser</code> that is currently in use.
122         *  
123         *  @since LargoRecipes 0.8
124         */
125        private ElementParser parser;
126        
127        /**
128         *  Construct a new <code>IngredientAmountParser</code> that
129         *  is ready to parse a newly entered string.
130         *  
131         *  @since LargoRecipes 0.8
132         */
133        public IngredientAmountParser() {
134            quantityParser = new QuantityParser();
135            measureAdjectiveParser = new MeasureAdjectiveParser();
136            measureParser = new MeasureParser();
137            ingredientAdjectiveParser = new IngredientAdjectiveParser();
138            ingredientParser = new IngredientParser();
139            endAdjectiveParser = new EndAdjectiveParser();
140            parser = endAdjectiveParser;
141        }
142        
143        /**
144         *  Parse the given <code>String</code>. Upon return, the
145         *  various <code>get...()</code> methods return the component
146         *  strings found during parsing.
147         *
148         *  @param input The input <code>String</code> to parse. 
149         *  
150         *  @throws UnrecoverableException  if an error condition prevents completion of this task, and no
151         *                                  recovery is possible. Client code should abort processing and report
152         *                                  the error.
153         *  
154         *  @since LargoRecipes 0.8
155         */
156        public void parse(String input) throws UnrecoverableException {       
157            Stack stack = transformInput(input);
158            
159            while (false == stack.isEmpty()) {
160                parser.parse(stack);
161            }
162            
163            if (0 == getMeasure().length()) {
164                Stack unitStack = new Stack();
165                unitStack.push("unit");
166                measureParser.parse(unitStack);
167            }
168        }
169        
170        /**
171         *  Get the quantity string.
172         *
173         *  @return The quantity <code>String</code>.
174         *  
175         *  @since LargoRecipes 0.8
176         */
177        public String getNumber() {
178            return quantityParser.getValue();
179        }
180    
181        /**
182         *  Get the measure adjective string.
183         *
184         *  @return The measure adjective <code>String</code>.
185         *  
186         *  @since LargoRecipes 0.8
187         */    
188        public String getMeasureAdjective() {
189            return measureAdjectiveParser.getValue();
190        }
191    
192        /**
193         *  Get the measure string.
194         *
195         *  @return The measure <code>String</code>.
196         *  
197         *  @since LargoRecipes 0.8
198         */    
199        public String getMeasure() {
200            return measureParser.getValue();
201        }
202        
203        /**
204         *  Get the ingredient adjective string.
205         *
206         *  @return The ingredient adjective <code>String</code>.
207         *  
208         *  @since LargoRecipes 0.8
209         */    
210        public String getIngredientAdjective() {
211            return ingredientAdjectiveParser.getValue();
212        }
213    
214        /**
215         *  Get the ingredient string.
216         *
217         *  @return The ingredient <code>String</code>.
218         *   
219         *  @since LargoRecipes 0.8
220         */
221        public String getIngredient() {
222            return ingredientParser.getValue();
223        }
224    
225        /**
226         *  Get the end adjective string.
227         *
228         *  @return The end adjective <code>String</code>.
229         *  
230         *  @since LargoRecipes 0.8
231         */
232        public String getEndAdjective() {
233            return endAdjectiveParser.getValue();
234        }
235        
236        private static Pattern fixSeparatorPattern = Pattern.compile("(.*\\w)(,|;|:)(\\w.*)");
237        private static Pattern fixSlashPattern = Pattern.compile("(.*[a-zA-Z])/([a-zA-Z].*)");
238        
239        /**
240         *  Makes parsing easier by reversing the order of words.
241         *  For example, "1 1/2 cups brown sugar" becomes "sugar brown cups 1/2 1". This is 
242         *  easier to parse because the states come in a predictable order - in the normal
243         *  word order, you have to do something special with the first adjective, because you
244         *  don't know if it is the measure or ingredient adjective until later. Also fixes
245         *  missed spaces after separators and replaces slashes: <code>1 teaspoon beans,chopped</code> becomes
246         *  <code>1 teaspoon beans, chopped</code> and <code>1 teaspoon butter/margarine</code> becomes
247         *  <code>1 teaspoon butter or margarine</code>.
248         *
249         *  @param input The <code>String</code> to transform.
250         *
251         *  @return <code>Stack</code> of tokens in reverse order, so the first item you <code>pop</code> is the
252         *          last token of the input string
253         *  
254         *  @since LargoRecipes 0.8
255         */
256        private Stack transformInput(String input) {
257            Matcher sepMatcher = fixSeparatorPattern.matcher(input);
258            if (true == sepMatcher.matches()) {
259                input = sepMatcher.group(1) + sepMatcher.group(2) + " " + sepMatcher.group(3);
260            }
261            Matcher slashMatcher = fixSlashPattern.matcher(input);
262            if (true == slashMatcher.matches()) {
263                input = slashMatcher.group(1) + " or " + slashMatcher.group(2);
264            }
265       
266            StringTokenizer z = new StringTokenizer(input, " ");
267            Stack stack = new Stack();
268            while (z.hasMoreTokens()) {
269                stack.push(z.nextToken());
270            }
271            return stack;
272        }
273           
274        /**
275         *  Parser for one of the six elements of an ingredient amount.
276         *  The six subclasses each handle one element.
277         *  
278         *  @since LargoRecipes 0.8
279         *  @author Douglas Squirrel
280         */
281        private abstract class ElementParser {
282            /**
283             *  The current value of the element being parsed.
284             *  
285             *  @since LargoRecipes 0.8
286             */
287            protected String value = "";
288            
289            /**
290             *  Private access constructor ensures no instances of this class
291             *  can be constructed.
292             *  
293             *  @since LargoRecipes 0.8
294             */
295            private ElementParser() {}
296            
297            /**
298             *  Pop the next token off the given <code>Stack</code> and parse it. If that token is not part of this parser's
299             *  element, change the parser to the appropriate parser for that token
300             *  and call <code>parse</code> on the new parser. May manipulate the stack, for instance to
301             *  "push back" tokens.
302             *
303             *  @param tokenStack The <code>Stack</code> of tokens to process.
304             *  
305             *  @throws UnrecoverableException  if an error condition prevents completion of this task, and no
306             *                                  recovery is possible. Client code should abort processing and report
307             *                                  the error.
308             *  
309             *  @since LargoRecipes 0.8
310             */
311            public abstract void parse(Stack tokenStack) throws UnrecoverableException;
312            
313            /**
314             *  Get the value stored in this parser.
315             *
316             *  @return The value of the element this parser parses.
317             *  
318             *  @since LargoRecipes 0.8
319             */
320            public String getValue() {
321                return value;
322            }
323            
324            /**
325             *  Utility method that strips a trailing asterisk, comma, semicolon, or full stop from a token, if it is 
326             *  present. An asterisk followed by a comma, semicolon, or full stop at the end of the token is also
327             *  removed. For example, both "butter," and "butter*:" become "butter".
328             *
329             *  @param  token  The token to strip.
330             *
331             *  @return  The token minus the trailing comma or full stop, if one exists, or just the token
332             *           itself if there is no trailing punctuation.
333             *  
334             *  @since LargoRecipes 0.8
335             */
336            protected String stripToken(String token) {
337                return token.replaceAll("(\\*)?(\\.|,|;|:|\\*)$", "");
338            }
339        }
340        
341        /**
342         *  Class that parses the quantity element.
343         *  
344         *  @since LargoRecipes 0.8
345         *  @author Douglas Squirrel
346         */
347        private class QuantityParser extends ElementParser {
348            /**
349             *  Construct a new <code>QuantityParser</code>.
350             *  
351             *  @since LargoRecipes 0.8
352             */
353            private QuantityParser() {}
354            
355            public void parse(Stack tokenStack) throws UnrecoverableException {
356                if (true == tokenStack.empty()) {
357                    return;
358                }
359                String token = (String) tokenStack.pop();
360                value = appendToken(stripToken(token), value);
361            }
362        }
363        
364        /**
365         *  Class that parses the end adjective element.
366         *  
367         *  @since LargoRecipes 0.8
368         *  @author Douglas Squirrel
369         */    
370        private class EndAdjectiveParser extends ElementParser {
371            /**
372             *  Construct a new <code>EndAdjectiveParser</code>.
373             *  
374             *  @since LargoRecipes 0.8
375             */
376            private EndAdjectiveParser() {}
377            
378            public void parse(Stack tokenStack) throws UnrecoverableException {
379                if (true == tokenStack.empty()) {
380                    return;
381                }
382                String token = (String) tokenStack.peek();
383                String regex = "\\b\\Q" + stripToken(token).toUpperCase() + "\\E$";
384                if (true == Ingredient.find(regex)) {
385                    if (true == token.endsWith(",")) {
386                        value = "," + value;
387                    }
388                    parser = ingredientParser;
389                    parser.parse(tokenStack);
390                } else {
391                    tokenStack.pop();
392                    value = appendToken(token, value);
393                }
394            }
395        }
396    
397        /**
398         *  Class that parses the ingredient element.
399         *  
400         *  @since LargoRecipes 0.8
401         *  @author Douglas Squirrel
402         */     
403        private class IngredientParser extends ElementParser {
404            /**
405             *  Construct a new <code>IngredientParser</code>.
406             *  
407             *  @since LargoRecipes 0.8
408             */
409            private IngredientParser() {}
410            
411            public void parse(Stack tokenStack) throws UnrecoverableException {
412                if (true == tokenStack.empty()) {
413                    return;
414                }
415                String token = (String) tokenStack.peek();
416                if (true == isNumber(token))
417                { 
418                    parser = quantityParser;
419                    parser.parse(tokenStack);
420                    return;
421                }
422                
423                String newIngredient = appendToken(stripToken(token), value);
424                if (true == Ingredient.find("\\b\\Q" + newIngredient.toUpperCase() + "\\E$")) 
425                {
426                    tokenStack.pop();
427                    value = newIngredient;
428                    return;
429                }
430                
431                if (null != Ingredient.retrieveByName(value)) {
432                    parser = ingredientAdjectiveParser;
433                    parser.parse(tokenStack);
434                } else {
435                    StringTokenizer z = new StringTokenizer(value, " ");
436                    while (z.hasMoreTokens()) {
437                        tokenStack.push(z.nextToken());
438                    }
439                    String endToken = (String) tokenStack.pop();
440                    value = "";
441                    parser = endAdjectiveParser;
442                    parser.value = appendToken(endToken, parser.value);
443                    parser.parse(tokenStack);
444                }
445            }
446        }
447    
448        /**
449         *  Class that parses the ingredient adjective element.
450         *  
451         *  @since LargoRecipes 0.8
452         *  @author Douglas Squirrel
453         */     
454        private class IngredientAdjectiveParser extends ElementParser {
455            /**
456             *  Construct a new <code>IngredientAdjectiveParser</code>.
457             *  
458             *  @since LargoRecipes 0.8
459             */
460            private IngredientAdjectiveParser() {}
461            
462            public void parse(Stack tokenStack) throws UnrecoverableException {
463                if (true == tokenStack.empty()) {
464                    return;
465                }
466                String token = (String) tokenStack.peek();
467                if (true == isNumber(token))
468                { 
469                    parser = quantityParser;
470                    parser.parse(tokenStack);
471                    return;
472                }
473                
474                if (true == Measure.find(stripToken(token))) {
475                    parser = measureParser;
476                    parser.parse(tokenStack);
477                } else {
478                    tokenStack.pop();
479                    value = appendToken(token, value);
480                }
481            }
482        }
483    
484        /**
485         *  Class that parses the measure element.
486         *  
487         *  @since LargoRecipes 0.8
488         *  @author Douglas Squirrel
489         */     
490        private class MeasureParser extends ElementParser {
491            /**
492             *  Construct a new <code>MeasureParser</code>.
493             *  
494             *  @since LargoRecipes 0.8
495             */
496            private MeasureParser() {}
497            
498            public void parse(Stack tokenStack) throws UnrecoverableException {
499                if (true == tokenStack.empty()) {
500                    return;
501                }
502                String token = (String) tokenStack.peek();
503                if (true == isNumber(token))
504                { 
505                    parser = quantityParser;
506                    parser.parse(tokenStack);
507                    return;
508                }
509                
510                String newMeasure = appendToken(stripToken(token), value);
511                if (false == Measure.find(newMeasure)) {
512                    parser = measureAdjectiveParser;
513                    parser.parse(tokenStack);
514                } else {
515                    tokenStack.pop();
516                    value = newMeasure;
517                }
518            }
519        }
520    
521        /**
522         *  Class that parses the measure adjective element.
523         *  
524         *  @since LargoRecipes 0.8
525         *  @author Douglas Squirrel
526         */     
527        private class MeasureAdjectiveParser extends ElementParser {
528            /**
529             *  Construct a new <code>MeasureAdjectiveParser</code>.
530             *  
531             *  @since LargoRecipes 0.8
532             */
533            private MeasureAdjectiveParser() {}
534            
535            public void parse(Stack tokenStack) throws UnrecoverableException {
536                if (true == tokenStack.empty()) {
537                    return;
538                }
539                String token = (String) tokenStack.peek();
540                if (true == isNumber(token)) {
541                    parser = quantityParser;
542                    parser.parse(tokenStack);
543                } else {
544                    tokenStack.pop();
545                    value = appendToken(token, value);
546                }
547            }
548        }
549         
550        /**
551         *  Get a string representation of this <code>IngredientAmountParser</code>.
552         *
553         *  @return The desired <code>String</code> representation.
554         *  
555         *  @since LargoRecipes 0.8
556         */   
557        public String toString() {
558            return getNumber()              + "/" 
559                 + getMeasureAdjective()    + "/"
560                 + getMeasure()             + "/"
561                 + getIngredientAdjective() + "/"
562                 + getIngredient()          + "/"
563                 + getEndAdjective();
564        }
565        
566        /**
567         *  Utility function that determines if the given token could be a number.
568         *  Checks the first character and concludes that the token is a number if
569         *  that character is a digit or a full stop.
570         *
571         *  @param  token  The token to check.
572         *
573         *  @return <code>true</code> if the token could be a number according to the
574         *          above test; <code>false</code> if not.
575         *  
576         *  @since LargoRecipes 0.8
577         */
578        private boolean isNumber(String token) {
579            if (true == token.matches("(\\d|\\.)(\\d|\\.|/)*")) {
580                return true;
581            } else {
582                return false;
583            }
584        }
585        
586        /**
587         *  Utility function that concatenates two strings, separated by a space.
588         *  If one or the other is empty, returns the non-empty string. If both
589         *  are empty, returns an empty string.
590         *
591         *  @param s  The first string.
592         *  @param t  The second string.
593         *
594         *  @return <code>s + " " + t</code>, unless <code>s</code> or </code>t</code>
595         *          or both is empty, in which case returns <code>s + t</code>.
596         *  
597         *  @since LargoRecipes 0.8
598         */
599        private String appendToken(String s, String t) {
600            if (0 == s.length()) {
601                return t;
602            }
603            if (0 == t.length()) {
604                return s;
605            }
606            return (s + " " + t);
607        }
608    }
609    
610    
611