001 // This is the file IngredientAmountParser.java.
002 // Copyright 2001, 2002 by Douglas Squirrel.
003 // This file is part of LargoRecipes.
004 // You can write to the author, Douglas Squirrel, at this email address:
005 // dsquirrel@excite.com
006 //
007 // LargoRecipes is free software; you can redistribute it and/or modify
008 // it under the terms of the GNU General Public License as published by
009 // the Free Software Foundation; either version 2 of the License, or
010 // (at your option) any later version.
011 //
012 // LargoRecipes is distributed in the hope that it will be useful,
013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
015 // GNU General Public License for more details.
016 //
017 // You should have received a copy of the GNU General Public License
018 // along with LargoRecipes; if not, write to the Free Software
019 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
020 //
021 // LargoRecipes uses RecipeML, a public XML format for describing recipes.
022 // You can read about RecipeML on the website of its copyright holder,
023 // FormatData: www.formatdata.com. LargoRecipes is not endorsed by
024 // FormatData. Distribution of LargoRecipes is governed by the RecipeML
025 // Public License, which you can read in the file license.txt (or by
026 // choosing About from the LargoRecipes menu when you run LargoRecipes). The
027 // RecipeML Public License requires, among other things, that LargoRecipes
028 // source files bear the notices below; please see the license for details.
029 // Modified versions of LargoRecipes that you distribute are required to
030 // bear these notices if they use RecipeML.
031 //
032 // This product is RecipeML compatible. It is RecipeML Processing Software
033 // as defined by the RecipeML Public License. RecipeML is copyright (c)
034 // FormatData, all rights reserved. Distribution of RecipeML Processing
035 // Software in source and/or binary forms is permitted provided that the
036 // following conditions are met:
037 //
038 // 1. Distributions in source code must retain the above copyright notice
039 // and this list of conditions.
040 // 2. Distributions in binary form must reproduce the above copyright notice
041 // and this list of conditions in the documentation and/or other materials
042 // provided with the distribution.
043 // 3. All advertising materials and documentation for RecipeML Processing
044 // Software must display the following acknowledgment: "This product is
045 // RecipeML compatible."
046 // 4. Names associated with RecipeML or FormatData must not be used to
047 // endorse or promote RecipeML Processing Software without prior written
048 // permission from FormatData. For written permission, please contact
049 // RecipeML@formatdata.com.
050 //
051 import com.largoguidedog.Exceptions.UnrecoverableException;
052 import com.largoguidedog.RecipeEngine.Measure.Measure;
053 import java.util.Stack;
054 import java.util.StringTokenizer;
055 import java.util.regex.Matcher;
056 import java.util.regex.Pattern;
057 /**
058 * Parses an input string into components of an <code>IngredientAmount</code>.
059 * Holds six components, which appear in this order in the string.
060 * <UL>
061 * <LI>number
062 * <LI>measure adjective [optional]
063 * <LI>measure [optional]
064 * <LI>ingredient adjective [optional]
065 * <LI>ingredient
066 * <LI>end adjective [optional]
067 * </UL>
068 * All are held and returned as strings. Client methods are responsible for converting
069 * quantity, measure, and ingredient strings into numeric values,
070 * <code>Measures</code>, and <code>Ingredients</code> respectively. The class does
071 * <strong>not</strong> guarantee that these strings will in fact be convertible into
072 * the respective objects - if any are not, this indicates an error in parsing.
073 *
074 * @since LargoRecipes 0.8
075 * @author Douglas Squirrel
076 */
077 public class IngredientAmountParser {
078 /**
079 * <code>Parser</code> that processes the quantity.
080 *
081 * @since LargoRecipes 0.8
082 */
083 private ElementParser quantityParser;
084
085 /**
086 * <code>Parser</code> that processes the measure adjective.
087 *
088 * @since LargoRecipes 0.8
089 */
090 private ElementParser measureAdjectiveParser;
091
092 /**
093 * <code>Parser</code> that processes the measure.
094 *
095 * @since LargoRecipes 0.8
096 */
097 private ElementParser measureParser;
098
099 /**
100 * <code>Parser</code> that processes the ingredient adjective.
101 *
102 * @since LargoRecipes 0.8
103 */
104 private ElementParser ingredientAdjectiveParser;
105
106 /**
107 * <code>Parser</code> that processes the ingredient.
108 *
109 * @since LargoRecipes 0.8
110 */
111 private ElementParser ingredientParser;
112
113 /**
114 * <code>Parser</code> that processes the end adjective.
115 *
116 * @since LargoRecipes 0.8
117 */
118 private ElementParser endAdjectiveParser;
119
120 /**
121 * The <code>Parser</code> that is currently in use.
122 *
123 * @since LargoRecipes 0.8
124 */
125 private ElementParser parser;
126
/**
 * Creates an <code>IngredientAmountParser</code> with one fresh element
 * parser per element. The end adjective parser is selected as the current
 * parser because tokens are consumed from the end of the input string
 * towards its beginning.
 *
 * @since LargoRecipes 0.8
 */
public IngredientAmountParser() {
    // Created in the order in which the elements are met while walking
    // the input backwards; the constructions are independent of each other.
    this.endAdjectiveParser = new EndAdjectiveParser();
    this.ingredientParser = new IngredientParser();
    this.ingredientAdjectiveParser = new IngredientAdjectiveParser();
    this.measureParser = new MeasureParser();
    this.measureAdjectiveParser = new MeasureAdjectiveParser();
    this.quantityParser = new QuantityParser();

    this.parser = this.endAdjectiveParser;
}
142
143 /**
144 * Parse the given <code>String</code>. Upon return, the
145 * various <code>get...()</code> methods return the component
146 * strings found during parsing.
147 *
148 * @param input The input <code>String</code> to parse.
149 *
150 * @throws UnrecoverableException if an error condition prevents completion of this task, and no
151 * recovery is possible. Client code should abort processing and report
152 * the error.
153 *
154 * @since LargoRecipes 0.8
155 */
156 public void parse(String input) throws UnrecoverableException {
157 Stack stack = transformInput(input);
158
159 while (false == stack.isEmpty()) {
160 parser.parse(stack);
161 }
162
163 if (0 == getMeasure().length()) {
164 Stack unitStack = new Stack();
165 unitStack.push("unit");
166 measureParser.parse(unitStack);
167 }
168 }
169
170 /**
171 * Get the quantity string.
172 *
173 * @return The quantity <code>String</code>.
174 *
175 * @since LargoRecipes 0.8
176 */
177 public String getNumber() {
178 return quantityParser.getValue();
179 }
180
181 /**
182 * Get the measure adjective string.
183 *
184 * @return The measure adjective <code>String</code>.
185 *
186 * @since LargoRecipes 0.8
187 */
188 public String getMeasureAdjective() {
189 return measureAdjectiveParser.getValue();
190 }
191
192 /**
193 * Get the measure string.
194 *
195 * @return The measure <code>String</code>.
196 *
197 * @since LargoRecipes 0.8
198 */
199 public String getMeasure() {
200 return measureParser.getValue();
201 }
202
203 /**
204 * Get the ingredient adjective string.
205 *
206 * @return The ingredient adjective <code>String</code>.
207 *
208 * @since LargoRecipes 0.8
209 */
210 public String getIngredientAdjective() {
211 return ingredientAdjectiveParser.getValue();
212 }
213
214 /**
215 * Get the ingredient string.
216 *
217 * @return The ingredient <code>String</code>.
218 *
219 * @since LargoRecipes 0.8
220 */
221 public String getIngredient() {
222 return ingredientParser.getValue();
223 }
224
225 /**
226 * Get the end adjective string.
227 *
228 * @return The end adjective <code>String</code>.
229 *
230 * @since LargoRecipes 0.8
231 */
232 public String getEndAdjective() {
233 return endAdjectiveParser.getValue();
234 }
235
236 private static Pattern fixSeparatorPattern = Pattern.compile("(.*\\w)(,|;|:)(\\w.*)");
237 private static Pattern fixSlashPattern = Pattern.compile("(.*[a-zA-Z])/([a-zA-Z].*)");
238
239 /**
240 * Makes parsing easier by reversing the order of words.
241 * For example, "1 1/2 cups brown sugar" becomes "sugar brown cups 1/2 1". This is
242 * easier to parse because the states come in a predictable order - in the normal
243 * word order, you have to do something special with the first adjective, because you
244 * don't know if it is the measure or ingredient adjective until later. Also fixes
245 * missed spaces after separators and replaces slashes: <code>1 teaspoon beans,chopped</code> becomes
246 * <code>1 teaspoon beans, chopped</code> and <code>1 teaspoon butter/margarine</code> becomes
247 * <code>1 teaspoon butter or margarine</code>.
248 *
249 * @param input The <code>String</code> to transform.
250 *
251 * @return <code>Stack</code> of tokens in reverse order, so the first item you <code>pop</code> is the
252 * last token of the input string
253 *
254 * @since LargoRecipes 0.8
255 */
256 private Stack transformInput(String input) {
257 Matcher sepMatcher = fixSeparatorPattern.matcher(input);
258 if (true == sepMatcher.matches()) {
259 input = sepMatcher.group(1) + sepMatcher.group(2) + " " + sepMatcher.group(3);
260 }
261 Matcher slashMatcher = fixSlashPattern.matcher(input);
262 if (true == slashMatcher.matches()) {
263 input = slashMatcher.group(1) + " or " + slashMatcher.group(2);
264 }
265
266 StringTokenizer z = new StringTokenizer(input, " ");
267 Stack stack = new Stack();
268 while (z.hasMoreTokens()) {
269 stack.push(z.nextToken());
270 }
271 return stack;
272 }
273
274 /**
275 * Parser for one of the six elements of an ingredient amount.
276 * The six subclasses each handle one element.
277 *
278 * @since LargoRecipes 0.8
279 * @author Douglas Squirrel
280 */
281 private abstract class ElementParser {
282 /**
283 * The current value of the element being parsed.
284 *
285 * @since LargoRecipes 0.8
286 */
287 protected String value = "";
288
289 /**
290 * Private access constructor ensures no instances of this class
291 * can be constructed.
292 *
293 * @since LargoRecipes 0.8
294 */
295 private ElementParser() {}
296
297 /**
298 * Pop the next token off the given <code>Stack</code> and parse it. If that token is not part of this parser's
299 * element, change the parser to the appropriate parser for that token
300 * and call <code>parse</code> on the new parser. May manipulate the stack, for instance to
301 * "push back" tokens.
302 *
303 * @param tokenStack The <code>Stack</code> of tokens to process.
304 *
305 * @throws UnrecoverableException if an error condition prevents completion of this task, and no
306 * recovery is possible. Client code should abort processing and report
307 * the error.
308 *
309 * @since LargoRecipes 0.8
310 */
311 public abstract void parse(Stack tokenStack) throws UnrecoverableException;
312
313 /**
314 * Get the value stored in this parser.
315 *
316 * @return The value of the element this parser parses.
317 *
318 * @since LargoRecipes 0.8
319 */
320 public String getValue() {
321 return value;
322 }
323
324 /**
325 * Utility method that strips a trailing asterisk, comma, semicolon, or full stop from a token, if it is
326 * present. An asterisk followed by a comma, semicolon, or full stop at the end of the token is also
327 * removed. For example, both "butter," and "butter*:" become "butter".
328 *
329 * @param token The token to strip.
330 *
331 * @return The token minus the trailing comma or full stop, if one exists, or just the token
332 * itself if there is no trailing punctuation.
333 *
334 * @since LargoRecipes 0.8
335 */
336 protected String stripToken(String token) {
337 return token.replaceAll("(\\*)?(\\.|,|;|:|\\*)$", "");
338 }
339 }
340
341 /**
342 * Class that parses the quantity element.
343 *
344 * @since LargoRecipes 0.8
345 * @author Douglas Squirrel
346 */
347 private class QuantityParser extends ElementParser {
348 /**
349 * Construct a new <code>QuantityParser</code>.
350 *
351 * @since LargoRecipes 0.8
352 */
353 private QuantityParser() {}
354
355 public void parse(Stack tokenStack) throws UnrecoverableException {
356 if (true == tokenStack.empty()) {
357 return;
358 }
359 String token = (String) tokenStack.pop();
360 value = appendToken(stripToken(token), value);
361 }
362 }
363
364 /**
365 * Class that parses the end adjective element.
366 *
367 * @since LargoRecipes 0.8
368 * @author Douglas Squirrel
369 */
370 private class EndAdjectiveParser extends ElementParser {
371 /**
372 * Construct a new <code>EndAdjectiveParser</code>.
373 *
374 * @since LargoRecipes 0.8
375 */
376 private EndAdjectiveParser() {}
377
378 public void parse(Stack tokenStack) throws UnrecoverableException {
379 if (true == tokenStack.empty()) {
380 return;
381 }
382 String token = (String) tokenStack.peek();
383 String regex = "\\b\\Q" + stripToken(token).toUpperCase() + "\\E$";
384 if (true == Ingredient.find(regex)) {
385 if (true == token.endsWith(",")) {
386 value = "," + value;
387 }
388 parser = ingredientParser;
389 parser.parse(tokenStack);
390 } else {
391 tokenStack.pop();
392 value = appendToken(token, value);
393 }
394 }
395 }
396
397 /**
398 * Class that parses the ingredient element.
399 *
400 * @since LargoRecipes 0.8
401 * @author Douglas Squirrel
402 */
403 private class IngredientParser extends ElementParser {
404 /**
405 * Construct a new <code>IngredientParser</code>.
406 *
407 * @since LargoRecipes 0.8
408 */
409 private IngredientParser() {}
410
411 public void parse(Stack tokenStack) throws UnrecoverableException {
412 if (true == tokenStack.empty()) {
413 return;
414 }
415 String token = (String) tokenStack.peek();
416 if (true == isNumber(token))
417 {
418 parser = quantityParser;
419 parser.parse(tokenStack);
420 return;
421 }
422
423 String newIngredient = appendToken(stripToken(token), value);
424 if (true == Ingredient.find("\\b\\Q" + newIngredient.toUpperCase() + "\\E$"))
425 {
426 tokenStack.pop();
427 value = newIngredient;
428 return;
429 }
430
431 if (null != Ingredient.retrieveByName(value)) {
432 parser = ingredientAdjectiveParser;
433 parser.parse(tokenStack);
434 } else {
435 StringTokenizer z = new StringTokenizer(value, " ");
436 while (z.hasMoreTokens()) {
437 tokenStack.push(z.nextToken());
438 }
439 String endToken = (String) tokenStack.pop();
440 value = "";
441 parser = endAdjectiveParser;
442 parser.value = appendToken(endToken, parser.value);
443 parser.parse(tokenStack);
444 }
445 }
446 }
447
448 /**
449 * Class that parses the ingredient adjective element.
450 *
451 * @since LargoRecipes 0.8
452 * @author Douglas Squirrel
453 */
454 private class IngredientAdjectiveParser extends ElementParser {
455 /**
456 * Construct a new <code>IngredientAdjectiveParser</code>.
457 *
458 * @since LargoRecipes 0.8
459 */
460 private IngredientAdjectiveParser() {}
461
462 public void parse(Stack tokenStack) throws UnrecoverableException {
463 if (true == tokenStack.empty()) {
464 return;
465 }
466 String token = (String) tokenStack.peek();
467 if (true == isNumber(token))
468 {
469 parser = quantityParser;
470 parser.parse(tokenStack);
471 return;
472 }
473
474 if (true == Measure.find(stripToken(token))) {
475 parser = measureParser;
476 parser.parse(tokenStack);
477 } else {
478 tokenStack.pop();
479 value = appendToken(token, value);
480 }
481 }
482 }
483
484 /**
485 * Class that parses the measure element.
486 *
487 * @since LargoRecipes 0.8
488 * @author Douglas Squirrel
489 */
490 private class MeasureParser extends ElementParser {
491 /**
492 * Construct a new <code>MeasureParser</code>.
493 *
494 * @since LargoRecipes 0.8
495 */
496 private MeasureParser() {}
497
498 public void parse(Stack tokenStack) throws UnrecoverableException {
499 if (true == tokenStack.empty()) {
500 return;
501 }
502 String token = (String) tokenStack.peek();
503 if (true == isNumber(token))
504 {
505 parser = quantityParser;
506 parser.parse(tokenStack);
507 return;
508 }
509
510 String newMeasure = appendToken(stripToken(token), value);
511 if (false == Measure.find(newMeasure)) {
512 parser = measureAdjectiveParser;
513 parser.parse(tokenStack);
514 } else {
515 tokenStack.pop();
516 value = newMeasure;
517 }
518 }
519 }
520
521 /**
522 * Class that parses the measure adjective element.
523 *
524 * @since LargoRecipes 0.8
525 * @author Douglas Squirrel
526 */
527 private class MeasureAdjectiveParser extends ElementParser {
528 /**
529 * Construct a new <code>MeasureAdjectiveParser</code>.
530 *
531 * @since LargoRecipes 0.8
532 */
533 private MeasureAdjectiveParser() {}
534
535 public void parse(Stack tokenStack) throws UnrecoverableException {
536 if (true == tokenStack.empty()) {
537 return;
538 }
539 String token = (String) tokenStack.peek();
540 if (true == isNumber(token)) {
541 parser = quantityParser;
542 parser.parse(tokenStack);
543 } else {
544 tokenStack.pop();
545 value = appendToken(token, value);
546 }
547 }
548 }
549
550 /**
551 * Get a string representation of this <code>IngredientAmountParser</code>.
552 *
553 * @return The desired <code>String</code> representation.
554 *
555 * @since LargoRecipes 0.8
556 */
557 public String toString() {
558 return getNumber() + "/"
559 + getMeasureAdjective() + "/"
560 + getMeasure() + "/"
561 + getIngredientAdjective() + "/"
562 + getIngredient() + "/"
563 + getEndAdjective();
564 }
565
566 /**
567 * Utility function that determines if the given token could be a number.
568 * Checks the first character and concludes that the token is a number if
569 * that character is a digit or a full stop.
570 *
571 * @param token The token to check.
572 *
573 * @return <code>true</code> if the token could be a number according to the
574 * above test; <code>false</code> if not.
575 *
576 * @since LargoRecipes 0.8
577 */
578 private boolean isNumber(String token) {
579 if (true == token.matches("(\\d|\\.)(\\d|\\.|/)*")) {
580 return true;
581 } else {
582 return false;
583 }
584 }
585
586 /**
587 * Utility function that concatenates two strings, separated by a space.
588 * If one or the other is empty, returns the non-empty string. If both
589 * are empty, returns an empty string.
590 *
591 * @param s The first string.
592 * @param t The second string.
593 *
594 * @return <code>s + " " + t</code>, unless <code>s</code> or </code>t</code>
595 * or both is empty, in which case returns <code>s + t</code>.
596 *
597 * @since LargoRecipes 0.8
598 */
599 private String appendToken(String s, String t) {
600 if (0 == s.length()) {
601 return t;
602 }
603 if (0 == t.length()) {
604 return s;
605 }
606 return (s + " " + t);
607 }
608 }
609
610
611