/*
 * REPOWEB, repository manager.
 *
 * Terms of license - http://opensource.org/licenses/apachepl.php
 */
/* Derived from Ant source. */
7   package org.repoweb.model.file.util;
8   import java.io.File;
9   import java.util.ArrayList;
10  import java.util.List;
11  import java.util.StringTokenizer;
12  
/**
 * <p>Static helpers that match file names and paths against wildcard
 * patterns. The functionality more properly belongs just to selectors, but
 * unfortunately DirectoryScanner exposed these as protected methods. Thus we
 * have to support any subclasses of DirectoryScanner that may access these
 * methods.
 * </p>
 * <p>The class cannot be instantiated; every method is static.</p>
 *
 * @author Arnout J. Kuiper
 * <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
 * @author Magesh Umasankar
 * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
 * @since 1.5
 */
final class SelectorUtils {
    /** Path element that stands for zero or more path elements. */
    private static final String DEEP_WILDCARD = "**";

    /**
     * Private constructor; this class only offers static methods.
     */
    private SelectorUtils() {}

    /**
     * Tests whether or not a given path matches the start of a given
     * pattern up to the first "**".
     * <p>
     * This is not a general purpose test and should only be used if you
     * can live with false positives. For example, <code>pattern=**\a</code>
     * and <code>str=b</code> will yield <code>true</code>.
     *
     * @param pattern The pattern to match against. Must not be
     *                <code>null</code>.
     * @param str     The path to match, as a String. Must not be
     *                <code>null</code>.
     * @param isCaseSensitive Whether or not matching should be performed
     *                        case sensitively.
     *
     * @return whether or not a given path matches the start of a given
     * pattern up to the first "**".
     */
    public static boolean matchPatternStart(String pattern, String str,
        boolean isCaseSensitive) {
        // Either both or neither of pattern and str may start with a
        // File.separator.
        if (str.startsWith(File.separator) != pattern.startsWith(File.separator)) {
            return false;
        }

        List patDirs = tokenizePath(pattern);
        List strDirs = tokenizePath(str);

        int matched = matchLeadingDirs(patDirs, strDirs, isCaseSensitive);
        if (matched < 0) {
            return false;
        }

        boolean strExhausted = matched > strDirs.size() - 1;
        boolean patExhausted = matched > patDirs.size() - 1;

        // String exhausted: success. Otherwise the pattern must still have
        // elements left, in which case it now holds "**"; that generates
        // false positives but we can live with that.
        return strExhausted || !patExhausted;
    }


    /**
     * Tests whether or not a given path matches a given pattern.
     * The path and the pattern are split on <code>File.separator</code>;
     * a pattern element "**" stands for zero or more path elements, every
     * other element is compared with
     * {@link #match(String, String, boolean)}.
     *
     * @param pattern The pattern to match against. Must not be
     *                <code>null</code>.
     * @param str     The path to match, as a String. Must not be
     *                <code>null</code>.
     * @param isCaseSensitive Whether or not matching should be performed
     *                        case sensitively.
     *
     * @return <code>true</code> if the pattern matches against the string,
     *         or <code>false</code> otherwise.
     */
    public static boolean matchPath(String pattern, String str, boolean isCaseSensitive) {
        // Either both or neither of pattern and str may start with a
        // File.separator.
        if (str.startsWith(File.separator) != pattern.startsWith(File.separator)) {
            return false;
        }

        List patDirs = tokenizePath(pattern);
        List strDirs = tokenizePath(str);

        int patIdxEnd = patDirs.size() - 1;
        int strIdxEnd = strDirs.size() - 1;

        // up to first '**'
        int matched = matchLeadingDirs(patDirs, strDirs, isCaseSensitive);
        if (matched < 0) {
            return false;
        }
        int patIdxStart = matched;
        int strIdxStart = matched;

        if (strIdxStart > strIdxEnd) {
            // String is exhausted
            return onlyDeepWildcards(patDirs, patIdxStart, patIdxEnd);
        }
        if (patIdxStart > patIdxEnd) {
            // String not exhausted, but pattern is. Failure.
            return false;
        }

        // up to last '**'
        while (patIdxStart <= patIdxEnd && strIdxStart <= strIdxEnd) {
            String patDir = (String) patDirs.get(patIdxEnd);
            if (DEEP_WILDCARD.equals(patDir)) {
                break;
            }
            if (!match(patDir, (String) strDirs.get(strIdxEnd), isCaseSensitive)) {
                return false;
            }
            patIdxEnd--;
            strIdxEnd--;
        }
        if (strIdxStart > strIdxEnd) {
            // String is exhausted
            return onlyDeepWildcards(patDirs, patIdxStart, patIdxEnd);
        }

        // Elements between the first and the last '**': each run of plain
        // elements has to be found, in order, in what is left of the path.
        while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
            int patIdxTmp = -1;
            for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
                if (DEEP_WILDCARD.equals(patDirs.get(i))) {
                    patIdxTmp = i;
                    break;
                }
            }
            if (patIdxTmp == patIdxStart + 1) {
                // '**/**' situation, so skip one
                patIdxStart++;
                continue;
            }

            // Find the pattern between patIdxStart & patIdxTmp in str between
            // strIdxStart & strIdxEnd
            int patLength = patIdxTmp - patIdxStart - 1;
            int strLength = strIdxEnd - strIdxStart + 1;
            int foundIdx = -1;
            for (int offset = 0; offset <= strLength - patLength; offset++) {
                if (dirRunMatches(patDirs, patIdxStart + 1, strDirs,
                        strIdxStart + offset, patLength, isCaseSensitive)) {
                    foundIdx = strIdxStart + offset;
                    break;
                }
            }

            if (foundIdx == -1) {
                return false;
            }

            patIdxStart = patIdxTmp;
            strIdxStart = foundIdx + patLength;
        }

        return onlyDeepWildcards(patDirs, patIdxStart, patIdxEnd);
    }


    /**
     * Tests whether or not a string matches against a pattern.
     * The pattern may contain two special characters:<br>
     * '*' means zero or more characters<br>
     * '?' means one and only one character
     *
     * @param pattern The pattern to match against.
     *                Must not be <code>null</code>.
     * @param str     The string which must be matched against the pattern.
     *                Must not be <code>null</code>.
     * @param isCaseSensitive Whether or not matching should be performed
     *                        case sensitively.
     *
     * @return <code>true</code> if the string matches against the pattern,
     *         or <code>false</code> otherwise.
     */
    public static boolean match(String pattern, String str, boolean isCaseSensitive) {
        char[] patArr = pattern.toCharArray();
        char[] strArr = str.toCharArray();
        int patIdxStart = 0;
        int patIdxEnd = patArr.length - 1;
        int strIdxStart = 0;
        int strIdxEnd = strArr.length - 1;

        if (pattern.indexOf('*') < 0) {
            // No '*'s, so we make a shortcut
            if (patIdxEnd != strIdxEnd) {
                return false; // Pattern and string do not have the same size
            }
            for (int i = 0; i <= patIdxEnd; i++) {
                if (!charMatches(patArr[i], strArr[i], isCaseSensitive)) {
                    return false; // Character mismatch
                }
            }
            return true; // String matches against pattern
        }

        if (patIdxEnd == 0) {
            return true; // Pattern contains only '*', which matches anything
        }

        // Process characters before first star
        while (patArr[patIdxStart] != '*' && strIdxStart <= strIdxEnd) {
            if (!charMatches(patArr[patIdxStart], strArr[strIdxStart], isCaseSensitive)) {
                return false; // Character mismatch
            }
            patIdxStart++;
            strIdxStart++;
        }
        if (strIdxStart > strIdxEnd) {
            // All characters in the string are used. Check if only '*'s are
            // left in the pattern. If so, we succeeded. Otherwise failure.
            return onlyStars(patArr, patIdxStart, patIdxEnd);
        }

        // Process characters after last star
        while (patArr[patIdxEnd] != '*' && strIdxStart <= strIdxEnd) {
            if (!charMatches(patArr[patIdxEnd], strArr[strIdxEnd], isCaseSensitive)) {
                return false; // Character mismatch
            }
            patIdxEnd--;
            strIdxEnd--;
        }
        if (strIdxStart > strIdxEnd) {
            // All characters in the string are used. Check if only '*'s are
            // left in the pattern. If so, we succeeded. Otherwise failure.
            return onlyStars(patArr, patIdxStart, patIdxEnd);
        }

        // Process pattern between stars. patIdxStart and patIdxEnd always
        // point to a '*' here.
        while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
            int patIdxTmp = -1;
            for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
                if (patArr[i] == '*') {
                    patIdxTmp = i;
                    break;
                }
            }
            if (patIdxTmp == patIdxStart + 1) {
                // Two stars next to each other, skip the first one.
                patIdxStart++;
                continue;
            }

            // Find the pattern between patIdxStart & patIdxTmp in str between
            // strIdxStart & strIdxEnd
            int patLength = patIdxTmp - patIdxStart - 1;
            int strLength = strIdxEnd - strIdxStart + 1;
            int foundIdx = -1;
            for (int offset = 0; offset <= strLength - patLength; offset++) {
                if (charRunMatches(patArr, patIdxStart + 1, strArr,
                        strIdxStart + offset, patLength, isCaseSensitive)) {
                    foundIdx = strIdxStart + offset;
                    break;
                }
            }

            if (foundIdx == -1) {
                return false;
            }

            patIdxStart = patIdxTmp;
            strIdxStart = foundIdx + patLength;
        }

        // Succeed only if nothing but '*'s are left in the pattern.
        return onlyStars(patArr, patIdxStart, patIdxEnd);
    }


    /**
     * Breaks a path up into a List of path elements, tokenizing on
     * <code>File.separator</code>. Empty elements (leading, trailing or
     * repeated separators) are not included.
     *
     * @param path Path to tokenize. Must not be <code>null</code>.
     *
     * @return a List of the path elements (Strings) from the tokenized path
     */
    public static List tokenizePath(String path) {
        List ret = new ArrayList();
        StringTokenizer st = new StringTokenizer(path, File.separator);
        while (st.hasMoreTokens()) {
            ret.add(st.nextToken());
        }
        return ret;
    }


    /**
     * Matches pattern elements against path elements pairwise from the
     * front, stopping at the first "**" pattern element or when either
     * list runs out.
     *
     * @return the number of leading elements matched, or <code>-1</code>
     *         if a pair of elements does not match.
     */
    private static int matchLeadingDirs(List patDirs, List strDirs,
        boolean isCaseSensitive) {
        int patIdxEnd = patDirs.size() - 1;
        int strIdxEnd = strDirs.size() - 1;
        int idx = 0;
        while (idx <= patIdxEnd && idx <= strIdxEnd) {
            String patDir = (String) patDirs.get(idx);
            if (DEEP_WILDCARD.equals(patDir)) {
                break;
            }
            if (!match(patDir, (String) strDirs.get(idx), isCaseSensitive)) {
                return -1;
            }
            idx++;
        }
        return idx;
    }

    /**
     * Tells whether <code>length</code> consecutive pattern elements starting
     * at <code>patFrom</code> match the path elements starting at
     * <code>strFrom</code>.
     */
    private static boolean dirRunMatches(List patDirs, int patFrom, List strDirs,
        int strFrom, int length, boolean isCaseSensitive) {
        for (int j = 0; j < length; j++) {
            String subPat = (String) patDirs.get(patFrom + j);
            String subStr = (String) strDirs.get(strFrom + j);
            if (!match(subPat, subStr, isCaseSensitive)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether every pattern element in the inclusive index range is
     * "**". An empty range yields <code>true</code>.
     */
    private static boolean onlyDeepWildcards(List patDirs, int from, int to) {
        for (int i = from; i <= to; i++) {
            if (!DEEP_WILDCARD.equals(patDirs.get(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether a single pattern character accepts a string character:
     * '?' accepts anything, any other character must be equal, compared
     * through <code>Character.toUpperCase</code> when matching is not case
     * sensitive.
     */
    private static boolean charMatches(char patCh, char strCh, boolean isCaseSensitive) {
        if (patCh == '?') {
            return true;
        }
        if (isCaseSensitive) {
            return patCh == strCh;
        }
        return Character.toUpperCase(patCh) == Character.toUpperCase(strCh);
    }

    /**
     * Tells whether <code>length</code> consecutive pattern characters
     * starting at <code>patFrom</code> accept the string characters starting
     * at <code>strFrom</code>.
     */
    private static boolean charRunMatches(char[] patArr, int patFrom, char[] strArr,
        int strFrom, int length, boolean isCaseSensitive) {
        for (int j = 0; j < length; j++) {
            if (!charMatches(patArr[patFrom + j], strArr[strFrom + j], isCaseSensitive)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tells whether every pattern character in the inclusive index range is
     * '*'. An empty range yields <code>true</code>.
     */
    private static boolean onlyStars(char[] patArr, int from, int to) {
        for (int i = from; i <= to; i++) {
            if (patArr[i] != '*') {
                return false;
            }
        }
        return true;
    }
}