1 /*
2 * The akquinet maven-latex-plugin project
3 *
4 * Copyright (c) 2011 by akquinet tech@spree GmbH
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19 package eu.simuline.m2latex.core;
20
21 import java.io.BufferedReader;
22 import java.io.Closeable;
23 import java.io.File;
24 import java.io.FileFilter;
25 import java.io.FileInputStream;
26 import java.io.FileNotFoundException;
27 import java.io.FileOutputStream;
28 import java.io.FileReader;
29 import java.io.FileWriter;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.OutputStream;
33
34 import java.nio.file.Path;
35
36 import java.util.Collection;
37 import java.util.HashSet;
38 import java.util.Set;
39 import java.util.TreeSet;
40 import java.util.concurrent.atomic.AtomicBoolean;
41
42 import com.florianingerl.util.regex.Matcher;
43 import com.florianingerl.util.regex.Pattern;
44
45 import org.apache.commons.io.FileUtils;
46
47 // FIXME: jdee bug: delete static imports: does not find superfluous
48
49 /**
50 * Sole interface to <code>org.apache.commons.io.</code>.
51 * A collection of utility methods for file manipulation.
52 */
53 class TexFileUtils {
54
55 //private final static String PREFIX_HIDDEN = ".";
56
/**
 * Regular expression matching the literal placeholder <code>T$T</code>.
 * In {@link #getFileFilter(File, String, boolean)}
 * each occurrence of that placeholder in a pattern
 * is replaced by the name of the latex main file without suffix.
 */
private static final String PATTERN_INS_LATEX_MAIN = "T\\$T";

/**
 * The logger receiving all debug messages, warnings and errors
 * emitted by the methods of this class.
 */
private final LogWrapper log;

/**
 * Creates file utilities reporting via <code>log</code>.
 *
 * @param log
 *    the logger used for all messages of the instance created.
 */
TexFileUtils(LogWrapper log) {
  this.log = log;
}
64
65 /**
66 * Returns the listing of the directory <code>dir</code>
67 * or <code>null</code> if it is not readable
68 * and emit an according warning if so.
69 * <p>
70 * Logging:
71 * WFU01: Cannot read directory
72 *
73 * @param dir
74 * an existing directory.
75 * @return
76 * the list of entries of <code>dir</code>
77 * or <code>null</code> if it is not readable.
78 */
79 // used only in
80 // constructor of DirNode
81 // copyOutputToTargetFolder, deleteX
82 File[] listFilesOrWarn(File dir) {
83 assert dir != null && dir.isDirectory() : "Expected folder found " + dir;
84 File[] files = dir.listFiles();
85 warnIfNull(files, dir);
86 return files;
87 }
88
89 /**
90 * Returns the listing of the directory <code>dir</code>
91 * filtered by <code>filter</code>
92 * or <code>null</code> if <code>dir</code> is not readable
93 * and emit an according warning if so.
94 * <p>
95 * Logging:
96 * WFU01: Cannot read directory
97 *
98 * @param dir
99 * an existing directory.
100 * @param filter
101 * a file filter
102 * @return
103 * the list of entries of <code>dir</code>
104 * accepted by <code>filter</code>
105 * or <code>null</code> if <code>dir</code> is not readable.
106 */
107 // used by LatexProcessor.runMakeIndexByNeed only
108 File[] listFilesOrWarn(File dir, FileFilter filter) {
109 assert dir != null && dir.isDirectory() : "Expected folder found " + dir;
110 File[] files = dir.listFiles(filter);
111 warnIfNull(files, dir);
112 return files;
113 }
114
115 private void warnIfNull(File[] files, File dir) {
116 if (files == null) {
117 this.log.warn("WFU01: Cannot read directory '" + dir
118 + "'; build may be incomplete. ");
119 }
120 }
121
122 /**
123 * Returns the directory containing <code>sourceFile</code>
124 * with the prefix <code>sourceBaseDir</code>
125 * replaced by <code>targetBaseDir</code>.
126 * E.g. <code>sourceFile=/tmp/adir/afile</code>,
127 * <code>sourceBaseDir=/tmp</code>, <code>targetBaseDir=/home</code>
128 * returns <code>/home/adir/</code>.
129 *
130 * @param srcFile
131 * the source file the parent directory of which
132 * shall be converted to the target.
133 * @param srcBaseDir
134 * the base directory of the source.
135 * Immediately or not,
136 * <code>sourceFile</code> shall be in <code>sourceBaseDir</code>.
137 * @param targetBaseDir
138 * the base directory of the target.
139 * @return
140 * the directory below <code>targetBaseDir</code>
141 * which corresponds to the parent directory of <code>sourceFile</code>
142 * which is below <code>sourceBaseDir</code>.
143 * @throws BuildFailureException
144 * TFU01: if the target directory that would be returned
145 * exists already as a regular file.
146 */
147 // used by LatexProcessor.create() only
148 File getTargetDirectory(File srcFile, File srcBaseDir, File targetBaseDir)
149 throws BuildFailureException {
150 Path srcParentPath = srcFile.getParentFile().toPath();
151 Path srcBasePath = srcBaseDir.toPath();
152
153 assert srcParentPath.startsWith(srcBasePath);
154 srcParentPath = srcBasePath.relativize(srcParentPath);
155
156 // this may exist but if so it is a directory because holds srcFile
157 File targetDir = new File(targetBaseDir, srcParentPath.toString());
158
159 targetDir.mkdirs();
160
161 if (!targetDir.isDirectory()) {
162 throw new BuildFailureException(
163 "TFU01: Cannot create destination directory '" + targetDir + "'. ");
164 }
165 assert targetDir.isDirectory();
166 return targetDir;
167 }
168
169 /**
170 * Returns the pdf file which serves as a blue print for <code>pdfFileAct</code>.
171 * The relative path of <code>pdfFileAct</code> to its ancestor <code>texSourceDir</code>
172 * is the same as the blueprint to its ancestor <code>diffRootDir</code>.
173 *
174 * @param pdfFileAct
175 * the artifact to be checked agains a blueprint in <code>diffRootDir</code>.
176 * It must be contained in <code>artifactBaseDir</code>, immediately or not.
177 * @param texSourceDir
178 * the base directory of the source files.
179 * @param diffBaseDir
180 * the root directory of all blue prints corresponding with <code>diffRootDir</code>.
181 */
182 static File getPdfFileDiff(File pdfFileAct, File texSourceDir,
183 File diffBaseDir) {
184 Path pdfFileActPath = pdfFileAct.toPath();
185 Path texSourcePath = texSourceDir.toPath();
186
187 assert pdfFileActPath.startsWith(texSourcePath);
188 pdfFileActPath = texSourcePath.relativize(pdfFileActPath);
189
190 // this may exist but if so it is a directory because holds srcFile
191 return new File(diffBaseDir, pdfFileActPath.toString());
192 }
193
194 /**
195 * Returns a file filter matching neither directories
196 * nor <code>texFile</code>
197 * but else all files with names matching <code>pattern</code>,
198 * where the special sequence {@link #PATTERN_INS_LATEX_MAIN}
199 * is replaced by the prefix of <code>texFile</code>.
200 *
201 * @param texFile
202 * a latex main file for which a file filter has to be created.
203 * @param pattern
204 * a pattern
205 * for which the special sequence {@link #PATTERN_INS_LATEX_MAIN}
206 * is replaced by the prefix of <code>texFile</code>
207 * before a file filter is created from it.
208 * @param allowsDirs
209 * Whether the filter returned accepts also directories.
210 * @return
211 * a non-null file filter matching neither directories
212 * nor <code>texFile</code>
213 * but else all files with names matching <code>pattern</code>,
214 * where the special sequence {@link #PATTERN_INS_LATEX_MAIN}
215 * is replaced by the prefix of <code>texFile</code>.
216 */
217 // used only: in methods
218 // - LatexProcessor.create on tex-file to determine output files.
219 // - LatexPreProcessor.clearTargetTex to clear also intermediate files.
220 static FileFilter getFileFilter(File texFile, String pattern,
221 boolean allowsDirs) {
222 final String patternAccept = pattern.replaceAll(PATTERN_INS_LATEX_MAIN,
223 getFileNameWithoutSuffix(texFile));
224 return new FileFilter() {
225 public boolean accept(File file) {
226 // the second is superfluous for copying
227 // and only needed for deletion.
228 if ((file.isDirectory() && !allowsDirs) || file.equals(texFile)) {
229 return false;
230 }
231 return file.getName().matches(patternAccept);
232 }
233 };
234 }
235
236 /**
237 * Returns a file filter matching no directories
238 * but else all files with names matching <code>xxx<pattern>.idx</code>,
239 * where <code>idxFile</code> has the form <code>xxx.idx</code>.
240 *
241 * @param idxFile
242 * an idx file for which a file filter has to be created.
243 * @param pattern
244 * a pattern which is inserted in the name of <code>idxFile</code>
245 * right before the suffix.
246 * @return
247 * a non-null file filter matching no directories
248 * but else all files matching <code>xxx<pattern>.idx</code>.
249 */
250 // used by LatexProcessor.runMakeIndexByNeed only
251 FileFilter getFileFilterReplace(File idxFile, String pattern) {
252 final String patternAccept =
253 getFileNameWithoutSuffix(idxFile) + pattern + getSuffix(idxFile);
254 return new FileFilter() {
255 public boolean accept(File file) {
256 if (file.isDirectory()) {
257 return false;
258 }
259 return file.getName().matches(patternAccept);
260 }
261 };
262 }
263
264 /**
265 * Copies output of the current goal to target folder.
266 * The source is the parent folder of <code>texFile</code>,
267 * all its files passing <code>fileFilter</code>
268 * are considered as output files and
269 * are copied to <code>targetDir</code>.
270 * <p>
271 * Logging:
272 * <ul>
273 * <li> WFU01: Cannot read directory...
274 * <li> WFU03: Cannot close
275 * </ul>
276 *
277 * @param texFile
278 * the latex main file which was processed.
279 * Its parent directory
280 * is the working directory of the compilation process
281 * in which the output files are created.
282 * Thus it must be readable (in fact it must also be writable;
283 * otherwise the output files could not have been created).
284 * @param fileFilter
285 * the filter accepting the files (and best only the files)
286 * which are the result of the processing.
287 * @param targetDir
288 * the target directory the output files have to be copied to.
289 * If this exists already, it must be a directory
290 * and it must be writable.
291 * If it does not exist, it must be creatable.
292 * @return
293 * The set of all target files.
294 * @throws BuildFailureException
295 * <ul>
296 * <li>TFU04, TFU05 if
297 * the destination file exists
298 * and is either a directory (TFU04) or is not writable (TFU05).
299 * <li>TFU06 if
300 * an IO-error orrurs when copying: opening streams, reading or writing.
301 * </ul>
302 */
303 // used in LatexProcessor.create() only
304 Set<File> copyOutputToTargetFolder(File texFile, FileFilter fileFilter, File targetDir)
305 throws BuildFailureException {
306 Set<File> targetFiles = new HashSet<File>();
307 assert texFile.exists()
308 && !texFile.isDirectory() : "Expected existing (regular) tex file " + texFile;
309 assert !targetDir.exists() || targetDir
310 .isDirectory() : "Expected existing target folder " + targetDir;
311
312 File texFileDir = texFile.getParentFile();
313 // may log warning WFU01
314 File[] outputFiles = listFilesOrWarn(texFileDir);
315 if (outputFiles == null) {
316 // Here, logging WFU01 already done
317 return targetFiles;
318 }
319 assert outputFiles != null;
320
321 File srcFile, destFile;
322 for (int idx = 0; idx < outputFiles.length; idx++) {
323 srcFile = outputFiles[idx];
324 assert srcFile.exists() : "Missing " + srcFile;
325 if (!fileFilter.accept(srcFile)) {
326 continue;
327 }
328 assert srcFile.exists()
329 && !srcFile.isDirectory() : "Expected existing (regular) tex file "
330 + texFile;
331 // since !targetDir.exists() || targetDir.isDirectory()
332 assert !srcFile.equals(targetDir);
333 assert !srcFile.equals(texFile);
334
335 destFile = new File(targetDir, srcFile.getName());
336
337 if (destFile.isDirectory()) {
338 throw new BuildFailureException(
339 "TFU04: Cannot overwrite directory '" + destFile + "'. ");
340 }
341
342 this.log.debug(
343 "Copying '" + srcFile.getName() + "' to '" + targetDir + "'. ");
344 try {
345 // may throw IOException: opening streams, read/write
346 // may log warning WFU03: Cannot close
347 doCopyFile(srcFile, destFile);
348 targetFiles.add(destFile);
349 } catch (IOException e) {
350 throw new BuildFailureException("TFU06: Cannot copy '"
351 + srcFile.getName() + "' to '" + targetDir + "'. ", e);
352 }
353 } // for
354 return targetFiles;
355 }
356
357 // FIXME: copied from FileUtils
358 /**
359 * Internal copy file method.
360 * <p>
361 * Logging:
362 * WFU03: Cannot close
363 *
364 * @param srcFile
365 * the source file.
366 * @param destFile
367 * the destination file.
368 * @throws IOException
369 * if an error occurs: opening input/output streams,
370 * reading from file/writing to file.
371 */
372 private void doCopyFile(File srcFile, File destFile) throws IOException {
373 // Files.copy(srcFile.toPath(),
374 // destFile.toPath(), StandardCopyOption.COPY_ATTRIBUTES);
375
376 // may throw FileNotFoundException <= IOException
377 // if cannot be opened for reading: e.g. not exists, is a directory,...
378 FileInputStream input = new FileInputStream(srcFile);
379 try {
380 // may throw FileNotFoundException <= IOException
381 FileOutputStream output = new FileOutputStream(destFile);
382 // if cannot be opened for writing:
383 // e.g. not exists, is a directory,...
384 try {
385 // may throw IOException if an I/O-error occurs
386 // when reading or writing
387 copyStream(input, output);
388 } finally {
389 // may log warning WFU03
390 closeQuietly(output);
391 }
392 } finally {
393 // may log warning WFU03
394 closeQuietly(input);
395 }
396
397 assert !destFile.isDirectory() && destFile
398 .canWrite() : "Expected existing (regular) writable file " + destFile;
399 destFile.setLastModified(srcFile.lastModified());
400 }
401
402
403 // TBD: eliminate WFU04
404 // void setModificationTimeS(File file, long timestampSec) {
405 // boolean success = file.setLastModified(timestampSec * 1000);
406 // if (!success) {
407 // this.log.warn(
408 // "WFU04: Could not assign timestamp to target file " + file + ". ");
409 // }
410 // }
411
412
413 /**
414 * The default buffer size ({@value}) to use for
415 * {@link #copyStream(InputStream, OutputStream)}
416 */
417 private static final int DEFAULT_BUFFER_SIZE = 1024 * 4;
418
419 /**
420 * Copy bytes from a large (over 2GB) <code>InputStream</code> to an
421 * <code>OutputStream</code>.
422 * <p>
423 * This method uses the provided buffer, so there is no need to use a
424 * <code>BufferedInputStream</code>.
425 *
426 * @param input
427 * the <code>InputStream</code> to read from
428 * @param output
429 * the <code>OutputStream</code> to write to
430 * @throws IOException
431 * if an I/O error occurs while reading or writing
432 */
433 private static void copyStream(InputStream input, OutputStream output)
434 throws IOException {
435 byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
436 int n;
437 // may throw IOException
438 while (-1 != (n = input.read(buffer))) {
439 // may throw IOException
440 output.write(buffer, 0, n);
441 }
442 }
443
444 // FIXME: almost copy from IOUtils
445 /**
446 * Unconditionally close a <code>Closeable</code>.
447 * <p>
448 * Equivalent to {@link Closeable#close()},
449 * except any exceptions will be ignored. FIXME
450 * This is typically used in finally blocks.
451 * <p>
452 * Example code:
453 * <pre>
454 * Closeable closeable = null;
455 * try {
456 * closeable = new FileReader("foo.txt");
457 * // process closeable
458 * closeable.close();
459 * } catch (Exception e) {
460 * // error handling
461 * } finally {
462 * IOUtils.closeQuietly(closeable);
463 * }
464 * </pre>
465 * <p>
466 * Logging:
467 * WFU03: Cannot close
468 *
469 * @param closeable
470 * the object to close, may be null or already closed
471 */
472 private void closeQuietly(Closeable closeable) {
473 try {
474 closeable.close();
475 } catch (IOException ioe) {
476 this.log.warn("WFU03: Cannot close '" + closeable + "'. ", ioe);
477 }
478 }
479
480 // TBD: move elsewhere because this is specific for inkscape
481 // TBD: better even to eliminate.
482 /**
483 * The new preamble of the tex file originally created by inkscape
484 * with ending <code>eps_tex</code>.
485 * FIXME: version to be included.
486 */
487 private final static String INKSCAPE_PREAMBLE =
488 "%% LatexMavenPlugin (version unknown) modified "
489 + "two of the following lines\n";
490
491 /**
492 * This is just a workaround because of inkscape's current flaw.
493 * It reads file <code>srcFile</code>
494 * which is expected to have name with ending <code>eps_tex</code>
495 * and writes a file with same name
496 * replacing ending by <code>tex</code> with following modifications:
497 * <ul>
498 * <li>Adds line {@link #INKSCAPE_PREAMBLE} atop </li>
499 * <li>Replaces line '%%Accompanies ...' by
500 * '%% Accompanies image files 'xxx.pdf/eps/ps'</li>
501 * <li>Replaces line
502 * '... \includegraphics[width=\\unitlength]{xxx.eps}...'
503 * by
504 * '... \includegraphics[width=\\unitlength]{xxx}...'</li>
505 * </ul>
506 * <p>
507 * Logging:
508 * EFU07, EFU08, EFU09: cannot fiter
509 *
510 * @param srcFile
511 * A file created by inkscape with ending <code>eps_tex</code>
512 * containing a lines
513 * <code>
514 * %% Accompanies image file 'xxx.eps' (pdf, eps, ps)</code> and
515 * <code>\put(0,0){\includegraphics[width=\\unitlength]{xxx.eps}}</code>
516 * with variable <code>xxx</code> and leading blanks\
517 */
518 public void filterInkscapeIncludeFile(File srcFile,
519 File destFile,
520 String bareFileName,
521 String epsSuffix) {
522 // assert LatexPreProcessor.SUFFIX_EPSTEX
523 // .equals(getSuffix(srcFile)) : "Expected suffix '"
524 // + LatexPreProcessor.SUFFIX_EPSTEX + "' found '" + getSuffix(srcFile)
525 // + "'";
526 // File destFile = replaceSuffix(srcFile, LatexPreProcessor.SUFFIX_PTX);
527 // File bareFile = replaceSuffix(srcFile, LatexPreProcessor.SUFFIX_VOID);
528 // //FileReader reader = null;
529 BufferedReader bufferedReader = null;
530 FileWriter writer = null;
531 try {
532 // may throw FileNotFoundException < IOExcption
533 FileReader reader = new FileReader(srcFile);
534 // BufferedReader for performance and to be able to read a line
535 bufferedReader = new BufferedReader(reader);
536
537 // may throw IOExcption
538 writer = new FileWriter(destFile);
539 //BufferedWriter bufferedWriter = new BufferedWriter(writer);
540 String line;
541 // write preamble
542 // readLine may throw IOException
543 writer.write(INKSCAPE_PREAMBLE);
544 // first two lines: write as read
545 line = bufferedReader.readLine();
546 writer.write(line + "\n");
547 line = bufferedReader.readLine();
548 writer.write(line + "\n");
549
550 // third line must be changed.
551 line = bufferedReader.readLine();
552 line = line.replace(
553 bareFileName + epsSuffix + "' (pdf, eps, ps)",
554 bareFileName + ".pdf/eps/ps'\n");
555 writer.write(line);
556
557 // readLine may throw IOException
558 // TBD: eliminate magic numbers
559 for (int idx = 4; idx < 56; idx++) {
560 line = bufferedReader.readLine();
561 writer.write(line + "\n");
562 }
563
564 // readLine may throw IOException
565 line = bufferedReader.readLine();
566 line = line.replace(
567 bareFileName + epsSuffix + "}}%",
568 bareFileName + "}}%\n");
569 writer.write(line);
570
571 line = bufferedReader.readLine();
572 do {
573 writer.write(line + "\n");
574 // readLine may thr. IOException
575 line = bufferedReader.readLine();
576 } while (line != null);
577 } catch (IOException e) {
578 if (bufferedReader == null) {
579 // Here, FileNotFoundException on srcFile
580 this.log.error(
581 "EFU07: File '" + srcFile + "' to be filtered cannot be read. ");
582 return;
583 }
584 if (writer == null) {
585 this.log.error("EFU08: Destination file '" + destFile
586 + "' for filtering cannot be written. ");
587 return;
588 }
589 this.log.error("EFU09: Cannot filter file '" + srcFile + "' into '"
590 + destFile + "'. ");
591 } finally {
592 // Here, an IOException may have occurred
593 // may log warning WFU03
594 // TBD: what if null?
595 closeQuietly(bufferedReader);
596 closeQuietly(writer);
597 }
598 }
599
600 /**
601 * Return the name of the given file without the suffix.
602 * If the suffix is empty, this is just the name of that file.
603 *
604 * @see #getSuffix(File)
605 */
606 static String getFileNameWithoutSuffix(File file) {
607 String nameFile = file.getName();
608 int idxDot = nameFile.lastIndexOf(".");
609 return idxDot == -1 ? nameFile : nameFile.substring(0, idxDot);
610 }
611
612 /**
613 * Return the suffix of the name of the given file
614 * including the <code>.</code>,
615 * except there is no <code>.</code>.
616 * Then the suffix is empty.
617 *
618 * @see #getFileNameWithoutSuffix(File)
619 */
620 // used only by
621 // LatexPreProcessor.processGraphicsSelectMain(Collection)
622 // LatexPreProcessor.clearCreated(DirNode)
623 // FIXME: problem if filename starts with . and has no further .
624 // then we have a hidden file and the suffix is all but the .
625 // This is not appropriate.
626 // One may ensure that this does not happen via an assertion
627 // and by modifying getFilesRec in a way that hidden files are skipped
628 static String getSuffix(File file) {
629 return getSuffix(file, true);
630 }
631
632 static String getSuffix(File file, boolean withDot) {
633 String nameFile = file.getName();
634 int idxDot = nameFile.lastIndexOf(".");
635 if (idxDot == -1) {
636 return "";
637 }
638 if (!withDot) {
639 idxDot++;
640 }
641 return nameFile.substring(idxDot, nameFile.length());
642 }
643
644 // logFile may be .log or .blg or something
645 /**
646 * Returns whether the given file <code>file</code> (which shall exist)
647 * contains the given pattern <code>pattern</code>
648 * or <code>null</code> in case of problems reading <code>file</code>.
649 * This is typically applied to log files,
650 * but also to latex-files to find the latex main files.
651 * <p>
652 * Logging:
653 * WFU03 cannot close <br>
654 * Note that in case <code>null</code> is returned,
655 * no error/warning is logged.
656 * This must be done by the invoking method.
657 *
658 * @param file
659 * an existing proper file, not a folder.
660 * @param regex
661 * the pattern (regular expression) to look for in <code>file</code>.
662 * @return
663 * whether the given file <code>file</code> (which shall exist)
664 * contains the given pattern <code>pattern</code>.
665 * If the file does not exist or an IOException occurs
666 * while reading, <code>null</code> is returned.
667 */
668 // used only in
669 // LatexPreProcessor.isLatexMainFile(File)
670 // LatexProcessor.needRun(...)
671 // AbstractLatexProcessor.hasErrsWarns(File, String)
672 // CAUTION: only in tests
673 Boolean matchInFile(File file, String regex) {
674 FileMatch fileMatch = getMatchInFile(file, regex);// TBD: eliminate null
675 // Idea is to allow more than one group name... could be an array.
676 if (fileMatch.isFileReadable()) {
677 return fileMatch.doesExprMatch();
678 }
679 // TBD: eliminate hack: just to avoid warnings.
680 //return null;
681 return false;
682 }
683
684 /**
685 * Returns a descriptor for the match of regular expression <code>regex</code>
686 * in file <code>file</code>.
687 * Depending on whether <code>regex</code> starts with <code>\\A</code>,
688 * matching refers to the beginning of the file,
689 * else is linewise.
690 *
691 * @param file
692 * a file to parse.
693 * @param regex
694 * a regular expression used to parse <code>file</code>.
695 * @return
696 * an object representing the match state of type {@ink FileMatch}.
697 * For details see the class documentation.
698 * If there is a match, the regular expression <code>regex</code>
699 * match some named groups from which further pieces of information can be extracted.
700 */
701 FileMatch getMatchInFile(File file, String regex) {
702 Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);//
703 boolean fromStart = regex.startsWith("\\A");
704 String lines = "";
705
706 try {
707 // constructor of FileReader may throw FileNotFoundException < IOExcption
708 // BufferedReader for performance and to be able to read a line
709 BufferedReader bufferedReader = new BufferedReader(new FileReader(file));
710 //CharBuffer chars = CharBuffer.allocate(1000);
711 try {
712 // may throw IOException
713 // int numRead = bufferedReader.read(chars);
714 // System.out.println("file: "+file);
715 // System.out.println("numRead: "+numRead);
716 // System.out.println("chars: '"+chars+"'");
717
718
719 // FIXME: seemingly,
720 // find may not terminate in case ^(\s*)* but with ^s*
721 // but this seems a bug in java's regex engine
722 // return pattern.matcher(chars).find();
723
724
725 // readLine may throw IOException
726 for (String line = bufferedReader.readLine(); line != null;
727 // readLine may thr. IOException
728 line = bufferedReader.readLine()) {
729 // FIXME: linewise matching is not appropriate
730 // for further patterns line patternReRunLatex
731 // FIXME: seemingly, find may not terminate in case ^(\s*)* but with ^s*
732 // but this seems a bug in java's regex engine
733
734 lines = fromStart ? lines += line + "\n" : line;
735 Matcher matcher = pattern.matcher(lines);
736 if (matcher.find()) {
737 this.log.debug("Matched line: ;" + line + "'");
738 return FileMatch.fileMatch(matcher);// as match result TBD: .toMatchResult()
739 }
740 } // for
741 // Here, the whole file has been read but no match
742 // TBD: reimplement this: this is not performant:
743 // If the pattern does not match,
744 // this is detected not before the end of the file.
745 return FileMatch.noMatch();
746 } catch (IOException ioe) {
747 // Error/Warning must be issued by invoking method
748 return FileMatch.unreadable();
749 } finally {
750 // Here, an IOException may have occurred
751 // may log warning WFU03
752 closeQuietly(bufferedReader);
753 }
754 } catch (FileNotFoundException ffe) {
755 // Error/Warning must be issued by invoking method
756 return FileMatch.unreadable();
757 }
758 }
759
760 /**
761 * Pattern for any line in an IDX file,
762 * whether with explicit identifier of index
763 * created with <code>\sindex</code> provided by package splitidx
 * or for generic index created with builtin <code>\index</code>.
765 */
766 private static final Pattern PATTERN_IDX_LINE =
767 Pattern.compile("^(\\\\indexentry)(\\[([^]]*)\\])?(.*)$");
768
769 /**
770 * The default label of an index as assigned by package splitidx.
771 * This is <code>idx</code>.
772 * It is used for entries defined without explicit label,
773 * either by <code>\index</code> or by <code>\sindex</code>
774 * without optional parameter which is the label.
775 */
776 private static final String DEFAULT_IDX_LABEL = "idx";
777
778 /**
779 * A pattern that matches all strings.
780 */
781 private static final Pattern PATTERN_MATCH_ALL = Pattern.compile("");
782
/**
 * File ending occurring in {@link #PATTERN_ISTFILE_LINE}
 * indicating to <code>makeglossaries</code> to use
 * a makeindex-like indexing program.
 */
788 private static final String INDICATOR_IDX_MAKEINDEX_LIKE = "ist";
789
/**
 * File ending occurring in {@link #PATTERN_ISTFILE_LINE}
 * indicating to <code>makeglossaries</code> to use
 * indexing program xindy.
 */
795 private static final String INDICATOR_IDX_XINDY = "xdy";
796
/**
 * A pattern that matches an IST file name in an AUX file which has the form
 * <code>\@istfilename{&lt;jobname&gt;.(ist|xdy)}</code>
 * and indicates to <code>makeglossaries</code> whether to use
 * a makeindex-like indexing program ({@link #INDICATOR_IDX_MAKEINDEX_LIKE})
 * or xindy ({@link #INDICATOR_IDX_XINDY}).
 */
804 private static final Pattern PATTERN_ISTFILE_LINE =
805 Pattern.compile("^\\\\@istfilename\\{.*\\.("
806 + INDICATOR_IDX_MAKEINDEX_LIKE + "|" + INDICATOR_IDX_XINDY + ")\\}$");
807
808
809 /**
810 * A pattern that matches a declaration of a glossary in an AUX file which has the form
811 * <code>\\@newglossary{label}{glg}{gls}{glo}</code>
812 * and indicates to <code>makeglossaries</code> when invoking makeindex or related
813 * which files should be used as input (glo), as output glossary (gls) and as log file (glg).
814 * The glossary label (label) is irrelevant.
815 */
816 private static final Pattern PATTERN_MATCH_GLOSSARY_DESC =
817 Pattern.compile("^\\\\@newglossary\\{(.+)\\}\\{(.+)\\}\\{(.+)\\}\\{(.+)\\}$");
818
819 //\@newglossary{main}{glg}{gls}{glo}
820
821 /**
822 * Returns the set of strings representing the <code>idxGroup</code>
823 * of the pattern <code>regex</code> matching a line
824 * in file <code>file</code> with possibly added {@link #DEFAULT_IDX_LABEL}
825 * or returns <code>null</code>
826 * in case of problems reading <code>file</code>.
827 * The default label is added only,
828 * if there are further labels from matching a line with <code>regex</code>
829 * and there is at least one line not matching that expression.
830 * <p>
831 * This rule is not complicated if all lines match <code>regex</code>.
832 * Then the list of labels is returned.
833 * If part of the lines match the rule, then the lines not matching the rule
834 * must be treated like matching with label {@link #DEFAULT_IDX_LABEL}.
835 * So, in this case, the default label must be added.
836 * If there are other lines matching <code>regex</code> with default label,
837 * this has no effect.
838 * The fine point is, that, if no line matches <code>regex</code>
839 * the default label shall not be added: this is the case where the index is not split.
840 * <p>
841 * This is used only to collect the identifiers
842 * of explicitly given indices in an idx-file and added implicitly given indices.
843 *
844 * <p>
845 * Logging:
846 * WFU03 cannot close <br>
847 *
848 * @param idxFile
849 * an existing proper file, not a folder.
850 * In practice this is an idx file.
851 * @param pattern
852 * the pattern (regular expression) to look for in <code>file</code>.
853 * @param idxGroupIdx
854 * the number of a group of the pattern {@link Settings#getPatternMultiIndex()}
855 * tied to the name of the index if given explicitly.
856 * @return
857 * <ul>
858 * <li> <code>null</code>
859 * in case of problems reading <code>file</code>.
860 * <li> an empty list
861 * if neither line in <code>file</code> matches <code>regex</code>
862 * <li> the set of labels of lines matching <code>regex</code>
863 * if all lines are matching
864 * <li> the set of labels of lines matching <code>regex</code> with {@link #DEFAULT_IDX_LABEL}
865 * if there are besides matching lines also non-matching lines.
866 * </ul>
867 */
868 // used in LatexProcessor.runMakeIndexByNeed only
869 // **** a lot of copying from method matchInFile
870 Set<String> collectMatchesForIdx(File idxFile, Pattern pattern, int idxGroupIdx) {
871 AtomicBoolean doesMatchAll = new AtomicBoolean();
872 Set<String> res = collectMatches(idxFile, pattern, PATTERN_IDX_LINE, idxGroupIdx, doesMatchAll);
873 if (res == null) {
874 return res;
875 }
876 if (!(doesMatchAll.get() || res.isEmpty())) {
877 // if there is a default label and non-default ones, one has to add
878 res.add(DEFAULT_IDX_LABEL);
879 }
880 return res;
881 }
882
883 Boolean withMakindexLike(File auxFile) {
884 AtomicBoolean doesMatchAll = new AtomicBoolean();
885 Set<String> res = collectMatches(auxFile, PATTERN_ISTFILE_LINE, PATTERN_MATCH_ALL, 1, doesMatchAll);
886 if (res == null) {
887 return null;
888 }
889 if (res.size() != 1) {
890 this.log.warn("WFUXX: For makeglossaries found " + res.size() + " specifications of index creator. ");
891 return null;
892 }
893
894 String ind = res.iterator().next();
895 switch (ind) {
896 case INDICATOR_IDX_MAKEINDEX_LIKE:
897 return Boolean.TRUE;
898 case INDICATOR_IDX_XINDY:
899 return Boolean.FALSE;
900 default:
901 this.log.warn(
902 "WFUXX: For makeglossaries found unknown specification of index creator '" + ind + "'. ");
903 }
904 return null;
905 }
906
907 Set<String> collectLogsForGloss(File auxFile) {
908 AtomicBoolean doesMatchAll = new AtomicBoolean();
909 return collectMatches(auxFile, PATTERN_MATCH_GLOSSARY_DESC, PATTERN_MATCH_ALL, 2, doesMatchAll);
910 }
911
912
913 private Set<String> collectMatches(File file, Pattern pattern, Pattern patternAll, int idxGroupIdx, AtomicBoolean matchAll) {
914 Set<String> res = new TreeSet<String>();
915
916 // may throw FileNotFoundException < IOExcption
917 try (FileReader fileReader = new FileReader(file)) {
918 // BufferedReader for performance
919 BufferedReader bufferedReader = new BufferedReader(fileReader);
920
921 Matcher matcher;
922 matchAll.set(true);
923
924 // readLine may throw IOException
925 for (String line = bufferedReader.readLine();
926 line != null;
927 // readLine may throw IOException
928 line = bufferedReader.readLine()) {
929
930 //assert PATTERN_MATCH_ALL.matcher(line).find();
931 assert patternAll.matcher(line).find()
932 : "Found unexpected line '" + line + "' in file '" + file + "'. ";
933 matcher = pattern.matcher(line);
934 if (matcher.find()) {
935 // Here, a match has been found
936 res.add(matcher.group(idxGroupIdx));
937 } else {
938 //foundDefaultEntry = true;
939 matchAll.set(false);
940 }
941 } // for
942
943 // if (!(matchAll.get() || res.isEmpty())) {
944 // // if there is a default label and non-default ones, one has to add
945 // res.add(DEFAULT_IDX_LABEL);
946 // }
947
948 return res;
949 } catch (IOException ioe) {
950 // Error/Warning must be issued by invoking method
951 return null;
952 }
953 }
954
955 // used in LatexPreProcessor and in LatexProcessor and in LatexDec
956 // at numerous places
957 // TBD: clarify what is wrong with mock that we cannot make this static
958 static File replaceSuffix(File file, String suffix) {
959 return new File(file.getParentFile(),
960 getFileNameWithoutSuffix(file) + suffix);
961 }
962
963 // is assumed to be without suffix
964 static File replacePrefix(String prefix, File file) {
965 return new File(file.getParentFile(), prefix + file.getName());
966 }
967
968 static File appendSuffix(File file, String suffix) {
969 return new File(file.getParentFile(), file.getName() + suffix);
970 }
971
972
973 /**
974 * Deletes all files in the same folder as <code>pFile</code> directly,
975 * i.e. not in subfolders, which are accepted by <code>filter</code>.
976 * <p>
977 * Logging:
978 * <ul>
979 * <li> WFU01: Cannot read directory...
980 * <li> EFU05: Failed to delete file
981 * </ul>
982 *
983 * @param pFile
984 * a file in a folder to be deleted from.
985 * This is either a metapost file or a latex main file.
986 * @param filter
987 * a filter which decides which files
988 * from the parent directory of <code>pFile</code> to delete.
989 * @param allowsDirs
990 * Whether deletion also allows directories.
991 */
992 // used in LatexPreProcessor.clearTargetMp
993 // used in LatexPreProcessor.clearTargetTex only
994 void deleteX(File pFile, FileFilter filter, boolean allowsDirs) {
995 // FIXME: not true for clear target.
996 // Required: cleanup in order reverse to creation.
997 assert pFile.exists()
998 && !pFile.isDirectory() : "Expected existing (regular) file " + pFile;
999 File dir = pFile.getParentFile();
1000 // may log warning WFU01
1001 File[] found = listFilesOrWarn(dir);
1002 if (found == null) {
1003 // Here, logging WFU01 already done
1004 return;
1005 }
1006 for (File delFile : found) {
1007 //System.out.println("delFile: "+delFile);
1008 // FIXME: not true for clear target.
1009 // Required: cleanup in order reverse to creation.
1010 //assert delFile.exists();
1011 if (filter.accept(delFile)) {
1012 assert delFile.exists() && (!delFile.isDirectory()
1013 || allowsDirs) : "Expected existing (regular) file " + delFile;
1014 // may log EFU05: failed to delete
1015 deleteOrError(delFile, allowsDirs);
1016 }
1017 }
1018 }
1019
1020 /**
1021 * Deletes <code>delFile</code> or logs a warning.
1022 * <p>
1023 * Logging:
1024 * EFU05: failed to delete
1025 *
1026 * @param delFile
1027 * the existing file to be deleted.
1028 * This must not be a directory.
1029 * @param allowsDirs
1030 * Whether deletion also allows directories.
1031 */
1032 void deleteOrError(File delFile, boolean allowsDirs) {
1033 assert delFile.exists() && (!delFile.isDirectory() || allowsDirs)
1034 : "Expected existing (regular) file " + delFile;
1035 if (!FileUtils.deleteQuietly(delFile)) {
1036 this.log.error("EFU05: Cannot delete file '" + delFile + "'. ");
1037 }
1038 }
1039
1040 /**
1041 * Moves file <code>fromFile</code> to <code>toFile</code>
1042 * or logs a warning.
1043 * <p>
1044 * Logging:
1045 * EFU06: failed to move.
1046 *
1047 * @param fromFile
1048 * the existing file to be moved.
1049 * This must not be a directory.
1050 * @param toFile
1051 * the file to be moved to
1052 * This must not be a directory.
1053 */
1054 void moveOrError(File fromFile, File toFile) {
1055 assert fromFile.exists()
1056 && !fromFile.isDirectory() : "Expected existing (regular) source file "
1057 + fromFile;
1058 assert !toFile.isDirectory() : "Expected (regular) target file " + toFile;
1059 boolean success = fromFile.renameTo(toFile);
1060 if (!success) {
1061 this.log.error(
1062 "EFU06: Cannot move file '" + fromFile + "' to '" + toFile + "'. ");
1063 }
1064 }
1065
1066 /**
1067 * Deletes all files in <code>texDir</code> including subdirectories
1068 * which are not in <code>orgNode</code>.
1069 * The background is, that <code>orgNode</code> represents the files
1070 * originally in <code>texDir</code>.
1071 * <p>
1072 * Logging:
1073 * <ul>
1074 * <li> WFU01: Cannot read directory
1075 * <li> EFU05: Cannot delete...
1076 * </ul>
1077 *
1078 * @param orgNode
1079 *
1080 * @param texDir
1081 *
1082 * @param pytexPrefixOutFolder
1083 * for checks only.
1084 */
1085 // used in LatexProcessor.create() only
1086 // FIXME: warn if deletion failed.
1087 void cleanUp(DirNode orgNode, File texDir, String pytexPrefixOutFolder) {
1088 // constructor DirNode may log warning WFU01 Cannot read directory
1089 // cleanUpRec may log warning EFU05 Cannot delete...
1090 cleanUpRec(texDir, orgNode, new DirNode(texDir, this), pytexPrefixOutFolder);
1091 }
1092
1093 /**
1094 * Deletes all files in <code>currNode</code>
1095 * which are not in <code>orgNode</code> recursively
1096 * including subdirectories.
1097 * The background is, that <code>orgNode</code> represents the files
1098 * originally in the directory and <code>currNode</code>
1099 * the current ones at the end of the creating goal.
1100 * <p>
1101 * Logging:
1102 * EFU05: Cannot delete...
1103 *
1104 * @param dir
1105 * the directory where to cleanup.
1106 * @param origNode
1107 * the node representing the original files.
1108 * This is the latex source directory or a subdirectory.
1109 * @param currNode
1110 * the node representing the current files.
1111 * This is the latex source directory or a subdirectory.
1112 */
1113 // used in cleanUp only
1114 private void cleanUpRec(File dir, DirNode origNode, DirNode currNode, String pytexPrefixOutFolder) {
1115 Set<String> origSubdirs = origNode.getSubdirs().keySet();
1116 Set<String> currSubdirs =
1117 new TreeSet<String>(currNode.getSubdirs().keySet());
1118 boolean containsAll = currSubdirs.containsAll(origSubdirs);
1119 assert containsAll;
1120 currSubdirs.removeAll(origSubdirs);
1121 Set<String> diffSet = currSubdirs;
1122 String regex = pytexPrefixOutFolder + ".+";// represents file name
1123 for (String name : diffSet) {
1124 assert name.matches(regex);
1125 System.out.println("del: " + new File(dir, name));
1126 deleteOrError(new File(dir, name), true);
1127 }
1128 File file;
1129 for (String key : origNode.getSubdirs().keySet()) {
1130 file = new File(dir, key);
1131 cleanUpRec(file, origNode.getSubdirs().get(key),
1132 currNode.getSubdirs().get(key), pytexPrefixOutFolder);
1133 }
1134 Collection<String> currFileNames = currNode.getRegularFileNames();
1135 currFileNames.removeAll(origNode.getRegularFileNames());
1136
1137 for (String fileName : currFileNames) {
1138 file = new File(dir, fileName);
1139 // may log error EFU05: Cannot delete file
1140 deleteOrError(file, false);
1141 }
1142 }
1143
1144 // TBD: clarify whether this hack is really needed.
1145 /**
1146 * Temporarily generated file to be passed to {@link Converter#Makeindex}
1147 * to allow to determine the version of the tool.
1148 */
1149 private static File EMPTY_IDX;
1150
1151 // unsorted and not unified index created by latex
1152 final static String SUFFIX_IDX = ".idx";
1153 // sorted and unified index created by makeindex
1154 final static String SUFFIX_IND = ".ind";
1155 // log file created by makeindex
1156 final static String SUFFIX_ILG = ".ilg";
1157
1158
1159
1160 static File getEmptyIdx() {
1161 if (EMPTY_IDX == null) {
1162 try {
1163 EMPTY_IDX = File.createTempFile("forMakeindex", SUFFIX_IDX);
1164 EMPTY_IDX.deleteOnExit();
1165 replaceSuffix(EMPTY_IDX, SUFFIX_IND).deleteOnExit();
1166 replaceSuffix(EMPTY_IDX, SUFFIX_ILG).deleteOnExit();
1167 } catch (Exception e) {
1168 // TBD: eliminate: shall be a warning or even less than that.
1169 // But to that end, this must not be static.
1170 throw new IllegalStateException("Could not create temp file.");
1171 }
1172 }
1173 return EMPTY_IDX;
1174 }
1175
1176
1177 /**
1178 * The part of the headline of generated files for injections
1179 * after the comment symbol.
1180 * Used e.g.
1181 * for <code>.latexmkrc</code> and for <code>.chktex</code>.
1182 * This headline signifies,
1183 * that the file was created by this software.
1184 * As a consequence,
1185 * it may be deleted or overwritten by this software.
1186 * Else this is not done.
1187 * Note that the headline is the first line,
1188 * except the file has a shebang like <code>.latexmkrc</code>.
1189 * Since the shebang must be in the first line and must be preserved,
1190 * the headline is the second line if a shebang is present.
1191 */
1192 static final String HEADLINE_GEN = " injection file written by latex plugin ";
1193
1194 // Could be in LatexProcessor or here in TexFileUtils.
1195 // Since it does logging and LatexProcessor does none so far
1196 // and since this fits layered architecture, we decided to put it here.
1197 /**
1198 * Returns whether the given file is created by this software.
1199 * This is assumed if the comment character followed by {@link #HEADLINE_GEN}
1200 * is the first line if no shebang is expected else the second line.
1201 * It is assumed that the file exists.
1202 *
1203 * Warnings:
1204 * WFU10: if the file has not been created by this software
1205 * WFU11: if it cannot be ensured that the file has been created by this software
1206 * or if the reader to read to decide cannot be closed.
1207 *
1208 * @param aFile
1209 * the file to be considered.
1210 * @param inj
1211 * the injection for which the file is created.
1212 * What is used is merely the comment character and whether there is a shebang line.
1213 * @return
1214 * whether the given file is created by this software.
1215 * This is assumed if it can be proved that
1216 * the first line starts with the comment symbol
1217 * followed by {@link #HEADLINE_GEN},
1218 * except if the file has a shebang in the first line,
1219 * then the second line takes the role of the headline.
1220 * If and only if false, an warning is emitted.
1221 */
1222 boolean isCreatedByMyself(File aFile, Injection inj) {
1223 assert aFile.exists() : "File " + aFile + " expected to exist. ";
1224 // to be checked whether it shall be overwritten
1225 try {
1226 if (!aFile.isDirectory()) {
1227 // constructor of FileReader may throw
1228 // FileNotFoundException which is an IOException:
1229 // Since it does exist and is not a directory,
1230 // it is unreadable for some other reason
1231 BufferedReader reader =
1232 new BufferedReader(new FileReader(aFile));
1233 // TBD: treat IOException better
1234 // may throw IOException if an IO error occurs
1235 String headline = reader.readLine();
1236 if (inj.hasShebang()) {
1237 headline = reader.readLine();
1238 }
1239 // may throw IOException
1240 reader.close();
1241 // headline is null iff the aFile is empty
1242 if (headline != null && headline.startsWith(inj.commentStr() + HEADLINE_GEN)) {
1243 return true;
1244 }
1245 }
1246 // Here, the file was not written by this software
1247 // so it shall not be overwritten
1248 this.log.warn("WFU10: Cannot overwrite/clean file '" + aFile
1249 + "' because it is not self-created. ");
1250 return false;
1251
1252 } catch (IOException ioe) {
1253 // In both cases: could not read headline
1254 this.log.warn("WFU11: Refuse to overwrite/clean file '" + aFile
1255 + "' because it may be not self-created or has dangling reader. ");
1256 return false;
1257 }
1258 }
1259
1260
1261
1262 public static void main(String[] args) {
1263 String regex = args[0];
1264 String text = args[1];
1265 text = "xx\nyzzz";
1266 System.out.println("regex: " + regex);
1267 System.out.println("text: " + text);
1268 System.out.println("len: " + text.length());
1269
1270
1271 Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
1272 Matcher matcher = pattern.matcher(text);
1273 matcher.useAnchoringBounds(true);
1274 System.out.println("find: " + matcher.find());
1275 System.out.println("hitEnd: " + matcher.hitEnd());
1276 System.out.println("hitEnd: " + matcher.end());
1277 }
1278 }