#!/bin/sh

# xzgrep -- a wrapper around a grep program that decompresses files as needed
# Adapted from a version sent by Charles Levert <charles@comm.polymtl.ca>

# Copyright (C) 1998, 2001, 2002, 2006, 2007 Free Software Foundation
# Copyright (C) 1993 Jean-loup Gailly
# Modified for XZ Utils by Andrew Dudman and Lasse Collin.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

#SET_PATH - This line is a placeholder to ease patching this script.
  16. # Instead of unsetting XZ_OPT, just make sure that xz will use file format
  17. # autodetection. This way memory usage limit and thread limit can be
  18. # specified via XZ_OPT. With gzip, bzip2, and lzop it's OK to just unset the
  19. # environment variables.
  20. xz='xz --format=auto'
  21. unset GZIP BZIP BZIP2 LZOP
  22. case ${0##*/} in
  23. *egrep*) prog=xzegrep; grep=${GREP:-grep -E};;
  24. *fgrep*) prog=xzfgrep; grep=${GREP:-grep -F};;
  25. *) prog=xzgrep; grep=${GREP:-grep};;
  26. esac
  27. version="$prog (XZ Utils) 5.4.2"
  28. usage="Usage: ${0##*/} [OPTION]... [-e] PATTERN [FILE]...
  29. Look for instances of PATTERN in the input FILEs, using their
  30. uncompressed contents if they are compressed.
  31. OPTIONs are the same as for '$grep'.
  32. Report bugs to <xz@tukaani.org>."
  33. # sed script to escape all ' for the shell, and then (to handle trailing
  34. # newlines correctly) turn trailing X on last line into '.
  35. escape='
  36. s/'\''/'\''\\'\'''\''/g
  37. $s/X$/'\''/
  38. '
  39. operands=
  40. have_pat=0
  41. files_with_matches=0
  42. files_without_matches=0
  43. no_filename=0
  44. with_filename=0
  45. # See if -H and --label options are supported (GNU and *BSDs).
  46. if test f:x = "$(eval "echo x | $grep -H --label=f x 2> /dev/null")"; then
  47. grep_supports_label=1
  48. else
  49. grep_supports_label=0
  50. fi
  51. while test $# -ne 0; do
  52. option=$1
  53. shift
  54. optarg=
  55. case $option in
  56. (-[0123456789abcdEFGhHiIKlLnoPqrRsTuUvVwxyzZ]*[!0123456789]*)
  57. # Something like -Fiv was specified, that is, $option contains more
  58. # than one option of which the first option (in this example -F)
  59. # doesn't take an argument. Split the first option into a standalone
  60. # argument and continue parsing the rest of the options (in this example,
  61. # replace -Fiv with -iv in the argument list and set option=-F).
  62. #
  63. # If there are digits [0-9] they are treated as if they were a single
  64. # option character because this syntax is an alias for -C for GNU grep.
  65. # For example, "grep -25F" is equivalent to "grep -C25 -F". If only
  66. # digits are specified like "grep -25" we don't get here because the
  67. # above pattern in the case-statement doesn't match such strings.
  68. arg2=-\'$(LC_ALL=C expr "X${option}X" : 'X-.[0-9]*\(.*\)' |
  69. LC_ALL=C sed "$escape")
  70. eval "set -- $arg2 "'${1+"$@"}'
  71. option=$(LC_ALL=C expr "X$option" : 'X\(-.[0-9]*\)');;
  72. (--binary-*=* | --[lm]a*=* | --reg*=*)
  73. # These options require an argument and an argument has been provided
  74. # with the --foo=argument syntax. All is good.
  75. ;;
  76. (-[ABCDefmX] | --binary-* | --file | --[lm]a* | --reg*)
  77. # These options require an argument which should now be in $1.
  78. # If it isn't, display an error and exit.
  79. case ${1?"$option option requires an argument"} in
  80. (*\'*)
  81. optarg=" '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");;
  82. (*)
  83. optarg=" '$1'";;
  84. esac
  85. shift;;
  86. (--)
  87. break;;
  88. (-?*)
  89. ;;
  90. (*)
  91. case $option in
  92. (*\'*)
  93. operands="$operands '"$(printf '%sX\n' "$option" |
  94. LC_ALL=C sed "$escape");;
  95. (*)
  96. operands="$operands '$option'";;
  97. esac
  98. ${POSIXLY_CORRECT+break}
  99. continue;;
  100. esac
  101. case $option in
  102. (-[drRzZ] | --di* | --exc* | --inc* | --rec* | --nu*)
  103. printf >&2 '%s: %s: Option not supported\n' "$0" "$option"
  104. exit 2;;
  105. (-[ef]* | --file | --file=* | --reg*)
  106. have_pat=1;;
  107. (--h | --he | --hel | --help)
  108. printf '%s\n' "$usage" || exit 2
  109. exit;;
  110. (-H | --wi | --wit | --with | --with- | --with-f | --with-fi \
  111. | --with-fil | --with-file | --with-filen | --with-filena | --with-filenam \
  112. | --with-filename)
  113. with_filename=1
  114. continue;;
  115. (-l | --files-with-*)
  116. files_with_matches=1
  117. continue;;
  118. (-L | --files-witho*)
  119. files_without_matches=1
  120. continue;;
  121. (-h | --no-f*)
  122. no_filename=1;;
  123. (-V | --v | --ve | --ver | --vers | --versi | --versio | --version)
  124. printf '%s\n' "$version" || exit 2
  125. exit;;
  126. esac
  127. case $option in
  128. (*\'?*)
  129. option=\'$(printf '%sX\n' "$option" | LC_ALL=C sed "$escape");;
  130. (*)
  131. option="'$option'";;
  132. esac
  133. grep="$grep $option$optarg"
  134. done
  135. eval "set -- $operands "'${1+"$@"}'
  136. if test $have_pat -eq 0; then
  137. case ${1?"Missing pattern; try \`${0##*/} --help' for help"} in
  138. (*\'*)
  139. grep="$grep -e '"$(printf '%sX\n' "$1" | LC_ALL=C sed "$escape");;
  140. (*)
  141. grep="$grep -e '$1'";;
  142. esac
  143. shift
  144. fi
  145. if test $# -eq 0; then
  146. set -- -
  147. fi
  148. exec 3>&1
  149. # res=1 means that no file matched yet
  150. res=1
  151. for i; do
  152. case $i in
  153. *[-.][zZ] | *_z | *[-.]gz | *.t[ag]z) uncompress="gzip -cdf";;
  154. *[-.]bz2 | *[-.]tbz | *.tbz2) uncompress="bzip2 -cdf";;
  155. *[-.]lzo | *[-.]tzo) uncompress="lzop -cdf";;
  156. *[-.]zst | *[-.]tzst) uncompress="zstd -cdfq";; # zstd needs -q.
  157. *) uncompress="$xz -cdfqQ";; # -qQ to ignore warnings like unsupp. check.
  158. esac
  159. # xz_status will hold the decompressor's exit status.
  160. # Exit status of grep (and in rare cases, printf or sed) is
  161. # available as the exit status of this assignment command.
  162. xz_status=$(
  163. exec 5>&1
  164. ($uncompress -- "$i" 5>&-; echo $? >&5) 3>&- |
  165. if test $files_with_matches -eq 1; then
  166. eval "$grep -q" && { printf '%s\n' "$i" || exit 2; }
  167. elif test $files_without_matches -eq 1; then
  168. eval "$grep -q" || {
  169. r=$?
  170. if test $r -eq 1; then
  171. printf '%s\n' "$i" || r=2
  172. fi
  173. exit $r
  174. }
  175. elif test $with_filename -eq 0 &&
  176. { test $# -eq 1 || test $no_filename -eq 1; }; then
  177. eval "$grep"
  178. elif test $grep_supports_label -eq 1; then
  179. # The grep implementation in use allows us to specify the filename
  180. # that grep will prefix to the output lines. This is faster and
  181. # less prone to security bugs than the fallback method that uses sed.
  182. # This also avoids confusing output with GNU grep >= 3.5 (2020-09-27)
  183. # which prints "binary file matches" to stderr instead of stdout.
  184. #
  185. # If reading from stdin, let grep use whatever name it prefers for
  186. # stdin. With GNU grep it is a locale-specific translated string.
  187. if test "x$i" = "x-"; then
  188. eval "$grep -H"
  189. else
  190. eval "$grep -H --label \"\$i\""
  191. fi
  192. else
  193. # Append a colon so that the last character will never be a newline
  194. # which would otherwise get lost in shell command substitution.
  195. i="$i:"
  196. # Escape & \ | and newlines only if such characters are present
  197. # (speed optimization).
  198. case $i in
  199. (*'
  200. '* | *'&'* | *'\'* | *'|'*)
  201. # If sed fails, set i to a known safe string to ensure that
  202. # failing sed did not create a half-escaped dangerous string.
  203. i=$(printf '%s\n' "$i" | LC_ALL=C sed 's/[&\|]/\\&/g; $!s/$/\\/') ||
  204. i='(unknown filename):';;
  205. esac
  206. # $i already ends with a colon so do not add it here.
  207. sed_script="s|^|$i|"
  208. # If grep or sed fails, pick the larger value of the two exit statuses.
  209. # If sed fails, use at least 2 since we use >= 2 to indicate errors.
  210. r=$(
  211. exec 4>&1
  212. (eval "$grep" 4>&-; echo $? >&4) 3>&- |
  213. LC_ALL=C sed "$sed_script" >&3 4>&-
  214. ) || {
  215. sed_status=$?
  216. test "$sed_status" -lt 2 && sed_status=2
  217. test "$r" -lt "$sed_status" && r=$sed_status
  218. }
  219. exit $r
  220. fi >&3 5>&-
  221. )
  222. r=$?
  223. # If grep or sed or other non-decompression command failed with a signal,
  224. # exit immediately and ignore the possible remaining files.
  225. #
  226. # NOTE: Instead of 128 + signal_number, some shells use
  227. # 256 + signal_number (ksh) or 384 + signal_number (yash).
  228. # This is fine for us since their "exit" and "kill -l" commands take
  229. # this into account. (At least the versions I tried do but there is
  230. # a report of an old ksh variant whose "exit" truncates the exit status
  231. # to 8 bits without any special handling for values indicating a signal.)
  232. test "$r" -ge 128 && exit "$r"
  233. if test -z "$xz_status"; then
  234. # Something unusual happened, for example, we got a signal and
  235. # the exit status of the decompressor was never echoed and thus
  236. # $xz_status is empty. Exit immediately and ignore the possible
  237. # remaining files.
  238. exit 2
  239. elif test "$xz_status" -ge 128; then
  240. # The decompressor died due to a signal. SIGPIPE is ignored since it can
  241. # occur if grep exits before the whole file has been decompressed (grep -q
  242. # can do that). If the decompressor died with some other signal, exit
  243. # immediately and ignore the possible remaining files.
  244. test "$(kill -l "$xz_status" 2> /dev/null)" != "PIPE" && exit "$xz_status"
  245. elif test "$xz_status" -gt 0; then
  246. # Decompression failed but we will continue with the remaining
  247. # files anwyway. Set exit status to at least 2 to indicate an error.
  248. test "$r" -lt 2 && r=2
  249. fi
  250. # Since res=1 is the initial value, we only need to care about
  251. # matches (r == 0) and errors (r >= 2) here; r == 1 can be ignored.
  252. if test "$r" -ge 2; then
  253. # An error occurred in decompressor, grep, or some other command. Update
  254. # res unless a larger error code has been seen with an earlier file.
  255. test "$res" -lt "$r" && res=$r
  256. elif test "$r" -eq 0; then
  257. # grep found a match and no errors occurred. Update res if no errors have
  258. # occurred with earlier files.
  259. test "$res" -eq 1 && res=0
  260. fi
  261. done
  262. # 0: At least one file matched and no errors occurred.
  263. # 1: No matches were found and no errors occurred.
  264. # >=2: Error. It's unknown if matches were found.
  265. exit "$res"