001package org.unix4j.unix.sort;
002
003import java.util.Collections;
004import java.util.EnumSet;
005import java.util.Iterator;
006
007import org.unix4j.option.Option;
008import org.unix4j.unix.Sort;
009
010/**
011 * Options for the {@link Sort sort} command.
012 * <p>
013 * For most applications, it may be more convenient to use {@link Sort#Options} 
014 * instead of the option constants defined here.
015 * <p>
016 * <table>
017 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -c}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --check}</td><td>&nbsp;</td><td>Checks that the single input file is ordered as specified by the
018                        arguments and the collating sequence of the current locale. No 
019                        output is produced; only the exit code is affected.</td></tr>
020 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -m}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --merge}</td><td>&nbsp;</td><td>Merge only; the input file are assumed to be already sorted.</td></tr>
021 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -u}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --unique}</td><td>&nbsp;</td><td>Unique: suppress all but one in each set of lines having equal keys.
022                        If used with the {@code -c} option, checks that there are no lines 
023                        with duplicate keys, in addition to checking that the input file is 
024                        sorted.</td></tr>
025 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -b}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --ignoreLeadingBlanks}</td><td>&nbsp;</td><td>Ignore leading blanks. 
026                        (This option is ignored if a comparator operand is present).</td></tr>
027 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -d}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --dictionaryOrder}</td><td>&nbsp;</td><td>Consider only blanks and alphanumeric characters.
028                        (This option is ignored if a comparator operand is present).</td></tr>
029 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -f}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --ignoreCase}</td><td>&nbsp;</td><td>Consider all lowercase characters that have uppercase equivalents to
030                        be the uppercase equivalent for the purposes of comparison.
031                        (This option is ignored if a comparator operand is present).</td></tr>
032 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -n}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --numericSort}</td><td>&nbsp;</td><td>Sort numerically; the number begins each line and consists of 
033                        optional blanks, an optional minus sign, and zero or more digits
034                        possibly separated by thousands separators, optionally followed by a
035                        decimal-point character and zero or more digits. An empty number is
036                        treated as '0'. The current local specifies the decimal-point 
037                        character and thousands separator.
038                        <p>
039                        Comparison is exact; there is no rounding error.
040                        <p>
041                        Neither a leading '+' nor exponential notation is recognized. To 
042                        compare such strings numerically, use the
043                        {@code -genericNumericSort (-g)} option. 
044<p>
045                        (This option is ignored if a comparator operand is present).</td></tr>
046 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -g}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --generalNumericSort}</td><td>&nbsp;</td><td>Sort numerically, using the standard {@link Double#parseDouble(String)}  
047                        function to convert a trimmed line to a double-precision floating 
048                        point number. This allows floating point numbers to be specified in 
049                        scientific notation, like 1.0e-34 and 10e100. 
050                        <p>
051                        Uses the following collating sequence: Lines that cannot be parsed 
052                        because they do not represent valid double values (in alpha-numeric
053                        order); "-Infinity"; finite numbers in ascending numeric order 
054                        (with -0 < +0); "Infinity"; "NaN".
055<p>
056                        This option is usually slower than {@code -numeric-sort (-n)} and it
057                        can lose information when converting to floating point.         
058                <p>
059                        (This option is ignored if a comparator operand is present).</td></tr>
060 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -h}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --humanNumericSort}</td><td>&nbsp;</td><td>Sort numerically, first by numeric sign (negative, zero, or 
061                        positive); then by SI suffix (either empty, or 'k' or 'K', or one 
062                        of 'MGTPEZY', in that order); and finally by numeric value. For
063                        example, '1023M' sorts before '1G' because 'M' (mega) precedes 'G' 
064                        (giga) as an SI suffix. 
065                        <p>
066                        This option sorts values that are consistently scaled to the nearest
067                        suffix, regardless of whether suffixes denote powers of 1000 or
068                        1024, and it therefore sorts the output of any single invocation of 
069                        the {@code ls} command that are invoked with the --human-readable 
070                        option. 
071                        <p>
072                        The syntax for numbers is the same as for the
073                        {@code --numericSort (-n)} option; the SI suffix must immediately 
074                        follow the number.              
075<p>
076                        (This option is ignored if a comparator operand is present).</td></tr>
077 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -M}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --monthSort}</td><td>&nbsp;</td><td>An initial string, consisting of any amount of blanks, followed by a
078                        month name abbreviation, is folded to UPPER case and compared in the
079                        order: (unknown) < 'JAN' < ... < 'DEC'. The current locale
080                        determines the month spellings.</td></tr>
081 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -V}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --versionSort}</td><td>&nbsp;</td><td>Sort by version name and number. It behaves like a standard sort, 
082                        except that each sequence of decimal digits is treated numerically 
083                        as an index/version number.
084                        <p>
085                        (This option is ignored if a comparator operand is present).</td></tr>
086 * <tr valign="top"><td width="10px"></td><td nowrap="nowrap">{@code -r}</td><td>&nbsp;&nbsp;</td><td nowrap="nowrap">{@code --reverse}</td><td>&nbsp;</td><td>Reverse the sense of comparisons.</td></tr>
087 * </table>
088 */
089public enum SortOption implements Option, SortOptions {
090        /**
091         * Option <b>{@code --check}</b>, <b>{@code -c}</b>: 
092         * Checks that the single input file is ordered as specified by the
093                        arguments and the collating sequence of the current locale. No 
094                        output is produced; only the exit code is affected.
095         */
096        check('c'),
097        /**
098         * Option <b>{@code --merge}</b>, <b>{@code -m}</b>: 
099         * Merge only; the input file are assumed to be already sorted.
100         */
101        merge('m'),
102        /**
103         * Option <b>{@code --unique}</b>, <b>{@code -u}</b>: 
104         * Unique: suppress all but one in each set of lines having equal keys.
105                        If used with the {@code -c} option, checks that there are no lines 
106                        with duplicate keys, in addition to checking that the input file is 
107                        sorted.
108         */
109        unique('u'),
110        /**
111         * Option <b>{@code --ignoreLeadingBlanks}</b>, <b>{@code -b}</b>: 
112         * Ignore leading blanks. 
113                        (This option is ignored if a comparator operand is present).
114         */
115        ignoreLeadingBlanks('b'),
116        /**
117         * Option <b>{@code --dictionaryOrder}</b>, <b>{@code -d}</b>: 
118         * Consider only blanks and alphanumeric characters.
119                        (This option is ignored if a comparator operand is present).
120         */
121        dictionaryOrder('d'),
122        /**
123         * Option <b>{@code --ignoreCase}</b>, <b>{@code -f}</b>: 
124         * Consider all lowercase characters that have uppercase equivalents to
125                        be the uppercase equivalent for the purposes of comparison.
126                        (This option is ignored if a comparator operand is present).
127         */
128        ignoreCase('f'),
129        /**
130         * Option <b>{@code --numericSort}</b>, <b>{@code -n}</b>: 
131         * Sort numerically; the number begins each line and consists of 
132                        optional blanks, an optional minus sign, and zero or more digits
133                        possibly separated by thousands separators, optionally followed by a
134                        decimal-point character and zero or more digits. An empty number is
135                        treated as '0'. The current local specifies the decimal-point 
136                        character and thousands separator.
137                        <p>
138                        Comparison is exact; there is no rounding error.
139                        <p>
140                        Neither a leading '+' nor exponential notation is recognized. To 
141                        compare such strings numerically, use the
142                        {@code -genericNumericSort (-g)} option. 
143<p>
144                        (This option is ignored if a comparator operand is present).
145         */
146        numericSort('n'),
147        /**
148         * Option <b>{@code --generalNumericSort}</b>, <b>{@code -g}</b>: 
149         * Sort numerically, using the standard {@link Double#parseDouble(String)}  
150                        function to convert a trimmed line to a double-precision floating 
151                        point number. This allows floating point numbers to be specified in 
152                        scientific notation, like 1.0e-34 and 10e100. 
153                        <p>
154                        Uses the following collating sequence: Lines that cannot be parsed 
155                        because they do not represent valid double values (in alpha-numeric
156                        order); "-Infinity"; finite numbers in ascending numeric order 
157                        (with -0 < +0); "Infinity"; "NaN".
158<p>
159                        This option is usually slower than {@code -numeric-sort (-n)} and it
160                        can lose information when converting to floating point.         
161                <p>
162                        (This option is ignored if a comparator operand is present).
163         */
164        generalNumericSort('g'),
165        /**
166         * Option <b>{@code --humanNumericSort}</b>, <b>{@code -h}</b>: 
167         * Sort numerically, first by numeric sign (negative, zero, or 
168                        positive); then by SI suffix (either empty, or 'k' or 'K', or one 
169                        of 'MGTPEZY', in that order); and finally by numeric value. For
170                        example, '1023M' sorts before '1G' because 'M' (mega) precedes 'G' 
171                        (giga) as an SI suffix. 
172                        <p>
173                        This option sorts values that are consistently scaled to the nearest
174                        suffix, regardless of whether suffixes denote powers of 1000 or
175                        1024, and it therefore sorts the output of any single invocation of 
176                        the {@code ls} command that are invoked with the --human-readable 
177                        option. 
178                        <p>
179                        The syntax for numbers is the same as for the
180                        {@code --numericSort (-n)} option; the SI suffix must immediately 
181                        follow the number.              
182<p>
183                        (This option is ignored if a comparator operand is present).
184         */
185        humanNumericSort('h'),
186        /**
187         * Option <b>{@code --monthSort}</b>, <b>{@code -M}</b>: 
188         * An initial string, consisting of any amount of blanks, followed by a
189                        month name abbreviation, is folded to UPPER case and compared in the
190                        order: (unknown) < 'JAN' < ... < 'DEC'. The current locale
191                        determines the month spellings.
192         */
193        monthSort('M'),
194        /**
195         * Option <b>{@code --versionSort}</b>, <b>{@code -V}</b>: 
196         * Sort by version name and number. It behaves like a standard sort, 
197                        except that each sequence of decimal digits is treated numerically 
198                        as an index/version number.
199                        <p>
200                        (This option is ignored if a comparator operand is present).
201         */
202        versionSort('V'),
203        /**
204         * Option <b>{@code --reverse}</b>, <b>{@code -r}</b>: 
205         * Reverse the sense of comparisons.
206         */
207        reverse('r');
208        
209        private final char acronym;
210        private SortOption(char acronym) {
211                this.acronym = acronym;
212        }
213        @Override
214        public Class<SortOption> optionType() {
215                return SortOption.class;
216        }
217        /**
218         * Returns the option with the given {@code acronym}, or {@code null} if no
219         * such option is found.
220         * 
221         * @param acronym the option {@link #acronym() acronym}
222         * @return      the option with the given {@code acronym} or {@code null} if it
223         *                      is not found
224         */
225        public static SortOption findByAcronym(char acronym) {
226                for (final SortOption opt : values()) {
227                        if (opt.acronym() == acronym) return opt;
228                }
229                return null;
230        }
231        @Override
232        public char acronym() {
233                return acronym;
234        }
235        @Override
236        public boolean isSet(SortOption option) {
237                return equals(option);
238        }
239        /**
240         * Returns a new set with {@code this} active option.
241         * 
242         * @return a new set containing this option
243         */
244        @Override
245        public EnumSet<SortOption> asSet() {
246                return EnumSet.of(this);
247        }
248        
249        /**
250         * Returns an immutable iterator returning o single element: {@code this} 
251         * option.
252         * 
253         * @return an immutable iterator with {@code this} active option.
254         */
255        @Override
256        public Iterator<SortOption> iterator() {
257                return Collections.singleton(this).iterator();
258        }
259        
260        /**
261         * Returns 1 as this is a set with a single element: {@code this} option
262         * 
263         * @return one
264         */
265        @Override
266        public int size() {
267                return 1;
268        }
269
270        /**
271         * Returns true if the {@link Option#acronym() acronym} should be used for
272         * the specified {@code option} in string representations. 
273         * <p>
274         * This method returns always true for all options.
275         *  
276         * @param option
277         *            the option of interest
278         * @return always true indicating that option acronyms should be used in
279         *                      string representations for all options
280         */
281        @Override
282        public boolean useAcronymFor(SortOption option) {
283                return true;
284        }
285}