init
This commit is contained in:
commit
38355d2442
9083 changed files with 1225834 additions and 0 deletions
|
|
@ -0,0 +1 @@
|
|||
pip
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
Copyright (c) 2017 Chris Kuehl, Anthony Sottile
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
||||
|
|
@ -0,0 +1,162 @@
|
|||
Metadata-Version: 2.1
|
||||
Name: identify
|
||||
Version: 2.4.12
|
||||
Summary: File identification library for Python
|
||||
Home-page: https://github.com/pre-commit/identify
|
||||
Author: Chris Kuehl
|
||||
Author-email: ckuehl@ocf.berkeley.edu
|
||||
License: MIT
|
||||
Platform: UNKNOWN
|
||||
Classifier: License :: OSI Approved :: MIT License
|
||||
Classifier: Programming Language :: Python :: 3
|
||||
Classifier: Programming Language :: Python :: 3 :: Only
|
||||
Classifier: Programming Language :: Python :: 3.7
|
||||
Classifier: Programming Language :: Python :: 3.8
|
||||
Classifier: Programming Language :: Python :: 3.9
|
||||
Classifier: Programming Language :: Python :: 3.10
|
||||
Classifier: Programming Language :: Python :: Implementation :: CPython
|
||||
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
||||
Requires-Python: >=3.7
|
||||
Description-Content-Type: text/markdown
|
||||
License-File: LICENSE
|
||||
Provides-Extra: license
|
||||
Requires-Dist: ukkonen ; extra == 'license'
|
||||
|
||||
identify
|
||||
========
|
||||
|
||||
[Build status](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master)
|
||||
[Coverage](https://dev.azure.com/asottile/asottile/_build/latest?definitionId=67&branchName=master)
|
||||
[pre-commit.ci status](https://results.pre-commit.ci/latest/github/pre-commit/identify/master)
|
||||
[PyPI package](https://pypi.python.org/pypi/identify)
|
||||
|
||||
File identification library for Python.
|
||||
|
||||
Given a file (or some information about a file), return a set of standardized
|
||||
tags identifying what the file is.
|
||||
|
||||
## Installation
|
||||
|
||||
`pip install identify`
|
||||
|
||||
## Usage
|
||||
### With a file on disk
|
||||
|
||||
If you have an actual file on disk, you can get the most information possible
|
||||
(a superset of all other methods):
|
||||
|
||||
```python
|
||||
>>> from identify import identify
|
||||
>>> identify.tags_from_path('/path/to/file.py')
|
||||
{'file', 'text', 'python', 'non-executable'}
|
||||
>>> identify.tags_from_path('/path/to/file-with-shebang')
|
||||
{'file', 'text', 'shell', 'bash', 'executable'}
|
||||
>>> identify.tags_from_path('/bin/bash')
|
||||
{'file', 'binary', 'executable'}
|
||||
>>> identify.tags_from_path('/path/to/directory')
|
||||
{'directory'}
|
||||
>>> identify.tags_from_path('/path/to/symlink')
|
||||
{'symlink'}
|
||||
```
|
||||
|
||||
When using a file on disk, the checks performed are:
|
||||
|
||||
* File type (file, symlink, directory, socket)
|
||||
* Mode (is it executable?)
|
||||
* File name (mostly based on extension)
|
||||
* If executable, the shebang is read and the interpreter interpreted
|
||||
|
||||
|
||||
### If you only have the filename
|
||||
|
||||
```python
|
||||
>>> identify.tags_from_filename('file.py')
|
||||
{'text', 'python'}
|
||||
```
|
||||
|
||||
|
||||
### If you only have the interpreter
|
||||
|
||||
```python
|
||||
>>> identify.tags_from_interpreter('python3.5')
|
||||
{'python', 'python3'}
|
||||
>>> identify.tags_from_interpreter('bash')
|
||||
{'shell', 'bash'}
|
||||
>>> identify.tags_from_interpreter('some-unrecognized-thing')
|
||||
set()
|
||||
```
|
||||
|
||||
### As a CLI
|
||||
|
||||
```
|
||||
$ identify-cli --help
|
||||
usage: identify-cli [-h] [--filename-only] path
|
||||
|
||||
positional arguments:
|
||||
path
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--filename-only
|
||||
```
|
||||
|
||||
```console
|
||||
$ identify-cli setup.py; echo $?
|
||||
["file", "non-executable", "python", "text"]
|
||||
0
|
||||
$ identify-cli setup.py --filename-only; echo $?
|
||||
["python", "text"]
|
||||
0
|
||||
$ identify-cli wat.wat; echo $?
|
||||
wat.wat does not exist.
|
||||
1
|
||||
$ identify-cli wat.wat --filename-only; echo $?
|
||||
1
|
||||
```
|
||||
|
||||
### Identifying LICENSE files
|
||||
|
||||
`identify` also has an API for determining what type of license is contained
|
||||
in a file. This routine is roughly based on the approaches used by
|
||||
[licensee] (the Ruby gem that GitHub uses to figure out the license for a
|
||||
repo).
|
||||
|
||||
The approach that `identify` uses is as follows:
|
||||
|
||||
1. Strip the copyright line
|
||||
2. Normalize all whitespace
|
||||
3. Return any exact matches
|
||||
4. Return the closest by edit distance (where edit distance < 5%)
|
||||
|
||||
To use the API, install via `pip install identify[license]`
|
||||
|
||||
```pycon
|
||||
>>> from identify import identify
|
||||
>>> identify.license_id('LICENSE')
|
||||
'MIT'
|
||||
```
|
||||
|
||||
The return value of the `license_id` function is an [SPDX] id. Currently
|
||||
licenses are sourced from [choosealicense.com].
|
||||
|
||||
[licensee]: https://github.com/benbalter/licensee
|
||||
[SPDX]: https://spdx.org/licenses/
|
||||
[choosealicense.com]: https://github.com/github/choosealicense.com
|
||||
|
||||
## How it works
|
||||
|
||||
A call to `tags_from_path` does this:
|
||||
|
||||
1. What is the type: file, symlink, directory? If it's not a file, stop here.
|
||||
2. Is it executable? Add the appropriate tag.
|
||||
3. Do we recognize the file extension? If so, add the appropriate tags, stop
|
||||
here. These tags would include binary/text.
|
||||
4. Peek at the first X bytes of the file. Use these to determine whether it is
|
||||
binary or text, add the appropriate tag.
|
||||
5. If identified as text above, try to read and interpret the shebang, and add
|
||||
appropriate tags.
|
||||
|
||||
By design, this means we don't need to partially read files where we recognize
|
||||
the file extension.
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
../../../bin/identify-cli,sha256=k4Bj-fs5Lb1EKZlZV_TG9hcrS7MaIUCByrP7cnD0nP8,248
|
||||
identify-2.4.12.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
||||
identify-2.4.12.dist-info/LICENSE,sha256=7bwq07cIT6yHPhsbRQ_TcNkU65f38xJjtP9l9H3BDEM,1072
|
||||
identify-2.4.12.dist-info/METADATA,sha256=YSRKqaZ1Mz_Z2NKdzbixniVp9uT3eEZItM2TjzK-sBM,4972
|
||||
identify-2.4.12.dist-info/RECORD,,
|
||||
identify-2.4.12.dist-info/WHEEL,sha256=z9j0xAa_JmUKMpmz72K0ZGALSM_n-wQVmGbleXx2VHg,110
|
||||
identify-2.4.12.dist-info/entry_points.txt,sha256=r4kj3kHUeFIRM1eWxt75VpJAH8_3YjZOdFR_X36VSzI,52
|
||||
identify-2.4.12.dist-info/top_level.txt,sha256=Rlt8stwsb21b0aSlWbp_2EuNfX8sdPOvAEF0-FxGGVs,9
|
||||
identify/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
identify/__pycache__/__init__.cpython-38.pyc,,
|
||||
identify/__pycache__/cli.cpython-38.pyc,,
|
||||
identify/__pycache__/extensions.cpython-38.pyc,,
|
||||
identify/__pycache__/identify.cpython-38.pyc,,
|
||||
identify/__pycache__/interpreters.cpython-38.pyc,,
|
||||
identify/cli.py,sha256=o-ZK-aKt5BzY0Cts21WwIBh7sj9ZvtJiuGWQKfYU6LA,730
|
||||
identify/extensions.py,sha256=PbvgS8dGB6ha9jMJ9DUlmaKXcJIWRLtNyXQsiwRratc,10960
|
||||
identify/identify.py,sha256=JVUfSKQP5B9KgWP3O7JEABrgtJ8tni6zqxi7Ywt7dZU,7932
|
||||
identify/interpreters.py,sha256=u2N0Xs1JGzs-AZ7mi9xxa8d3vQ3b6IuhYdFE8VkH8Z8,610
|
||||
identify/vendor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
||||
identify/vendor/__pycache__/__init__.cpython-38.pyc,,
|
||||
identify/vendor/__pycache__/licenses.cpython-38.pyc,,
|
||||
identify/vendor/licenses.py,sha256=jsArrb7_5qdhbkeIrDScNWhs9IZ1TEw_J81m05FZQBw,335105
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
Wheel-Version: 1.0
|
||||
Generator: bdist_wheel (0.37.1)
|
||||
Root-Is-Purelib: true
|
||||
Tag: py2-none-any
|
||||
Tag: py3-none-any
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
[console_scripts]
|
||||
identify-cli = identify.cli:main
|
||||
|
||||
|
|
@ -0,0 +1 @@
|
|||
identify
|
||||
Loading…
Add table
Add a link
Reference in a new issue