nexB/aboutcode-toolkit

Potential update for all the `--scancode` options as SCTK's output is updated AND update gen with `--scancode` option

chinyeungli opened this issue · 1 comment

Note that the output structure of SCTK (ScanCode Toolkit) has been changed in its development branch.
Current stable structure (version 31.2.4):

  "files": [
    {
      "path": "clean-text-0.3.0-mod/cleantext/isc_lic.py",
      "type": "file",
      "name": "isc_lic.py",
      "base_name": "isc_lic",
      "extension": ".py",
      "size": 9593,
      "date": "2020-12-04",
      "sha1": "ba25c99004d422e98c7f948ce6a7cf7914c69b23",
      "md5": "b64befb0e9457e941e362bbb2955e5e2",
      "sha256": "a089501312bc9caed493dd7cdb76f66757d92bab45c9b5861c627e2a20887573",
      "mime_type": "text/plain",
      "file_type": "Python script, UTF-8 Unicode text executable",
      "programming_language": "Python",
      "is_binary": false,
      "is_text": true,
      "is_archive": false,
      "is_media": false,
      "is_source": true,
      "is_script": true,
      "licenses": [
        {
          "key": "isc",
          "score": 99.0,
          "name": "ISC License",
          "short_name": "ISC License",
          "category": "Permissive",
          "is_exception": false,
          "owner": "ISC - Internet Systems Consortium",
          "homepage_url": "https://www.isc.org/software/license",
          "text_url": "http://fedoraproject.org/wiki/Licensing:MIT#Old_Style_with_legal_disclaimer_2",
          "reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:isc",
          "spdx_license_key": "ISC",
          "spdx_url": "https://spdx.org/licenses/ISC",
          "start_line": 1,
          "end_line": 1,
          "matched_rule": {
            "identifier": "isc_22.RULE",
            "license_expression": "isc",
            "licenses": [
              "isc"
            ],
            "is_license_text": false,
            "is_license_notice": false,
            "is_license_reference": true,
            "is_license_tag": false,
            "matcher": "2-aho",
            "rule_length": 2,
            "matched_length": 2,
            "match_coverage": 100.0,
            "rule_relevance": 99.0
          }
        },
        {
          "key": "mit",
          "score": 100.0,
          "name": "MIT License",
          "short_name": "MIT License",
          "category": "Permissive",
          "is_exception": false,
          "owner": "MIT",
          "homepage_url": "http://opensource.org/licenses/mit-license.php",
          "text_url": "http://opensource.org/licenses/mit-license.php",
          "reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:mit",
          "spdx_license_key": "MIT",
          "spdx_url": "https://spdx.org/licenses/MIT",
          "start_line": 5,
          "end_line": 15,
          "matched_rule": {
            "identifier": "mit.LICENSE",
            "license_expression": "mit",
            "licenses": [
              "mit"
            ],
            "is_license_text": true,
            "is_license_notice": false,
            "is_license_reference": false,
            "is_license_tag": false,
            "matcher": "2-aho",
            "rule_length": 112,
            "matched_length": 112,
            "match_coverage": 100.0,
            "rule_relevance": 100
          }
        }
      ],
      "license_expressions": [
        "isc",
        "mit"
      ],
      "percentage_of_license_text": 0.24,
      "copyrights": [],
      "holders": [],
      "authors": [],
      "packages": [],
      "emails": [],
      "urls": [
        {
          "url": "http://ftfy.readthedocs.org/",
          "start_line": 43,
          "end_line": 43
        }
      ],
      "files_count": 0,
      "dirs_count": 0,
      "size_count": 0,
      "scan_errors": []
    }
  ]

Development branch structure:

  "files": [
    {
      "path": "js-yaml-master/dist/js-yaml.js",
      "type": "file",
      "package_data": [],
      "for_packages": [
        "pkg:npm/js-yaml@4.1.0?uuid=ad9a873f-b200-48f1-b295-e3fa877f87d1"
      ],
      "detected_license_expression": "mit",
      "detected_license_expression_spdx": "MIT",
      "license_detections": [
        {
          "license_expression": "mit",
          "detection_log": [
            "not-combined"
          ],
          "matches": [
            {
              "score": 100.0,
              "start_line": 2,
              "end_line": 2,
              "matched_length": 2,
              "match_coverage": 100.0,
              "matcher": "2-aho",
              "license_expression": "mit",
              "rule_identifier": "mit_30.RULE",
              "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_30.RULE"
            }
          ]
        }
      ],
      "license_clues": [],
      "percentage_of_license_text": 0.02,
      "for_license_detections": [
        "mit-d023e558-35f1-ef53-5625-5f585054dde4"
      ],
      "copyrights": [],
      "holders": [],
      "authors": [],
      "emails": [],
      "urls": [
        {
          "url": "https://github.com/nodeca/js-yaml",
          "start_line": 2,
          "end_line": 2
        },
        {
          "url": "http://stackoverflow.com/questions/8458984",
          "start_line": 75,
          "end_line": 75
        },
        {
          "url": "https://en.wikipedia.org/wiki/UTF-16#Code_points_U.2B010000_to_U.2B10FFFF",
          "start_line": 1212,
          "end_line": 1212
        },
        {
          "url": "http://www.yaml.org/spec/1.2/spec.html#id2799784",
          "start_line": 2567,
          "end_line": 2567
        },
        {
          "url": "https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae",
          "start_line": 3118,
          "end_line": 3118
        }
      ],
      "scan_errors": []
    },
    {
      "path": "js-yaml-master/dist/js-yaml.min.js",
      "type": "file",
      "package_data": [],
      "for_packages": [
        "pkg:npm/js-yaml@4.1.0?uuid=ad9a873f-b200-48f1-b295-e3fa877f87d1"
      ],
      "detected_license_expression": "mit",
      "detected_license_expression_spdx": "MIT",
      "license_detections": [
        {
          "license_expression": "mit",
          "detection_log": [
            "not-combined"
          ],
          "matches": [
            {
              "score": 100.0,
              "start_line": 1,
              "end_line": 1,
              "matched_length": 2,
              "match_coverage": 100.0,
              "matcher": "2-aho",
              "license_expression": "mit",
              "rule_identifier": "mit_30.RULE",
              "rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/mit_30.RULE"
            }
          ]
        }
      ],
      "license_clues": [],
      "percentage_of_license_text": 0.02,
      "for_license_detections": [
        "mit-d023e558-35f1-ef53-5625-5f585054dde4"
      ],
      "copyrights": [],
      "holders": [],
      "authors": [],
      "emails": [],
      "urls": [
        {
          "url": "https://github.com/nodeca/js-yaml",
          "start_line": 1,
          "end_line": 1
        }
      ],
      "scan_errors": []
    },

If these changes are released, we need to update our code to work with the new output structure.

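I tried running `gen` with the latest SCTK output as input, and the result is not usable as-is: nested fields such as `license_detections`, `copyrights`, `holders`, `authors` and `urls` are written out as quoted string dumps of Python lists instead of structured values.
For example,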

about_resource: deflate.c
name: deflate.c
type: file
base_name: deflate
extension: .c
size: 71476
date: '2023-07-19'
sha1: 7b4ace6d698c5dbbfb9a8f047f63228ca54d2e77
md5: cd7826278ce9d9d9ed5abdefef50c3e2
sha256: 565e68ddfff5af8efd55f71e122b860ad11527a7d9de40a76af2b16afef24cc0
mime_type: text/x-c
file_type: C source, ASCII text
programming_language: C
is_text: True
is_source: True
detected_license_expression: zlib
detected_license_expression_spdx: Zlib
license_detections: '[{''license_expression'': ''zlib'', ''matches'': [{''score'': 100.0, ''start_line'':
  3, ''end_line'': 3, ''matched_length'': 12, ''match_coverage'': 100.0, ''matcher'': ''2-aho'',
  ''license_expression'': ''zlib'', ''rule_identifier'': ''zlib_5.RULE'', ''rule_relevance'':
  100, ''rule_url'': ''https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/zlib_5.RULE''},
  {''score'': 100.0, ''start_line'': 6, ''end_line'': 23, ''matched_length'': 144, ''match_coverage'':
  100.0, ''matcher'': ''2-aho'', ''license_expression'': ''zlib'', ''rule_identifier'': ''zlib_17.RULE'',
  ''rule_relevance'': 100, ''rule_url'': ''https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/zlib_17.RULE''}],
  ''identifier'': ''zlib-663c0d51-510f-fca6-b163-671ecb188ff9'', ''detection_log'': [''unknown-reference-to-local-file'']}]'
percentage_of_license_text: '0.13'
copyrights: '[{''copyright'': ''Copyright (c) 1995-2013 Jean-loup Gailly and Mark Adler'', ''start_line'':
  2, ''end_line'': 2}, {''copyright'': ''Copyright 1995-2013 Jean-loup Gailly and Mark Adler'',
  ''start_line'': 55, ''end_line'': 55}]'
holders: '[{''holder'': ''Jean-loup Gailly and Mark Adler'', ''start_line'': 2, ''end_line'':
  2}, {''holder'': ''Jean-loup Gailly and Mark Adler'', ''start_line'': 55, ''end_line'': 55}]'
authors: '[{''author'': ''Leonid Broukhis'', ''start_line'': 34, ''end_line'': 34}]'
urls: '[{''url'': ''http://tools.ietf.org/html/rfc1951'', ''start_line'': 40, ''end_line'':
  40}]'

We need to update the tool so that it supports the new output structure.