HTTPArchive/tech-report-apis

Lighthouse Report

maceto opened this issue · 6 comments

Could you describe the origin/source of this data?

[
	{
		date: '2020-01-01',
		technology: 'Wordpress',
		lighthouse: [
			{
				name: 'performance',
				tested: 10,
				desktop: {
					median_score: 71,
				},
				mobile: {
					median_score: 73,
				},
				across_dataset: {
					median_score: 83,
				},
			},
			{
				name: 'accessibility',
				tested: 10,
				desktop: {
					median_score: 71,
				},
				mobile: {
					median_score: 73,
				},
				across_dataset: {
					good_number: 83,
				},
			},
			...
		],
	}
]

The goal is to create a script that queries this data from BigQuery (BQ), transforms it, and saves it in Firestore.

Query

-- Pivots per-client Lighthouse rows into one entry per Lighthouse category.
--
-- Input:  one struct per client row, carrying the five median category scores.
-- Output: one struct per category (accessibility, best_practices, performance,
--         pwa, seo — in that order) with the median score nested under the
--         key named by each row's `client` value (`desktop` / `mobile` in the
--         declared return type). Scores are passed through unchanged.
CREATE TEMPORARY FUNCTION GET_LIGHTHOUSE(
  records ARRAY<STRUCT<
    client STRING,
    median_lighthouse_score_accessibility NUMERIC,
    median_lighthouse_score_best_practices NUMERIC,
    median_lighthouse_score_performance NUMERIC,
    median_lighthouse_score_pwa NUMERIC,
    median_lighthouse_score_seo NUMERIC
  >>
)
RETURNS ARRAY<STRUCT<
  name STRING,
  desktop STRUCT<median_score NUMERIC>,
  mobile STRUCT<median_score NUMERIC>
>>
LANGUAGE js AS '''
// Lighthouse categories, listed in the order they are emitted.
const CATEGORIES = ['accessibility', 'best_practices', 'performance', 'pwa', 'seo'];

// Every input column is this prefix followed by the category name.
const COLUMN_PREFIX = 'median_lighthouse_score_';

// Seed one output entry per category; a Map keeps insertion order.
const byCategory = new Map();
for (const category of CATEGORIES) {
  byCategory.set(category, {name: category});
}

// Nest each row's score under a key equal to that row's client value.
for (const row of records) {
  for (const category of CATEGORIES) {
    byCategory.get(category)[row.client] = {median_score: row[COLUMN_PREFIX + category]};
  }
}

return Array.from(byCategory.values());
''';

-- Emits one row per (date, technology, rank, geo) for the 2023-07-01 snapshot,
-- with the per-client Lighthouse medians folded into a single `lighthouse`
-- array by GET_LIGHTHOUSE.
WITH snapshot AS (
  -- Filter to the target date and pack the per-client scores into one struct
  -- so the aggregation step below stays short.
  SELECT
    date,
    app,
    rank,
    geo,
    STRUCT(
      client,
      median_lighthouse_score_accessibility,
      median_lighthouse_score_best_practices,
      median_lighthouse_score_performance,
      median_lighthouse_score_pwa,
      median_lighthouse_score_seo
    ) AS client_scores
  FROM `httparchive.core_web_vitals.technologies`
  WHERE date = '2023-07-01'
)

SELECT
  date,
  app AS technology,
  rank,
  geo,
  -- Collect every client row of the group, then pivot it per category.
  GET_LIGHTHOUSE(ARRAY_AGG(client_scores)) AS lighthouse
FROM snapshot
GROUP BY
  date,
  app,
  rank,
  geo

Example record

{
  "date": "2023-07-01",
  "technology": "WordPress",
  "rank": "ALL",
  "geo": "ALL",
  "lighthouse": [{
    "name": "accessibility",
    "desktop": {
      "median_score": "0.865"
    },
    "mobile": {
      "median_score": "0.865"
    }
  }, {
    "name": "best_practices",
    "desktop": {
      "median_score": "0.92"
    },
    "mobile": {
      "median_score": "0.92"
    }
  }, {
    "name": "performance",
    "desktop": {
      "median_score": "0.545"
    },
    "mobile": {
      "median_score": "0.345"
    }
  }, {
    "name": "pwa",
    "desktop": {
      "median_score": "0.25"
    },
    "mobile": {
      "median_score": "0.33"
    }
  }, {
    "name": "seo",
    "desktop": {
      "median_score": "0.875"
    },
    "mobile": {
      "median_score": "0.895"
    }
  }]
}

Note: I omitted the `tested` value since it's not relevant at the Lighthouse category level.

@rviscomi, should any of the following parameters be mandatory for this endpoint?

  • technology
  • rank
  • geo

WDYT @sarahfossheim?

Example of how to consume this endpoint

curl --request GET \
  --url 'https://dev-gw-2vzgiib6.ue.gateway.dev/v1/lighthouse?geo=Maldives&technology=["Oracle HTTP Server"]&rank=ALL'

Query

-- Pivots per-client Lighthouse rows into one entry per Lighthouse category,
-- converting each median score from a 0-1 fraction to a rounded 0-100 integer.
--
-- Input:  one struct per client row, carrying the five median category scores.
-- Output: one struct per category (accessibility, best_practices, performance,
--         pwa, seo — in that order) with the integer score nested under the
--         key named by each row's `client` value (`desktop` / `mobile` in the
--         declared return type).
--
-- A NULL input score stays NULL in the output. Multiplying it directly would
-- yield 0 (in JavaScript `null * 100 === 0`), which is indistinguishable from
-- a genuine score of 0.
CREATE TEMPORARY FUNCTION GET_LIGHTHOUSE(
  records ARRAY<STRUCT<
      client STRING,
      median_lighthouse_score_accessibility NUMERIC,
      median_lighthouse_score_best_practices NUMERIC,
      median_lighthouse_score_performance NUMERIC,
      median_lighthouse_score_pwa NUMERIC,
      median_lighthouse_score_seo NUMERIC
  >>
) RETURNS ARRAY<STRUCT<
  name STRING,
  desktop STRUCT<
    median_score INT64
  >,
  mobile STRUCT<
    median_score INT64
  >
>> LANGUAGE js AS '''
// Output category name -> input column holding its median score.
const METRIC_MAP = {
  accessibility: 'median_lighthouse_score_accessibility',
  best_practices: 'median_lighthouse_score_best_practices',
  performance: 'median_lighthouse_score_performance',
  pwa: 'median_lighthouse_score_pwa',
  seo: 'median_lighthouse_score_seo',
};

// Converts a 0-1 score to a rounded 0-100 integer, passing missing values
// through as null instead of letting them coerce to 0.
// BigQuery may hand fractional NUMERIC values to JavaScript as strings, so the
// value is converted with Number() explicitly before the arithmetic.
const toPercent = score => {
  if (score === null || score === undefined) {
    return null;
  }
  return Math.round(Number(score) * 100);
};

// Initialize the Lighthouse map: one entry per category, in METRIC_MAP order.
const lighthouse = Object.fromEntries(Object.keys(METRIC_MAP).map(metricName => {
  return [metricName, {name: metricName}];
}));

// Populate each client record under a key equal to its client value.
records.forEach(record => {
  Object.entries(METRIC_MAP).forEach(([metricName, column]) => {
    lighthouse[metricName][record.client] = {median_score: toPercent(record[column])};
  });
});

return Object.values(lighthouse);
''';

-- Emits one row per (date, technology, rank, geo) for the 2023-07-01 snapshot,
-- with the per-client Lighthouse medians folded into a single `lighthouse`
-- array by GET_LIGHTHOUSE.
WITH snapshot AS (
  -- Filter to the target date and pack the per-client scores into one struct
  -- so the aggregation step below stays short.
  SELECT
    date,
    app,
    rank,
    geo,
    STRUCT(
      client,
      median_lighthouse_score_accessibility,
      median_lighthouse_score_best_practices,
      median_lighthouse_score_performance,
      median_lighthouse_score_pwa,
      median_lighthouse_score_seo
    ) AS client_scores
  FROM `httparchive.core_web_vitals.technologies`
  WHERE date = '2023-07-01'
)

SELECT
  date,
  app AS technology,
  rank,
  geo,
  -- Collect every client row of the group, then pivot it per category.
  GET_LIGHTHOUSE(ARRAY_AGG(client_scores)) AS lighthouse
FROM snapshot
GROUP BY
  date,
  app,
  rank,
  geo

Results

{
"date": "2023-07-01",
"technology": "WordPress",
"rank": "Top 10k",
"geo": "Kenya",
"lighthouse": [{
  "name": "accessibility",
  "desktop": {
    "median_score": "85"
  },
  "mobile": {
    "median_score": "85"
  }
}, {
  "name": "best_practices",
  "desktop": {
    "median_score": "92"
  },
  "mobile": {
    "median_score": "88"
  }
}, {
  "name": "performance",
  "desktop": {
    "median_score": "53"
  },
  "mobile": {
    "median_score": "38"
  }
}, {
  "name": "pwa",
  "desktop": {
    "median_score": "25"
  },
  "mobile": {
    "median_score": "33"
  }
}, {
  "name": "seo",
  "desktop": {
    "median_score": "88"
  },
  "mobile": {
    "median_score": "90"
  }
}]
}

@rviscomi @sarahfossheim, all the changes discussed are already deployed.

New URL: https://dev-gw-2vzgiib6.uk.gateway.dev/v1/lighthouse

Documentation: https://github.com/HTTPArchive/tech-report-apis#get-lighthouse