avwx-rest/avwx-api

TAF bug with PROB and multiple time periods

Opened this issue · 2 comments

Hello, I think I've found an issue with parsing of this TAF message:

ESSA 261130Z 2612/2712 VRB02KT 9999 SCT005 BKN030 PROB40 2612/2618 4000 BR BKN004 2618/2706 0300 FZFG VV002 2706/2709 3000 2709/2712 BKN008

Expectation

I would expect the message to be parsed into 5 forecast objects, with the following time periods:

TAF code start_time end_time
2612/2712 2024-02-26T12:00:00Z 2024-02-27T12:00:00Z
2612/2618 2024-02-26T12:00:00Z 2024-02-26T18:00:00Z
2618/2706 2024-02-26T18:00:00Z 2024-02-27T06:00:00Z
2706/2709 2024-02-27T06:00:00Z 2024-02-27T09:00:00Z
2709/2712 2024-02-27T09:00:00Z 2024-02-27T12:00:00Z

The JSON should look like this:
expected.json

Expected
    "forecast": [
        {
            ...
            "end_time": {
                "dt": "2024-02-27T12:00:00Z",
                "repr": "2712"
            },
            "flight_rules": "MVFR",
            "icing": [],
            "other": [],
            "probability": null,
            "raw": "2612/2712 VRB02KT 9999 SCT005 BKN030",
            "sanitized": "2612/2712 VRB02KT 9999 SCT005 BKN030",
            "start_time": {
                "dt": "2024-02-26T12:00:00Z",
                "repr": "2612"
            },
            ...
        },
        {
            "altimeter": null,
            "clouds": [
                {
                    "altitude": 4,
                    "modifier": null,
                    "repr": "BKN004",
                    "type": "BKN"
                }
            ],
            "end_time": {
                "dt": "2024-02-26T18:00:00Z",
                "repr": "2618"
            },
            "flight_rules": "MVFR",
            "icing": [],
            "other": [],
            "probability": {
                "repr": "40",
                "spoken": "four zero",
                "value": 40
            },
            "raw": "PROB40 2612/2618 4000 BR BKN004",
            "sanitized": "PROB40 2612/2618 4000 BR BKN004",
            "start_time": {
                "dt": "2024-02-26T12:00:00Z",
                "repr": "2612"
            },
            "summary": "Vis 4km, Mist, Broken layer at 400ft",
            "transition_start": null,
            "turbulence": [],
            "type": "FROM",
            "visibility": {
                "repr": "4000",
                "spoken": "four thousand",
                "value": 4000
            },
            "wind_direction": null,
            "wind_gust": null,
            "wind_shear": null,
            "wind_speed": null,
            "wind_variable_direction": [],
            "wx_codes": [
                {
                    "repr": "BR",
                    "value": "Mist"
                }
            ]
        },
        {
            "altimeter": null,
            "clouds": [
                {
                    "altitude": 2,
                    "modifier": null,
                    "repr": "VV002",
                    "type": "VV"
                }
            ],
            "end_time": {
                "dt": "2024-02-27T06:00:00Z",
                "repr": "2706"
            },
            "flight_rules": "LIFR",
            "icing": [],
            "other": [],
            "probability": {
                "repr": "40",
                "spoken": "four zero",
                "value": 40
            },
            "raw": "2618/2706 0300 FZFG VV002",
            "sanitized": "2618/2706 0300 FZFG VV002",
            "start_time": {
                "dt": "2024-02-26T18:00:00Z",
                "repr": "2618"
            },
            "summary": "Vis 0.3km, Freezing Fog, Vertical visibility up to 200ft",
            "transition_start": null,
            "turbulence": [],
            "type": "FROM",
            "visibility": {
                "repr": "0300",
                "spoken": "three hundred",
                "value": 300
            },
            "wind_direction": null,
            "wind_gust": null,
            "wind_shear": null,
            "wind_speed": null,
            "wind_variable_direction": [],
            "wx_codes": [
                {
                    "repr": "FZFG",
                    "value": "Freezing Fog"
                }
            ]
        },
        {
            "altimeter": null,
            "clouds": [],
            "end_time": {
                "dt": "2024-02-27T09:00:00Z",
                "repr": "2709"
            },
            "flight_rules": "MVFR",
            "icing": [],
            "other": [],
            "probability": {
                "repr": "40",
                "spoken": "four zero",
                "value": 40
            },
            "raw": "2706/2709 3000",
            "sanitized": "2706/2709 3000",
            "start_time": {
                "dt": "2024-02-27T06:00:00Z",
                "repr": "2706"
            },
            "summary": "Vis 3km",
            "transition_start": null,
            "turbulence": [],
            "type": "FROM",
            "visibility": {
                "repr": "3000",
                "spoken": "three thousand",
                "value": 3000
            },
            "wind_direction": null,
            "wind_gust": null,
            "wind_shear": null,
            "wind_speed": null,
            "wind_variable_direction": [],
            "wx_codes": []
        },
        {
            "altimeter": null,
            "clouds": [
                {
                    "altitude": 8,
                    "modifier": null,
                    "repr": "BKN008",
                    "type": "BKN"
                }
            ],
            "end_time": {
                "dt": "2024-02-27T12:00:00Z",
                "repr": "2712"
            },
            "flight_rules": "MVFR",
            "icing": [],
            "other": [],
            "probability": {
                "repr": "40",
                "spoken": "four zero",
                "value": 40
            },
            "raw": "2709/2712 BKN008",
            "sanitized": "2709/2712 BKN008",
            "start_time": {
                "dt": "2024-02-27T09:00:00Z",
                "repr": "2709"
            },
            "summary": "Broken layer at 800ft",
            "transition_start": null,
            "turbulence": [],
            "type": "FROM",
            "visibility": null,
            "wind_direction": null,
            "wind_gust": null,
            "wind_shear": null,
            "wind_speed": null,
            "wind_variable_direction": [],
            "wx_codes": []
        }
    ],

Actual

The message was parsed to only two forecast objects, one for 2612/2712 VRB02KT 9999 SCT005 BKN030 and one for PROB40 2612/2618 4000 BR BKN004 2618/2706 0300 FZFG VV002 2706/2709 3000 2709/2712 BKN008.
As is, there is no way to tell that the VV002 and FZFG codes should apply during 2618/2706.

TAF code start_time end_time
2612/2712 2024-02-26T12:00:00Z 2024-02-27T12:00:00Z
2612/2618 2024-02-26T12:00:00Z 2024-02-26T18:00:00Z

actual.json

Actual
    "forecast": [
        {
            ...
            "end_time": {
                "dt": "2024-02-27T12:00:00Z",
                "repr": "2712"
            },
            "flight_rules": "MVFR",
            "icing": [],
            "other": [],
            "probability": null,
            "raw": "2612/2712 VRB02KT 9999 SCT005 BKN030",
            "sanitized": "2612/2712 VRB02KT 9999 SCT005 BKN030",
            "start_time": {
                "dt": "2024-02-26T12:00:00Z",
                "repr": "2612"
            },
            ...
        },
        {
            "altimeter": null,
            "clouds": [
                {
                    "altitude": 2,
                    "modifier": null,
                    "repr": "VV002",
                    "type": "VV"
                },
                {
                    "altitude": 4,
                    "modifier": null,
                    "repr": "BKN004",
                    "type": "BKN"
                },
                {
                    "altitude": 8,
                    "modifier": null,
                    "repr": "BKN008",
                    "type": "BKN"
                }
            ],
            "end_time": {
                "dt": "2024-02-26T18:00:00Z",
                "repr": "2618"
            },
            "flight_rules": "LIFR",
            "icing": [],
            "other": [
                "2618/2706",
                "0300",
                "2706/2709",
                "3000",
                "2709/2712"
            ],
            "probability": {
                "repr": "40",
                "spoken": "four zero",
                "value": 40
            },
            "raw": "PROB40 2612/2618 4000 BR BKN004 2618/2706 0300 FZFG VV002 2706/2709 3000 2709/2712 BKN008",
            "sanitized": "PROB40 2612/2618 4000 BR BKN004 2618/2706 0300 FZFG VV002 2706/2709 3000 2709/2712 BKN008",
            "start_time": {
                "dt": "2024-02-26T12:00:00Z",
                "repr": "2612"
            },
            "summary": "Vis 4km, Mist, Freezing Fog, Vertical visibility up to 200ft, Broken layer at 400ft, Broken layer at 800ft",
            "transition_start": null,
            "turbulence": [],
            "type": "FROM",
            "visibility": {
                "repr": "4000",
                "spoken": "four thousand",
                "value": 4000
            },
            "wind_direction": null,
            "wind_gust": null,
            "wind_shear": null,
            "wind_speed": null,
            "wind_variable_direction": [],
            "wx_codes": [
                {
                    "repr": "BR",
                    "value": "Mist"
                },
                {
                    "repr": "FZFG",
                    "value": "Freezing Fog"
                }
            ]
        }
    ]

Reproduction

I don't have access to the API, so I'm not sure how to reproduce it myself. I was investigating this data for use in a project; I believe our integration team pulls it from the taf endpoint.

I believe the core issue is that the report parser is looking for some kind of separator (like BECMG) before each of the time periods. That's why it was still able to split the PROB40 just fine. I'll run this through the engine to confirm.

My suspicion was confirmed. I've added a new check to split the forecast sections to match the expected time periods. Because there is no line type available in the original text, the default "FROM" is used, since each of the others (PROB, BECMG, TEMPO, etc.) implies a different transition. This should be rolled out soon along with other fixes.