DatasetValidationError on import_data_task in Azure
Closed this issue · 2 comments
holyketzer commented
DatasetValidationError
on import_data_task
in Azure
https://app-staging.auger.ai/admin/cluster_tasks/81400 - this happened when I tried to load a CSV file through the UI; after I retried the task, it worked
https://app-staging.auger.ai/admin/cluster_tasks/83283 - this happened during the review process; after a retry, it also worked
It happens from time to time; it is probably a race condition
cc @skatedplrn
holyketzer commented
https://app-staging.auger.ai/admin/cluster_tasks/88391
{
"azure": {
"data": "HTTP Error 400: Bad Request",
"result": false
}
}
Log:
2020-08-28T10:49:44.192162163Z [2020-08-28 10:49:44,191: INFO/MainProcess] Received task: a2ml.tasks_queue.tasks_hub_api.import_data_task[af20e889-f734-49b7-a58e-749989c0cdaf]
2020-08-28T10:49:49.991546407Z [2020-08-28 10:49:49,991: INFO/ForkPoolWorker-2] [azure] HTTP Error 400: Bad Request
2020-08-28T10:49:50.929878129Z [2020-08-28 10:49:50,929: INFO/ForkPoolWorker-2] Task a2ml.tasks_queue.tasks_hub_api.import_data_task[af20e889-f734-49b7-a58e-749989c0cdaf] succeeded in 6.571025623998139s: None
holyketzer commented
Stack trace
2020-08-28T18:33:04.446149895Z [2020-08-28 18:33:04,445: INFO/MainProcess] Received task: a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41]
2020-08-28T18:33:06.181432202Z [2020-08-28 18:33:06,181: WARNING/ForkPoolWorker-601] [warning] SOCKS support in urllib3 requires the installation of optional dependencies: specifically, PySocks. For more information, see https://urllib3.readthedocs.io/en/latest/contrib.html#socks-proxies
2020-08-28T18:33:06.702998511Z [2020-08-28 18:33:06,702: WARNING/ForkPoolWorker-601] [warning] FileType Enum is Deprecated in > 1.0.39. Use strings instead.
2020-08-28T18:33:07.346427084Z [2020-08-28 18:33:07,346: WARNING/ForkPoolWorker-601] [warning] Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated since Python 3.3,and in 3.9 it will stop working
2020-08-28T18:33:07.349004917Z [2020-08-28 18:33:07,348: INFO/ForkPoolWorker-603] a2ml.tasks_queue.tasks_hub_api.distribution_chart_stats_task[bcc591e3-6023-4216-8330-03e05b65e925]: Send JSON data to Hub: {"type": "TaskResult", "provider": "azure", "hub_info": {"pipeline_id": "AutoML_3adce96a-b0ad-42e8-9e48-559ffa5b9b37_9", "project_file": {"id": 2974, "url": "https://options-auger.s3.us-east-2.amazonaws.com/training-2020-08-10T191701Z.csv", "name": "training-2020-08-10T191701Z.csv", "object": "project_file", "status": "processed", "deleted": false, "industry": null, "image_url": null, "providers": ["auger", "azure"], "created_at": "2020-08-10T20:52:25.991Z", "project_id": 851, "updated_at": "2020-08-20T16:18:06.680Z", "description": null, "content_type": null, "error_message": null, "providers_data": {"auger": null, "azure": {"url": "training-2020-08-10T191701Z.csv"}}}, "project_name": "a2mlworkspacestaging", "project_path": "s3://auger-mt-org-cinpns/workspace/projects/options-a2ml", "experiment_id": "1989e7883ebd05c4", "prediction_id": null, "cluster_task_id": 89279, "experiment_name": "training-2020-08-10T191701Z.csv-experiment", "project_file_id": 2974, "experiment_session": {"id": "1e33214c11a62eb2", "number": 4, "object": "experiment_session", "status": "completed", "deleted": false, "message": null, "runtime": 9492.835444, "providers": ["azure"], "created_at": "2020-08-11T19:14:10.174Z", "model_type": "regression", "project_id": 851, "started_at": "2020-08-11T19:14:10.174Z", "updated_at": "2020-08-11T21:52:23.010Z", "completed_at": "2020-08-11T21:52:23.010Z", "search_space": null, "error_message": null, "experiment_id": "1989e7883ebd05c4", "model_settings": {"evaluation_options": {"scoring": "r2_score", "max_n_trials": 10, "use_ensemble": true, "classification": false, "test_data_path": null, "max_eval_time_mins": 60, "max_total_time_mins": 120, "cpu_per_mt_algorithm": 4, "crossValidationFolds": 5, "max_concurrent_iterations": 4}}, "providers_data": 
{"azure": {"id": "AutoML_3adce96a-b0ad-42e8-9e48-559ffa5b9b37", "errors": {"error": null, "error_details": null}, "status": "completed", "completed_evaluations": 8}}, "project_file_id": 2974, "starter_user_id": 21, "top_score_value": 0.706982541322667, "dataset_statistics": {"count": 1972128, "stat_data": [{"use": false, "datatype": "boolean", "isTarget": false, "column_name": "predicted_gainer", "value_counts": {"f": 1798895, "t": 173233}, "orig_datatype": "string", "unique_values": 2, "value_counts_ex": [{"count": 1798895, "value": "f"}, {"count": 173233, "value": "t"}]}, {"avg": -0.076688, "use": false, "range": ["-1.00", "295.00"], "std_dev": 0.736028, "datatype": "double", "isTarget": true, "histogram": {"hist": [1972005, 84, 26, 8, 2, 0, 0, 2, 0, 1], "bin_edges": [-0.9996019900497509, 28.60035820895522, 58.2003184079602, 87.80027860696516, 117.4002388059701, 147.0001990049751, 176.6001592039801, 206.2001194029851, 235.8000796019901, 265.400039800995, 295.0]}, "column_name": "predicted_delta_price", "orig_datatype": "double", "unique_values": 160005}, {"avg": 136.1273, "use": false, "range": ["0.50", "3300.00"], "std_dev": 178.879777, "datatype": "double", "isTarget": false, "histogram": {"hist": [1865273, 87359, 4253, 4813, 5149, 1724, 800, 1502, 708, 547], "bin_edges": [0.5, 330.45, 660.4, 990.3499999999999, 1320.3, 1650.25, 1980.2, 2310.15, 2640.1, 2970.05, 3300.0]}, "column_name": "strike_price", "orig_datatype": "double", "unique_values": 818}, {"avg": 136.224451, "use": false, "range": ["0.48", "3200.00"], "std_dev": 178.809939, "datatype": "double", "isTarget": false, "histogram": {"hist": [1855425, 96455, 4840, 4275, 5765, 1356, 1205, 1208, 943, 656], "bin_edges": [0.4786, 320.43074, 640.38288, 960.33502, 1280.28716, 1600.2393, 1920.19144, 2240.14358, 2560.09572, 2880.04786, 3200.0]}, "column_name": "underlying_price", "orig_datatype": "double", "unique_values": 21436}, {"avg": 0.042523, "use": true, "range": ["-0.77", "49.83"], "std_dev": 0.356574, 
"datatype": "double", "isTarget": false, "histogram": {"hist": [1970583, 1265, 150, 41, 12, 50, 10, 8, 5, 4], "bin_edges": [-0.7728384557709399, 4.287568464746874, 9.34797538526469, 14.4083823057825, 19.46878922630032, 24.52919614681813, 29.58960306733595, 34.65000998785376, 39.71041690837158, 44.77082382888939, 49.8312307494072]}, "column_name": "avg_price_change", "orig_datatype": "double", "unique_values": 1917643}, {"use": true, "datatype": "datetime", "isTarget": false, "column_name": "data_date", "orig_datatype": "datetime", "unique_values": 123}, {"avg": 54.045773, "use": true, "range": ["0.00", "364.00"], "std_dev": 70.196704, "datatype": "integer", "isTarget": false, "histogram": {"hist": [1263651, 279075, 96707, 84744, 68537, 82937, 48919, 21789, 14818, 10951], "bin_edges": [0.0, 36.4, 72.8, 109.2, 145.6, 182.0, 218.4, 254.8, 291.2, 327.6, 364.0]}, "column_name": "days_left", "orig_datatype": "integer", "unique_values": 267}, {"avg": 3.041938, "use": true, "range": ["1.00", "5.00"], "std_dev": 1.363659, "datatype": "integer", "isTarget": false, "histogram": {"hist": [332479, 0, 422539, 0, 0, 420959, 0, 422097, 0, 374054], "bin_edges": [1.0, 1.4, 1.8, 2.2, 2.6, 3.0, 3.4, 3.8, 4.2, 4.6, 5.0]}, "column_name": "day_of_week", "orig_datatype": "integer", "unique_values": 5}, {"avg": -8.451585, "use": true, "range": ["-3209.91", "34.74"], "std_dev": 168.889916, "datatype": "double", "isTarget": false, "column_name": "debttoequity", "orig_datatype": "double", "unique_values": 457, "missing_values": 1876}, {"avg": 0.052427, "use": true, "range": ["-1.00", "407.00"], "std_dev": 1.060254, "datatype": "double", "isTarget": false, "histogram": {"hist": [1971993, 97, 22, 11, 0, 0, 2, 1, 1, 1], "bin_edges": [-0.9970414201183431, 39.80266272189349, 80.60236686390533, 121.4020710059172, 162.201775147929, 203.0014792899408, 243.8011834319527, 284.6008875739645, 325.4005917159763, 366.2002958579881, 407.0]}, "column_name": "delta_price", "orig_datatype": "double", 
"unique_values": 255665}, {"avg": -0.194821, "use": true, "range": ["-1.00", "25.29"], "std_dev": 0.899279, "datatype": "double", "isTarget": false, "histogram": {"hist": [1888156, 74100, 7953, 1421, 356, 100, 22, 14, 3, 3], "bin_edges": [-0.999910503100428, 1.629508340125145, 4.258927183350719, 6.888346026576292, 9.517764869801864, 12.14718371302744, 14.77660255625301, 17.40602139947859, 20.03544024270416, 22.66485908592973, 25.2942779291553]}, "column_name": "delta_over_avg_price", "orig_datatype": "double", "unique_values": 1852508}, {"avg": 0.072243, "use": true, "range": ["-1.00", "21.00"], "std_dev": 0.806958, "datatype": "double", "isTarget": false, "histogram": {"hist": [1815236, 145582, 8955, 1598, 471, 143, 90, 37, 13, 3], "bin_edges": [-0.99798704846377, 1.201391488315377, 3.400770025094524, 5.600148561873672, 7.799527098652819, 9.998905635431965, 12.19828417221111, 14.39766270899026, 16.59704124576941, 18.79641978254855, 20.9957983193277]}, "column_name": "delta_over_avg_underlying_price", "orig_datatype": "double", "unique_values": 1830374}, {"avg": 0.00129, "use": true, "range": ["-0.38", "0.93"], "std_dev": 0.028311, "datatype": "double", "isTarget": false, "histogram": {"hist": [102, 2973, 1489883, 476976, 2031, 58, 48, 3, 38, 16], "bin_edges": [-0.378395451674037, -0.2472589462572179, -0.1161224408403988, 0.01501406457642035, 0.1461505699932394, 0.2772870754100585, 0.4084235808268777, 0.5395600862436969, 0.6706965916605159, 0.8018330970773351, 0.9329696024941542]}, "column_name": "delta_underlying", "orig_datatype": "double", "unique_values": 61243}, {"avg": 0.434738, "use": true, "range": ["-3427.00", "93184.00"], "std_dev": 67.845288, "datatype": "double", "isTarget": false, "histogram": {"hist": [1972126, 1, 0, 0, 0, 0, 0, 0, 0, 1], "bin_edges": [-3427.0, 6234.1, 15895.2, 25556.3, 35217.4, 44878.5, 54539.600000000006, 64200.7, 73861.8, 83522.90000000001, 93184.0]}, "column_name": "delta_volume", "orig_datatype": "double", "unique_values": 618}, 
{"avg": -0.000427, "use": true, "range": ["-0.05", "0.05"], "std_dev": 0.028395, "datatype": "double", "isTarget": false, "histogram": {"hist": [189355, 198436, 201828, 203077, 202539, 208123, 201137, 197179, 189577, 180877], "bin_edges": [-0.0499982407374828, -0.03999859927211259, -0.02999895780674238, -0.01999931634137218, -0.009999674876001965, -3.34106317542715e-08, 0.00999960805473845, 0.01999924952010867, 0.02999889098547887, 0.03999853245084908, 0.0499981739162193]}, "column_name": "distance", "orig_datatype": "double", "unique_values": 101888}, {"avg": 0.026231, "use": true, "range": ["0.00", "1.01"], "std_dev": 0.02737, "datatype": "double", "isTarget": false, "column_name": "dividend_yield", "orig_datatype": "double", "unique_values": 407, "missing_values": 1766}, {"avg": 0.039429, "use": true, "range": ["-13.46", "0.71"], "std_dev": 0.101943, "datatype": "double", "isTarget": false, "column_name": "earnings_pct", "orig_datatype": "double", "unique_values": 40888, "missing_values": 20656}, {"avg": 0.143933, "use": true, "range": ["-57.87", "61.24"], "std_dev": 3.01937, "datatype": "double", "isTarget": false, "column_name": "ebitgrowth", "orig_datatype": "double", "unique_values": 470, "missing_values": 16111}, {"avg": -0.218599, "use": true, "range": ["-2896.77", "409.38"], "std_dev": 72.856965, "datatype": "double", "isTarget": false, "column_name": "fcffgrowth", "orig_datatype": "double", "unique_values": 469, "missing_values": 19368}, {"avg": 84193709973.43858, "use": true, "range": ["54401613.00", "1246018910000.00"], "std_dev": 153591595988.0705, "datatype": "double", "isTarget": false, "column_name": "market_cap", "orig_datatype": "double", "unique_values": 474, "missing_values": 1766}, {"avg": 0.192281, "use": true, "range": ["-16.21", "13.50"], "std_dev": 1.434123, "datatype": "double", "isTarget": false, "column_name": "ocfgrowth", "orig_datatype": "double", "unique_values": 471, "missing_values": 11594}, {"use": true, "datatype": "categorical", 
"isTarget": false, "column_name": "p_c", "value_counts": {"C": 1014486, "P": 957642}, "orig_datatype": "string", "unique_values": 2, "value_counts_ex": [{"count": 1014486, "value": "C"}, {"count": 957642, "value": "P"}]}, {"avg": 20.729762, "use": true, "range": ["0.00", "1403.38"], "std_dev": 101.037486, "datatype": "double", "isTarget": false, "column_name": "price_book", "orig_datatype": "double", "unique_values": 470, "missing_values": 11339}, {"avg": 29.157959, "use": true, "range": ["-251.53", "592.93"], "std_dev": 49.221768, "datatype": "double", "isTarget": false, "column_name": "price_earnings", "orig_datatype": "double", "unique_values": 475, "missing_values": 1766}, {"avg": 13.746705, "use": true, "range": ["0.00", "2159.06"], "std_dev": 142.563789, "datatype": "double", "isTarget": false, "column_name": "price_revenue", "orig_datatype": "double", "unique_values": 470, "missing_values": 1876}, {"avg": 0.049383, "use": true, "range": ["-1.36", "5.13"], "std_dev": 0.272459, "datatype": "double", "isTarget": false, "column_name": "revenuegrowth", "orig_datatype": "double", "unique_values": 470, "missing_values": 16446}, {"use": true, "datatype": "categorical", "isTarget": false, "column_name": "sector", "value_counts": {"Energy": 113488, "Materials": 67326, "Utilities": 56589, "Financials": 205371, "Health Care": 327660, "Industrials": 225934, "Real Estate": 41568, "Consumer Staples": 208331, "Consumer Discretionary": 314367, "Information Technology": 375559, "Telecommunication Services": 15279}, "orig_datatype": "string", "unique_values": 12, "missing_values": 20656, "value_counts_ex": [{"count": 375559, "value": "Information Technology"}, {"count": 327660, "value": "Health Care"}, {"count": 314367, "value": "Consumer Discretionary"}, {"count": 225934, "value": "Industrials"}, {"count": 208331, "value": "Consumer Staples"}, {"count": 205371, "value": "Financials"}, {"count": 113488, "value": "Energy"}, {"count": 67326, "value": "Materials"}, {"count": 
56589, "value": "Utilities"}, {"count": 41568, "value": "Real Estate"}, {"count": 15279, "value": "Telecommunication Services"}]}, {"avg": 0.675197, "use": true, "range": ["0.06", "1.59"], "std_dev": 0.100533, "datatype": "double", "isTarget": false, "column_name": "year_vary", "orig_datatype": "double", "unique_values": 474, "missing_values": 1766}, {"avg": 0.074001, "use": true, "range": ["-0.86", "18.62"], "std_dev": 0.487439, "datatype": "double", "isTarget": false, "column_name": "delta_stock_volume", "orig_datatype": "double", "unique_values": 43492, "missing_values": 92}, {"avg": 0.000655, "use": true, "range": ["-0.75", "0.62"], "std_dev": 0.021044, "datatype": "double", "isTarget": false, "histogram": {"hist": [120, 0, 2, 123, 18122, 1943330, 10001, 336, 40, 54], "bin_edges": [-0.747365158090515, -0.6102899326040442, -0.4732147071175734, -0.3361394816311025, -0.1990642561446317, -0.06198903065816097, 0.07508619482830992, 0.2121614203147807, 0.3492366458012515, 0.4863118712877224, 0.623387096774193]}, "column_name": "delta_afterhours", "orig_datatype": "double", "unique_values": 42559}], "columns_count": 30}, "dataset_manifest_id": "828bd95a0203005a"}, "dataset_manifest_id": "828bd95a0203005a", "experiment_session_id": "1e33214c11a62eb2"}, "status": "success", "runtime": 45.749157190322876, "result": {"2020-08-24": {"predicted_predicted_delta_price": {"avg": -0.1637150279576552, "std_dev": 0.0961785147172678, "imp": 0}, "avg_price_change": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000195}, "data_date": {"dist": {"2020-08-24": 8296}, "imp": 0.051324}, "days_left": {"avg": 57.94623915139827, "std_dev": 73.44412696556313, "imp": 0.032152}, "day_of_week": {"avg": 1.0, "std_dev": 0.0, "imp": 0.000797}, "debttoequity": {"avg": 0.08486821359691453, "std_dev": 13.50361830579007, "imp": 0.000735}, "delta_price": {"avg": 0.0, "std_dev": 0.0, "imp": 0.02582}, "delta_over_avg_price": {"avg": 0.2310086582701198, "std_dev": 0.5900894809853053, "imp": 0.052062}, 
"delta_over_avg_underlying_price": {"avg": 1.1727012113941716e-17, "std_dev": 3.6435292765397057e-16, "imp": 0.018138}, "delta_underlying": {"avg": 0.0, "std_dev": 0.0, "imp": 0.095268}, "delta_volume": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000857}, "distance": {"avg": -0.0026852960649947377, "std_dev": 0.02872942466500326, "imp": 0.011778}, "dividend_yield": {"avg": 0.021658444069431054, "std_dev": 0.01699883568342046, "imp": 0.000757}, "earnings_pct": {"avg": 0.03175959777473209, "std_dev": 0.08101752644374777, "imp": 0.000622}, "ebitgrowth": {"avg": 0.13099633642719385, "std_dev": 1.987340852623593, "imp": 0.000774}, "fcffgrowth": {"avg": -5.860800529893927, "std_dev": 142.7117410107019, "imp": 0.000256}, "market_cap": {"avg": 87329010712.17346, "std_dev": 171802082036.34216, "imp": 0.023501}, "ocfgrowth": {"avg": 0.3215011333172614, "std_dev": 2.197032206777087, "imp": 0.000204}, "p_c": {"dist": {"C": 4366, "P": 3930}, "imp": 0.060361}, "price_book": {"avg": 17.21766069189972, "std_dev": 59.53485840880176, "imp": 0.000667}, "price_earnings": {"avg": 31.31919758919963, "std_dev": 45.10977234688211, "imp": 0.000181}, "price_revenue": {"avg": 28.852721709257477, "std_dev": 227.11283472876408, "imp": 0.000274}, "revenuegrowth": {"avg": 0.06686582328833178, "std_dev": 0.3510797405198097, "imp": 0.003276}, "sector": {"dist": {"Health Care": 1573, "Information Technology": 1635, "Financials": 1223, "Materials": 198, "Real Estate": 239, "Consumer Staples": 711, "Industrials": 717, "Consumer Discretionary": 1127, "Utilities": 236, "Energy": 395, "": 212, "Telecommunication Services": 30}, "imp": 0.000562}, "year_vary": {"avg": 0.6707410948011083, "std_dev": 0.09910197979599422, "imp": 0.000754}, "delta_stock_volume": {"avg": 0.08438456404113456, "std_dev": 0.441846352836815, "imp": 0.006112}, "delta_afterhours": {"avg": 0.004772295636917571, "std_dev": 0.0069699472503413915, "imp": 0.002062}, "actual_predicted_delta_price": {"avg": -0.13162321438274072, "std_dev": 
0.39665628303912703, "imp": 0}}, "2020-08-25": {"predicted_predicted_delta_price": {"avg": -0.09160704913977963, "std_dev": 0.03784128864763512, "imp": 0}, "avg_price_change": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000195}, "data_date": {"dist": {"2020-08-25": 9}, "imp": 0.051324}, "days_left": {"avg": 139.88888888888889, "std_dev": 70.8792008357255, "imp": 0.032152}, "day_of_week": {"avg": 2.0, "std_dev": 0.0, "imp": 0.000797}, "debttoequity": {"avg": -23.4662020-08-28T18:33:07.349004917Z 500000000003, "std_dev": 68.57102681615976, "imp": 0.000735}, "delta_price": {"avg": 0.0, "std_dev": 0.0, "imp": 0.02582}, "delta_over_avg_price": {"avg": 0.0, "std_dev": 0.0, "imp": 0.052062}, "delta_over_avg_underlying_price": {"avg": 2.245155654911473e-16, "std_dev": 3.6279600609255133e-16, "imp": 0.018138}, "delta_underlying": {"avg": 0.0, "std_dev": 0.0, "imp": 0.095268}, "delta_volume": {"avg": 0.0, "std_dev": 0.0, "imp": 0.000857}, "distance": {"avg": -0.009043649655571352, "std_dev": 0.031158578122551003, "imp": 0.011778}, "dividend_yield": {"avg": 0.02028422222222222, "std_dev": 0.013830768270217113, "imp": 0.000757}, "earnings_pct": {"avg": 0.05735383999823371, "std_dev": 0.09165092634071227, "imp": 0.000622}, "ebitgrowth": {"avg": 0.19712322222222223, "std_dev": 0.4963098677028238, "imp": 0.000774}, "fcffgrowth": {"avg": -322.1897401111111, "std_dev": 965.4708217800447, "imp": 0.000256}, "market_cap": {"avg": 185330477231.8889, "std_dev": 391086342355.95593, "imp": 0.023501}, "ocfgrowth": {"avg": 0.3437012222222222, "std_dev": 0.8479625678135471, "imp": 0.000204}, "p_c": {"dist": {"P": 8, "C": 1}, "imp": 0.060361}, "price_book": {"avg": 15.790866666666668, "std_dev": 23.164786997725667, "imp": 0.000667}, "price_earnings": {"avg": 21.08026666666667, "std_dev": 9.611020997141772, "imp": 0.000181}, "price_revenue": {"avg": 2.634133333333333, "std_dev": 1.88259410920145, "imp": 0.000274}, "revenuegrowth": {"avg": 0.6152126666666666, "std_dev": 1.692694416523919, "imp": 
0.003276}, "sector": {"dist": {"Information Technology": 2, "Health Care": 4, "Consumer Discretionary": 1, "Materials": 1, "Industrials": 1}, "imp": 0.000562}, "year_vary": {"avg": 0.6532950060133905, "std_dev": 0.09451891762017572, "imp": 0.000754}, "delta_stock_volume": {"avg": -0.0951215505203919, "std_dev": 0.33461359310491623, "imp": 0.006112}, "delta_afterhours": {"avg": 0.0004585794645390801, "std_dev": 0.006441979711806947, "imp": 0.002062}, "actual_predicted_delta_price": {"avg": -0.1461754907371818, "std_dev": 0.7203377851805648, "imp": 0}}}, "traceback": null}
2020-08-28T18:33:07.877421892Z [2020-08-28 18:33:07,877: INFO/ForkPoolWorker-603] Task a2ml.tasks_queue.tasks_hub_api.distribution_chart_stats_task[bcc591e3-6023-4216-8330-03e05b65e925] succeeded in 46.27928587399947s: None
2020-08-28T18:33:09.202165631Z [2020-08-28 18:33:09,201: INFO/ForkPoolWorker-601] [azure] Use presigned url: https://auger-mt-org-cinpns.s3.amazonaws.com/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAY4JC2MKVBYHZHPM6%2F20200828%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20200828T183309Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=[REDACTED]
2020-08-28T18:33:10.928005458Z [2020-08-28 18:33:10,927: INFO/ForkPoolWorker-601] Could not load the run context. Logging offline
2020-08-28T18:33:12.299690378Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.299711869Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 65, in _validate_has_data
2020-08-28T18:33:12.299715610Z dataflow.verify_has_data()
2020-08-28T18:33:12.299753380Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_loggerfactory.py", line 179, in wrapper
2020-08-28T18:33:12.299762950Z return func(*args, **kwargs)
2020-08-28T18:33:12.299810061Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 804, in verify_has_data
2020-08-28T18:33:12.299815792Z if len(self.take(1)._to_pyrecords()) == 0:
2020-08-28T18:33:12.299915813Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 724, in _to_pyrecords
2020-08-28T18:33:12.299934745Z span_context=to_dprep_span_context(span.get_context() if span else None)
2020-08-28T18:33:12.299937575Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_aml_helper.py", line 38, in wrapper
2020-08-28T18:33:12.299939815Z return send_message_func(op_code, message, cancellation_token)
2020-08-28T18:33:12.299970805Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/api.py", line 110, in execute_anonymous_activity
2020-08-28T18:33:12.299978795Z response = self._message_channel.send_message('Engine.ExecuteActivity', message_args, cancellation_token)
2020-08-28T18:33:12.300008956Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/engine.py", line 182, in send_message
2020-08-28T18:33:12.300016657Z raise_engine_error(response['error'])
2020-08-28T18:33:12.300056007Z [2020-08-28 18:33:12,299: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/errorhandlers.py", line 10, in raise_engine_error
2020-08-28T18:33:12.300061297Z raise ExecutionError(error_response)
2020-08-28T18:33:12.300119999Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] azureml.dataprep.api.errorhandlers.ExecutionError:
2020-08-28T18:33:12.300124129Z Error Code: ScriptExecution.StreamAccess.NotFound
2020-08-28T18:33:12.300126529Z Failed Step: 1977d509-351d-4f05-9491-9886e756bc42
2020-08-28T18:33:12.300129129Z Error Message: ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.300131559Z StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.300134560Z Found no resources for the input provided: 'https://auger-mt-org-cinpns.s3.amazonaws.com/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAY4JC2MKVBYHZHPM6%2F20200828%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20200828T183309Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=[REDACTED]'
2020-08-28T18:33:12.300141179Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.300154550Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] During handling of the above exception, another exception occurred:
2020-08-28T18:33:12.300229651Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.300288773Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 4, in wrapper
2020-08-28T18:33:12.300293893Z return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.300360165Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 24, in wrapper
2020-08-28T18:33:12.300364945Z return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.300388886Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/azure/dataset.py", line 52, in create
2020-08-28T18:33:12.300391466Z dataset = Dataset.Tabular.from_delimited_files(path=source)
2020-08-28T18:33:12.300457487Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/_loggerfactory.py", line 126, in wrapper
2020-08-28T18:33:12.300459867Z return func(*args, **kwargs)
2020-08-28T18:33:12.300494738Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 315, in from_delimited_files
2020-08-28T18:33:12.300497108Z validate or infer_column_types or _is_inference_required(set_column_types))
2020-08-28T18:33:12.300597630Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 771, in _transform_and_validate
2020-08-28T18:33:12.300607611Z _validate_has_data(dataflow, 'Cannot load any data from the specified path. '
2020-08-28T18:33:12.300629111Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 68, in _validate_has_data
2020-08-28T18:33:12.300631971Z raise DatasetValidationError(error_message + '\n' + e.compliant_message, exception=e)
2020-08-28T18:33:12.300715883Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] azureml.data.dataset_error_handling.DatasetValidationError: DatasetValidationError:
2020-08-28T18:33:12.300720293Z Message: Cannot load any data from the specified path. Make sure the path is accessible and contains data.
2020-08-28T18:33:12.300722964Z ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.300725354Z StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.300727684Z Found no resources for the input provided: '[REDACTED]'
2020-08-28T18:33:12.300730114Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.300732444Z InnerException None
2020-08-28T18:33:12.300734753Z ErrorResponse
2020-08-28T18:33:12.300737093Z {
2020-08-28T18:33:12.300739284Z "error": {
2020-08-28T18:33:12.300741724Z "code": "UserError",
2020-08-28T18:33:12.300744324Z "message": "Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n StreamAccessException was caused by NotFoundException.\n Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1"
2020-08-28T18:33:12.300747664Z }
2020-08-28T18:33:12.300749944Z }
2020-08-28T18:33:12.300836806Z [2020-08-28 18:33:12,300: INFO/ForkPoolWorker-601] [azure] DatasetValidationError:
2020-08-28T18:33:12.300839617Z Message: Cannot load any data from the specified path. Make sure the path is accessible and contains data.
2020-08-28T18:33:12.300841617Z ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.300843377Z StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.300845077Z Found no resources for the input provided: '[REDACTED]'
2020-08-28T18:33:12.300846757Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.300848416Z InnerException None
2020-08-28T18:33:12.300850086Z ErrorResponse
2020-08-28T18:33:12.300851706Z {
2020-08-28T18:33:12.300853286Z "error": {
2020-08-28T18:33:12.300855077Z "code": "UserError",
2020-08-28T18:33:12.300857047Z "message": "Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n StreamAccessException was caused by NotFoundException.\n Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1"
2020-08-28T18:33:12.300859507Z }
2020-08-28T18:33:12.300861107Z }
2020-08-28T18:33:12.301090953Z [2020-08-28 18:33:12,300: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.301154644Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 65, in _validate_has_data
2020-08-28T18:33:12.301161874Z dataflow.verify_has_data()
2020-08-28T18:33:12.301165334Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_loggerfactory.py", line 179, in wrapper
2020-08-28T18:33:12.301168414Z return func(*args, **kwargs)
2020-08-28T18:33:12.301209896Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 804, in verify_has_data
2020-08-28T18:33:12.301215515Z if len(self.take(1)._to_pyrecords()) == 0:
2020-08-28T18:33:12.301218035Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/dataflow.py", line 724, in _to_pyrecords
2020-08-28T18:33:12.301220905Z span_context=to_dprep_span_context(span.get_context() if span else None)
2020-08-28T18:33:12.301292218Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/_aml_helper.py", line 38, in wrapper
2020-08-28T18:33:12.301300757Z return send_message_func(op_code, message, cancellation_token)
2020-08-28T18:33:12.301303427Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/api.py", line 110, in execute_anonymous_activity
2020-08-28T18:33:12.301314768Z response = self._message_channel.send_message('Engine.ExecuteActivity', message_args, cancellation_token)
2020-08-28T18:33:12.301337128Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/engineapi/engine.py", line 182, in send_message
2020-08-28T18:33:12.301339798Z raise_engine_error(response['error'])
2020-08-28T18:33:12.301440171Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/dataprep/api/errorhandlers.py", line 10, in raise_engine_error
2020-08-28T18:33:12.301443511Z raise ExecutionError(error_response)
2020-08-28T18:33:12.301445321Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] azureml.dataprep.api.errorhandlers.ExecutionError:
2020-08-28T18:33:12.301447081Z Error Code: ScriptExecution.StreamAccess.NotFound
2020-08-28T18:33:12.301448721Z Failed Step: 1977d509-351d-4f05-9491-9886e756bc42
2020-08-28T18:33:12.301450351Z Error Message: ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.301452051Z StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.301461482Z Found no resources for the input provided: 'https://auger-mt-org-cinpns.s3.amazonaws.com/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAY4JC2MKVBYHZHPM6%2F20200828%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20200828T183309Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Security-Token=[REDACTED]'
2020-08-28T18:33:12.301466392Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.301477702Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] During handling of the above exception, another exception occurred:
2020-08-28T18:33:12.301528763Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] Traceback (most recent call last):
2020-08-28T18:33:12.301581025Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/provider_runner.py", line 82, in execute_provider
2020-08-28T18:33:12.301583704Z result = getattr(provider, operation_name)(*args, **kwargs)
2020-08-28T18:33:12.301614905Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/azure/a2ml.py", line 12, in import_data
2020-08-28T18:33:12.301617285Z return AzureDataset(self.ctx).create(source=source)
2020-08-28T18:33:12.301654836Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 4, in wrapper
2020-08-28T18:33:12.301660797Z return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.301684517Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/utils/decorators.py", line 24, in wrapper
2020-08-28T18:33:12.301687067Z return decorated(self, *args, **kwargs)
2020-08-28T18:33:12.301742078Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/app/a2ml/api/azure/dataset.py", line 52, in create
2020-08-28T18:33:12.301744289Z dataset = Dataset.Tabular.from_delimited_files(path=source)
2020-08-28T18:33:12.301746019Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/_loggerfactory.py", line 126, in wrapper
2020-08-28T18:33:12.301747909Z return func(*args, **kwargs)
2020-08-28T18:33:12.301791139Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 315, in from_delimited_files
2020-08-28T18:33:12.301794970Z validate or infer_column_types or _is_inference_required(set_column_types))
2020-08-28T18:33:12.301837791Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_factory.py", line 771, in _transform_and_validate
2020-08-28T18:33:12.301840450Z _validate_has_data(dataflow, 'Cannot load any data from the specified path. '
2020-08-28T18:33:12.301864802Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] File "/usr/local/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py", line 68, in _validate_has_data
2020-08-28T18:33:12.301867241Z raise DatasetValidationError(error_message + '\n' + e.compliant_message, exception=e)
2020-08-28T18:33:12.301910872Z [2020-08-28 18:33:12,301: WARNING/ForkPoolWorker-601] azureml.data.dataset_error_handling.DatasetValidationError: DatasetValidationError:
2020-08-28T18:33:12.301916633Z Message: Cannot load any data from the specified path. Make sure the path is accessible and contains data.
2020-08-28T18:33:12.301918723Z ScriptExecutionException was caused by StreamAccessException.
2020-08-28T18:33:12.301920593Z StreamAccessException was caused by NotFoundException.
2020-08-28T18:33:12.301922403Z Found no resources for the input provided: '[REDACTED]'
2020-08-28T18:33:12.301924362Z | session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1
2020-08-28T18:33:12.301926182Z InnerException None
2020-08-28T18:33:12.301928003Z ErrorResponse
2020-08-28T18:33:12.301929653Z {
2020-08-28T18:33:12.301931373Z "error": {
2020-08-28T18:33:12.301933113Z "code": "UserError",
2020-08-28T18:33:12.301935253Z "message": "Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n StreamAccessException was caused by NotFoundException.\n Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1"
2020-08-28T18:33:12.301943213Z }
2020-08-28T18:33:12.301944944Z }
2020-08-28T18:33:12.302522927Z [2020-08-28 18:33:12,302: INFO/ForkPoolWorker-601] a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41]: Send JSON data to Hub: {"type": "ProviderInfoUpdate", "hub_info": {"pipeline_id": null, "project_file": {"id": 3014, "url": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv", "name": "iris-2.csv", "object": "project_file", "status": "processing", "deleted": false, "industry": null, "image_url": null, "providers": [], "created_at": "2020-08-28T18:33:03.560Z", "project_id": 860, "updated_at": "2020-08-28T18:33:03.716Z", "description": null, "content_type": null, "error_message": null, "providers_data": {}}, "project_name": "a2mlworkspacestaging", "project_path": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure", "experiment_id": null, "prediction_id": null, "cluster_task_id": 89287, "experiment_name": null, "project_file_id": 3014, "experiment_session": null, "dataset_manifest_id": null, "experiment_session_id": null}, "provider": "azure", "provider_info": {"azure": {}}}
2020-08-28T18:33:12.882683412Z [2020-08-28 18:33:12,882: INFO/ForkPoolWorker-601] a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41]: Send JSON data to Hub: {"type": "TaskResult", "provider": "azure", "hub_info": {"pipeline_id": null, "project_file": {"id": 3014, "url": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure/files/KQqoYESVEHqoSCnGqp5GuC-iris-9918f0.csv", "name": "iris-2.csv", "object": "project_file", "status": "processing", "deleted": false, "industry": null, "image_url": null, "providers": [], "created_at": "2020-08-28T18:33:03.560Z", "project_id": 860, "updated_at": "2020-08-28T18:33:03.716Z", "description": null, "content_type": null, "error_message": null, "providers_data": {}}, "project_name": "a2mlworkspacestaging", "project_path": "s3://auger-mt-org-cinpns/workspace/projects/alex-mt-with-azure", "experiment_id": null, "prediction_id": null, "cluster_task_id": 89287, "experiment_name": null, "project_file_id": 3014, "experiment_session": null, "dataset_manifest_id": null, "experiment_session_id": null}, "status": "success", "runtime": 8.435573101043701, "result": {"azure": {"result": false, "data": "DatasetValidationError:\n\tMessage: Cannot load any data from the specified path. Make sure the path is accessible and contains data.\nScriptExecutionException was caused by StreamAccessException.\n StreamAccessException was caused by NotFoundException.\n Found no resources for the input provided: '[REDACTED]'\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1\n\tInnerException None\n\tErrorResponse \n{\n \"error\": {\n \"code\": \"UserError\",\n \"message\": \"Cannot load any data from the specified path. 
Make sure the path is accessible and contains data.\\nScriptExecutionException was caused by StreamAccessException.\\n StreamAccessException was caused by NotFoundException.\\n Found no resources for the input provided: '[REDACTED]'\\n| session_id=5d10dac1-f86e-485c-82a5-3a4fb755a9b1\"\n }\n}"}}, "traceback": null}
2020-08-28T18:33:13.346311951Z [2020-08-28 18:33:13,346: INFO/ForkPoolWorker-601] Task a2ml.tasks_queue.tasks_hub_api.import_data_task[2ed813cc-6bf1-4b3d-ab91-b312d8effb41] succeeded in 8.899521411003661s: None