|
1 | 1 | import struct |
2 | 2 | import decimal |
3 | 3 | import datetime |
| 4 | +import logging |
4 | 5 |
|
5 | 6 | from pymysql.charset import charset_by_name |
6 | 7 | from enum import Enum |
|
15 | 16 | from .bitmap import BitCount, BitGet |
16 | 17 |
|
17 | 18 |
|
| 19 | + |
| 20 | +# MySQL 5.7 compatibility: process-wide cache of INFORMATION_SCHEMA column names, keyed by "schema.table"; failed lookups are stored as empty lists. |
| 21 | +_COLUMN_NAME_CACHE = {} |
| 22 | + |
18 | 23 | class RowsEvent(BinLogEvent): |
19 | 24 | def __init__(self, from_packet, event_size, table_map, ctl_connection, **kwargs): |
20 | 25 | super().__init__(from_packet, event_size, table_map, ctl_connection, **kwargs) |
@@ -746,6 +751,8 @@ def __init__(self, from_packet, event_size, table_map, ctl_connection, **kwargs) |
746 | 751 | self.__ignored_schemas = kwargs["ignored_schemas"] |
747 | 752 | self.__freeze_schema = kwargs["freeze_schema"] |
748 | 753 | self.__optional_meta_data = kwargs["optional_meta_data"] |
| 754 | + self.__enable_logging = kwargs.get("enable_logging", False) |
| 755 | + self.__use_column_name_cache = kwargs.get("use_column_name_cache", False) |
749 | 756 | # Post-Header |
750 | 757 | self.table_id = self._read_table_id() |
751 | 758 |
|
@@ -909,12 +916,70 @@ def _get_optional_meta_data(self): |
909 | 916 |
|
910 | 917 | return optional_metadata |
911 | 918 |
|
| 919 | + |
def _fetch_column_names_from_schema(self):
    """
    Fetch column names from INFORMATION_SCHEMA for MySQL 5.7 compatibility.

    Only executes if use_column_name_cache=True is enabled.
    Results are memoised in the module-level ``_COLUMN_NAME_CACHE`` under
    the key ``"<schema>.<table>"`` so each table is queried at most once.
    A failed lookup is cached as an empty list as well, which means it is
    not retried for that key.

    Returns:
        list: Column names in ORDINAL_POSITION order, or an empty list when
        the feature is disabled or the lookup failed. The returned list is
        the cached object itself, so callers must not mutate it.
    """
    # Only fetch if explicitly enabled (opt-in feature)
    if not self.__use_column_name_cache:
        return []

    cache_key = f"{self.schema}.{self.table}"

    # Cached values are always lists (possibly empty), never None.
    cached = _COLUMN_NAME_CACHE.get(cache_key)
    if cached is not None:
        return cached

    query = """
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
        ORDER BY ORDINAL_POSITION
    """

    try:
        cursor = self._ctl_connection.cursor()
        try:
            cursor.execute(query, (self.schema, self.table))
            rows = cursor.fetchall()
        finally:
            # Release the cursor even when execute()/fetchall() raises;
            # previously it was only closed on the success path.
            cursor.close()

        # Handle both tuple and dict cursor results
        if rows and isinstance(rows[0], dict):
            column_names = [row["COLUMN_NAME"] for row in rows]
        else:
            column_names = [row[0] for row in rows]
    except Exception as e:
        # Deliberately broad: this lookup is best-effort and must never
        # break binlog event parsing.
        if self.__enable_logging:
            logging.warning(
                "Failed to fetch column names for %s: %s: %s",
                cache_key,
                type(e).__name__,
                e,
            )
        # Cache empty result to avoid retry spam
        _COLUMN_NAME_CACHE[cache_key] = []
        return []

    _COLUMN_NAME_CACHE[cache_key] = column_names

    if self.__enable_logging and column_names:
        logging.info(
            "Cached column names for %s: %d columns",
            cache_key,
            len(column_names),
        )

    return column_names
| 970 | + |
912 | 971 | def _sync_column_info(self): |
913 | 972 | if not self.__optional_meta_data: |
914 | | - # If optional_meta_data is False Do not sync Event Time Column Schemas |
| 973 | + column_names = self._fetch_column_names_from_schema() |
| 974 | + if column_names and len(column_names) == self.column_count: |
| 975 | + for column_idx in range(self.column_count): |
| 976 | + self.columns[column_idx].name = column_names[column_idx] |
915 | 977 | return |
916 | 978 | if len(self.optional_metadata.column_name_list) == 0: |
917 | | - # May Be Now BINLOG_ROW_METADATA = FULL But Before Action BINLOG_ROW_METADATA Mode = MINIMAL |
| 979 | + column_names = self._fetch_column_names_from_schema() |
| 980 | + if column_names and len(column_names) == self.column_count: |
| 981 | + for column_idx in range(self.column_count): |
| 982 | + self.columns[column_idx].name = column_names[column_idx] |
918 | 983 | return |
919 | 984 | charset_pos = 0 |
920 | 985 | enum_or_set_pos = 0 |
|
0 commit comments