From 9abbba5b558b2bbe61c573c0825e921f3312968a Mon Sep 17 00:00:00 2001 From: KarlKallman Date: Thu, 21 May 2026 13:51:22 +0200 Subject: [PATCH 1/5] WIP --- .../FormulaParsing/Excel/Functions/Text/RegexTest.cs | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs new file mode 100644 index 000000000..0168f7e66 --- /dev/null +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs @@ -0,0 +1,12 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text +{ + internal class RegexTest + { + } +} From 85a1d40ad5ef1d63922b931b9201c3bfce6cca7a Mon Sep 17 00:00:00 2001 From: KarlKallman Date: Thu, 21 May 2026 16:24:51 +0200 Subject: [PATCH 2/5] WIP --- .../Excel/Functions/Text/RegexTest.cs | 103 +++++++++++++++++- .../TextFunctions/RegexFunctionsTests.cs | 23 ++++ 2 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs index 0168f7e66..70a472685 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs @@ -1,12 +1,103 @@ -using System; +using OfficeOpenXml.FormulaParsing.FormulaExpressions; +using OfficeOpenXml.FormulaParsing.Ranges; +using System; using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; +using System.Text.RegularExpressions; namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text { - internal class RegexTest + internal class RegexTest : ExcelFunction { + public override int ArgumentMinLength => 2; + public override string NamespacePrefix => "_xlfn."; + + public override CompileResult Execute(IList arguments, ParsingContext context) + { + bool textIsRange = arguments[0].IsExcelRange; + bool patternIsRange = arguments[1].IsExcelRange; + + if (!textIsRange && !patternIsRange) + { + // Skalär × skalär – ursprungligt beteende + var text = arguments[0].Value?.ToString(); + var pattern = arguments[1].Value?.ToString(); + + if (text == null || pattern == null) + return CreateResult(ExcelErrorValue.Create(eErrorType.NA), DataType.ExcelError); + + return CreateResult(GetRegexTest(text, pattern), DataType.Boolean); + } + + // Minst ett range-argument – bygg resultatmatrisen + var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null; + var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null; + + int textRows = texts != null ? texts.Size.NumberOfRows : 1; + int textCols = texts != null ? texts.Size.NumberOfCols : 1; + int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1; + int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1; + + // Broadcasting-regler: + // • Om en dimension är 1 → broadcastas till den andres storlek + // • Om båda > 1 → ta max (den kortare ger #N/A vid överflöd) + var nRows = ExpandedSize(textRows, patternRows); + var nCols = ExpandedSize(textCols, patternCols); + + var result = new InMemoryRange(nRows, nCols); + + for (int row = 0; row < nRows; row++) + { + for (int col = 0; col < nCols; col++) + { + var textValue = GetValue(texts, arguments[0], textRows, textCols, row, col); + var patternValue = GetValue(patterns, arguments[1], patternRows, patternCols, row, col); + + if (textValue == null || patternValue == null) + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.NA)); + else + result.SetValue(row, col, GetRegexTest(textValue, patternValue)); + } + } + + return CreateDynamicArrayResult(result, DataType.ExcelRange); + } + + /// + /// Hämtar strängvärdet för (row, col) med broadcasting. + /// Returnerar null om cellen är utanför räckvidden (→ #N/A). + /// + private static string GetValue( + IRangeInfo range, + FunctionArgument scalar, + int nRows, int nCols, + int row, int col) + { + if (range == null) + // Skalärargument – broadcastas alltid + return scalar.Value?.ToString(); + + // Beräkna verkligt index med broadcasting (storlek 1 → använd index 0) + int r = nRows == 1 ? 0 : row; + int c = nCols == 1 ? 0 : col; + + // Utanför räckvidden → #N/A + if (r >= nRows || c >= nCols) + return null; + + return range.GetOffset(r, c)?.ToString(); + } + + /// + /// Beräknar resultatdimensionen för en axel enligt Excels broadcasting-regler. + /// + private static short ExpandedSize(int a, int b) + { + if (a == 1) return (short)b; + if (b == 1) return (short)a; + return (short)Math.Max(a, b); // Båda > 1: max-storlek, överskott → #N/A + } + + private static bool GetRegexTest(string text, string pattern) + => Regex.IsMatch(text, pattern); } -} +} \ No newline at end of file diff --git a/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs new file mode 100644 index 000000000..fcd33bdba --- /dev/null +++ b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs @@ -0,0 +1,23 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace EPPlusTest.FormulaParsing.Excel.Functions.TextFunctions +{ + [TestClass] + public class RegexFunctionsTests : TestBase + { + [TestMethod] + public void RegexTest() + { + using(var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + } + } + } +} From ae671d10bb00feaaf92a6c0bb621e9363812ec6a Mon Sep 17 00:00:00 2001 From: KarlKallman Date: Thu, 28 May 2026 16:36:37 +0200 Subject: [PATCH 3/5] WIP --- .../Excel/Functions/BuiltInFunctions.cs | 1 + .../Excel/Functions/Text/RegexTest.cs | 8 +- .../TextFunctions/RegexFunctionsTests.cs | 132 +++++++++++++++++- 3 files changed, 137 insertions(+), 4 deletions(-) diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs b/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs index 7108e5c99..e06515cec 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs @@ -64,6 +64,7 @@ public BuiltInFunctions() Functions["unichar"] = new Unichar(); Functions["numbervalue"] = new NumberValue(); Functions["dollar"] = new Dollar(); + Functions["regextest"] = new RegexTest(); Functions["textsplit"] = new TextSplit(); Functions["textbefore"] = new TextBefore(DelimiterFunction.TextBefore); Functions["textafter"] = new TextAfter(DelimiterFunction.TextAfter); diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs index 70a472685..cab0fa9bd 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs @@ -10,11 +10,12 @@ internal class RegexTest : ExcelFunction { public override int ArgumentMinLength => 2; public override string NamespacePrefix => "_xlfn."; - + public override CompileResult Execute(IList arguments, ParsingContext context) { bool textIsRange = arguments[0].IsExcelRange; bool patternIsRange = arguments[1].IsExcelRange; + int caseSensitivity = ArgToInt(arguments, 2, 0); if (!textIsRange && !patternIsRange) { @@ -25,7 +26,7 @@ public override CompileResult Execute(IList arguments, Parsing if (text == null || pattern == null) return CreateResult(ExcelErrorValue.Create(eErrorType.NA), DataType.ExcelError); - return CreateResult(GetRegexTest(text, pattern), DataType.Boolean); + return CreateResult(GetRegexTest(text, pattern, caseSensitivity), DataType.Boolean); } // Minst ett range-argument – bygg resultatmatrisen @@ -54,6 +55,7 @@ public override CompileResult Execute(IList arguments, Parsing if (textValue == null || patternValue == null) result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.NA)); + else if(Math.Abs(caseSensitivity) < else result.SetValue(row, col, GetRegexTest(textValue, patternValue)); } @@ -97,7 +99,7 @@ private static short ExpandedSize(int a, int b) return (short)Math.Max(a, b); // Båda > 1: max-storlek, överskott → #N/A } - private static bool GetRegexTest(string text, string pattern) + private static bool GetRegexTest(string text, string pattern, int caseSensitive) => Regex.IsMatch(text, pattern); } } \ No newline at end of file diff --git a/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs index fcd33bdba..791d05c16 100644 --- a/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs +++ b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs @@ -1,4 +1,7 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; +using OfficeOpenXml; +using OfficeOpenXml.FormulaParsing.Excel.Functions.Information; +using OfficeOpenXml.FormulaParsing.Excel.Functions.Text; using System; using System.Collections.Generic; using System.Linq; @@ -16,7 +19,134 @@ public void RegexTest() using(var package = OpenPackage("Testpackage")) { var sheet = package.Workbook.Worksheets.Add("testsheet"); - + + sheet.Cells["A1"].Value = "Stockholm"; + sheet.Cells["A2"].Value = "Linköping"; + sheet.Cells["A3"].Value = "Örebro"; + sheet.Cells["A4"].Value = "Stockholm"; + sheet.Cells["A5"].Value = "Örebro"; + sheet.Cells["A6"].Value = "Linköping"; + + sheet.Cells["B1"].Value = "Stockholm"; + sheet.Cells["B2"].Value = "^S"; + sheet.Cells["B3"].Value = "Q[0-9]"; + sheet.Cells["B4"].Value = "202[456]"; + sheet.Cells["B5"].Value = "^[0-9]{5}$"; + sheet.Cells["B6"].Value = "[A-ZÅÄÖ][a-zåäö]+"; + + sheet.Cells["D1"].Formula = "REGEXTEST(A1:A6, B1:B6)"; + sheet.Calculate(); + Assert.AreEqual(true, sheet.Cells["D1"].Value); + Assert.AreEqual(false, sheet.Cells["D2"].Value); + Assert.AreEqual(false, sheet.Cells["D3"].Value); + Assert.AreEqual(false, sheet.Cells["D4"].Value); + Assert.AreEqual(false, sheet.Cells["D5"].Value); + Assert.AreEqual(true, sheet.Cells["D6"].Value); + } + } + + [TestMethod] + public void RegexTestMultiplCols() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "Stockholm"; + sheet.Cells["A2"].Value = "Linköping"; + sheet.Cells["A3"].Value = "Örebro"; + sheet.Cells["A4"].Value = "Stockholm"; + sheet.Cells["A5"].Value = "Örebro"; + sheet.Cells["A6"].Value = "Linköping"; + + sheet.Cells["B1"].Value = "Stockholm"; + sheet.Cells["B2"].Value = "Linköping"; + sheet.Cells["B3"].Value = "Örebro"; + sheet.Cells["B4"].Value = "Stockholm"; + sheet.Cells["B5"].Value = "Örebro"; + sheet.Cells["B6"].Value = "Linköping"; + + sheet.Cells["C1"].Value = "Stockholm"; + sheet.Cells["C2"].Value = "^S"; + sheet.Cells["C3"].Value = "Q[0-9]"; + sheet.Cells["C4"].Value = "202[456]"; + sheet.Cells["C5"].Value = "^[0-9]{5}$"; + sheet.Cells["C6"].Value = "[A-ZÅÄÖ][a-zåäö]+"; + + sheet.Cells["D1"].Formula = "REGEXTEST(A1:B6, C1:C6)"; + sheet.Calculate(); + Assert.AreEqual(true, sheet.Cells["D1"].Value); + Assert.AreEqual(false, sheet.Cells["D2"].Value); + Assert.AreEqual(false, sheet.Cells["D3"].Value); + Assert.AreEqual(false, sheet.Cells["D4"].Value); + Assert.AreEqual(false, sheet.Cells["D5"].Value); + Assert.AreEqual(true, sheet.Cells["D6"].Value); + + Assert.AreEqual(true, sheet.Cells["E1"].Value); + Assert.AreEqual(false, sheet.Cells["E2"].Value); + Assert.AreEqual(false, sheet.Cells["E3"].Value); + Assert.AreEqual(false, sheet.Cells["E4"].Value); + Assert.AreEqual(false, sheet.Cells["E5"].Value); + Assert.AreEqual(true, sheet.Cells["E6"].Value); + } + } + + [TestMethod] + public void RegexUnevenInputRanges() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "Stockholm"; + sheet.Cells["A2"].Value = "Linköping"; + sheet.Cells["A3"].Value = "Örebro"; + sheet.Cells["A4"].Value = "Stockholm"; + sheet.Cells["A5"].Value = "Örebro"; + sheet.Cells["A6"].Value = "Linköping"; + + sheet.Cells["B1"].Value = "Stockholm"; + sheet.Cells["B2"].Value = "Linköping"; + sheet.Cells["B3"].Value = "Örebro"; + sheet.Cells["B4"].Value = "Stockholm"; + sheet.Cells["B5"].Value = "Örebro"; + sheet.Cells["B6"].Value = "Linköping"; + + sheet.Cells["C1"].Value = 2026; + sheet.Cells["C2"].Value = 2026; + sheet.Cells["C3"].Value = 2025; + sheet.Cells["C4"].Value = 2025; + sheet.Cells["C5"].Value = 2025; + sheet.Cells["C6"].Value = 2024; + + sheet.Cells["D4"].Value = "202[456]"; + sheet.Cells["D5"].Value = "^[0-9]{5}$"; + sheet.Cells["D6"].Value = "[A-ZÅÄÖ][a-zåäö]+"; + sheet.Cells["D7"].Value = "[0-9]+"; + + sheet.Cells["E1"].Formula = "REGEXTEST(A1:C6, D4:D7)"; + sheet.Calculate(); + + Assert.AreEqual(false, sheet.Cells["E1"].Value); + Assert.AreEqual(false, sheet.Cells["E2"].Value); + Assert.AreEqual(true, sheet.Cells["E3"].Value); + Assert.AreEqual(false, sheet.Cells["E4"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["E5"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["E6"].Value); + + Assert.AreEqual(false, sheet.Cells["F1"].Value); + Assert.AreEqual(false, sheet.Cells["F2"].Value); + Assert.AreEqual(true, sheet.Cells["F3"].Value); + Assert.AreEqual(false, sheet.Cells["F4"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["F5"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["F6"].Value); + + Assert.AreEqual(true, sheet.Cells["G1"].Value); + Assert.AreEqual(false, sheet.Cells["G2"].Value); + Assert.AreEqual(false, sheet.Cells["G3"].Value); + Assert.AreEqual(true, sheet.Cells["G4"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["G5"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["G6"].Value); } } } From e11fbdf003c6a936f9c5d98e7045034b816e5a2a Mon Sep 17 00:00:00 2001 From: KarlKallman Date: Mon, 1 Jun 2026 16:35:10 +0200 Subject: [PATCH 4/5] WIP --- .../Excel/Functions/BuiltInFunctions.cs | 1 + .../Excel/Functions/Text/RegexExtract.cs | 153 ++++++++++++++++ .../Excel/Functions/Text/RegexFunctionBase.cs | 41 +++++ .../Excel/Functions/Text/RegexReplace.cs | 26 +++ .../Excel/Functions/Text/RegexTest.cs | 50 ++---- .../TextFunctions/RegexFunctionsTests.cs | 163 ++++++++++++++++++ 6 files changed, 394 insertions(+), 40 deletions(-) create mode 100644 src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs create mode 100644 src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs create mode 100644 src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs b/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs index e06515cec..82800b593 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs @@ -65,6 +65,7 @@ public BuiltInFunctions() Functions["numbervalue"] = new NumberValue(); Functions["dollar"] = new Dollar(); Functions["regextest"] = new RegexTest(); + Functions["regexextract"] = new RegexExtract(); Functions["textsplit"] = new TextSplit(); Functions["textbefore"] = new TextBefore(DelimiterFunction.TextBefore); Functions["textafter"] = new TextAfter(DelimiterFunction.TextAfter); diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs new file mode 100644 index 000000000..a83655395 --- /dev/null +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs @@ -0,0 +1,153 @@ +using OfficeOpenXml.FormulaParsing.Excel.Functions.MathFunctions; +using OfficeOpenXml.FormulaParsing.Excel.Functions.RefAndLookup; +using OfficeOpenXml.FormulaParsing.FormulaExpressions; +using OfficeOpenXml.FormulaParsing.Ranges; +using OfficeOpenXml.RichData.IndexRelations; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using System.Text; +using System.Text.RegularExpressions; + +namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text +{ + internal class RegexExtract : RegexFunctionBase + { + public override int ArgumentMinLength => 2; + + public override string NamespacePrefix => "_xlfn."; + + public override CompileResult Execute(IList arguments, ParsingContext context) + { + bool textIsRange = arguments[0].IsExcelRange; + bool patternIsRange = arguments[1].IsExcelRange; + int returnMode = arguments.Count > 2 ? ArgToInt(arguments, 2, 0) : 0; + int caseSensitivity = arguments.Count > 3 ? ArgToInt(arguments, 3, 0) : 0; + + if (!textIsRange && !patternIsRange) + { + var text = arguments[0].Value?.ToString(); + var pattern = arguments[1].Value?.ToString(); + + if (text == null || pattern == null) + return CreateResult(ExcelErrorValue.Create(eErrorType.NA), DataType.ExcelError); + if (caseSensitivity > 1 || caseSensitivity < 0 || returnMode < 0 || returnMode > 3) + return CreateResult(ExcelErrorValue.Create(eErrorType.Value), DataType.ExcelError); + + if (returnMode == 1) + { + var matches = GetMatches(text, pattern, caseSensitivity); + if (matches.Length == 0) + return CreateResult(ExcelErrorValue.Create(eErrorType.NA), DataType.ExcelError); + + var arr = new InMemoryRange((short)1, (short)matches.Length); + for (int i = 0; i < matches.Length; i++) + arr.SetValue(0, i, matches[i]); + + return CreateDynamicArrayResult(arr, DataType.ExcelRange); + } + else if (returnMode == 2) + { + var match = Regex.Match(text, pattern, (RegexOptions)caseSensitivity); + if (!match.Success || match.Groups.Count <= 1) + return CreateResult(ExcelErrorValue.Create(eErrorType.NA), DataType.ExcelError); + + var groups = match.Groups + .Cast() + .Skip(1) + .Select(g => g.Value) + .ToArray(); + + var arr = new InMemoryRange((short)1, (short)groups.Length); + for (int i = 0; i < groups.Length; i++) + arr.SetValue(0, i, groups[i]); + + return CreateDynamicArrayResult(arr, DataType.ExcelRange); + } + + return CreateResult(GetRegexExtractSingle(text, pattern, caseSensitivity), DataType.String); + } + + // Minst ett range-argument – bygg resultatmatrisen + var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null; + var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null; + + int textRows = texts != null ? texts.Size.NumberOfRows : 1; + int textCols = texts != null ? texts.Size.NumberOfCols : 1; + int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1; + int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1; + + // Broadcasting-regler: + // • Om en dimension är 1 → broadcastas till den andres storlek + // • Om båda > 1 → ta max (den kortare ger #N/A vid överflöd) + var nRows = ExpandedSize(textRows, patternRows); + var nCols = ExpandedSize(textCols, patternCols); + + var result = new InMemoryRange(nRows, nCols); + + for (int row = 0; row < nRows; row++) + { + for (int col = 0; col < nCols; col++) + { + var textValue = GetValue(texts, arguments[0], textRows, textCols, row, col); + var patternValue = GetValue(patterns, arguments[1], patternRows, patternCols, row, col); + + if (textValue == null || patternValue == null) + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.NA)); + else if (Math.Abs(caseSensitivity) > 1 || Math.Abs(returnMode) > 2) + { + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.Value)); + } + else + { + if(returnMode == 2) + { + var fullMatch = Regex.Match(textValue, patternValue, (RegexOptions)caseSensitivity); + var firstMatch = fullMatch.Groups + .Cast() + .Skip(1) + .Select(g => g.Value) + .ToArray().First().ToString(); // Excel only returns the first match and ignores following matches. + result.SetValue(row, col, firstMatch); + } + else if(returnMode == 1) + { + var firstMatch = GetMatches(textValue, patternValue, caseSensitivity).First().ToString(); + result.SetValue(row, col, firstMatch); + } + else + { + var match = GetRegexExtractSingle(textValue, patternValue, caseSensitivity); + if (match == string.Empty) + { + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.NA)); + } + else + { + result.SetValue(row, col, GetRegexExtractSingle(textValue, patternValue, caseSensitivity)); + } + } + } + } + } + + return CreateDynamicArrayResult(result, DataType.ExcelRange); + } + + private string[] GetMatches(string text, string pattern, int caseSensitive) + { + return Regex.Matches(text, pattern, (RegexOptions)caseSensitive) + .Cast() + .Select(m => m.Value) + .ToArray(); + } + + + private string GetRegexExtractSingle(string text, string pattern, int caseSensitivity) + { + return Regex.Match(text, pattern, (RegexOptions)caseSensitivity).ToString(); + } + + } +} diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs new file mode 100644 index 000000000..9609a9531 --- /dev/null +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs @@ -0,0 +1,41 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text +{ + internal abstract class RegexFunctionBase : ExcelFunction + { + protected static string GetValue( + IRangeInfo range, + FunctionArgument scalar, + int nRows, int nCols, + int row, int col) + { + if (range == null) + // Skalärargument – broadcastas alltid + return scalar.Value?.ToString(); + + // Beräkna verkligt index med broadcasting (storlek 1 → använd index 0) + int r = nRows == 1 ? 0 : row; + int c = nCols == 1 ? 0 : col; + + // Utanför räckvidden → #N/A + if (r >= nRows || c >= nCols) + return null; + + return range.GetOffset(r, c)?.ToString(); + } + + /// + /// Beräknar resultatdimensionen för en axel enligt Excels broadcasting-regler. + /// + protected static short ExpandedSize(int a, int b) + { + if (a == 1) return (short)b; + if (b == 1) return (short)a; + return (short)Math.Max(a, b); // Båda > 1: max-storlek, överskott → #N/A + } + } +} diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs new file mode 100644 index 000000000..15c6bf96a --- /dev/null +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs @@ -0,0 +1,26 @@ +using OfficeOpenXml.FormulaParsing.FormulaExpressions; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; + +namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text +{ + internal class RegexReplace : RegexFunctionBase + { + public override int ArgumentMinLength => 3; + + public override string NamespacePrefix => "_xlfn."; + + public override CompileResult Execute(IList arguments, ParsingContext context) + { + bool textIsRange = arguments[0].IsExcelRange; + bool patternIsRange = arguments[1].IsExcelRange; + bool replacementIsRange = arguments[2].IsExcelRange; + + throw new NotImplementedException(); + } + + } +} diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs index cab0fa9bd..dec87fd94 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs @@ -6,7 +6,7 @@ namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text { - internal class RegexTest : ExcelFunction + internal class RegexTest : RegexFunctionBase { public override int ArgumentMinLength => 2; public override string NamespacePrefix => "_xlfn."; @@ -15,7 +15,7 @@ public override CompileResult Execute(IList arguments, Parsing { bool textIsRange = arguments[0].IsExcelRange; bool patternIsRange = arguments[1].IsExcelRange; - int caseSensitivity = ArgToInt(arguments, 2, 0); + int caseSensitivity = arguments.Count > 2 ? ArgToInt(arguments, 2, 0) : 0; if (!textIsRange && !patternIsRange) { @@ -25,6 +25,8 @@ public override CompileResult Execute(IList arguments, Parsing if (text == null || pattern == null) return CreateResult(ExcelErrorValue.Create(eErrorType.NA), DataType.ExcelError); + if (caseSensitivity > 1 || caseSensitivity < 0) + return CreateResult(ExcelErrorValue.Create(eErrorType.Value), DataType.ExcelError); return CreateResult(GetRegexTest(text, pattern, caseSensitivity), DataType.Boolean); } @@ -55,51 +57,19 @@ public override CompileResult Execute(IList arguments, Parsing if (textValue == null || patternValue == null) result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.NA)); - else if(Math.Abs(caseSensitivity) < + else if(caseSensitivity > 1 || caseSensitivity < 0) + { + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.Value)); + } else - result.SetValue(row, col, GetRegexTest(textValue, patternValue)); + result.SetValue(row, col, GetRegexTest(textValue, patternValue, caseSensitivity)); } } return CreateDynamicArrayResult(result, DataType.ExcelRange); } - /// - /// Hämtar strängvärdet för (row, col) med broadcasting. - /// Returnerar null om cellen är utanför räckvidden (→ #N/A). - /// - private static string GetValue( - IRangeInfo range, - FunctionArgument scalar, - int nRows, int nCols, - int row, int col) - { - if (range == null) - // Skalärargument – broadcastas alltid - return scalar.Value?.ToString(); - - // Beräkna verkligt index med broadcasting (storlek 1 → använd index 0) - int r = nRows == 1 ? 0 : row; - int c = nCols == 1 ? 0 : col; - - // Utanför räckvidden → #N/A - if (r >= nRows || c >= nCols) - return null; - - return range.GetOffset(r, c)?.ToString(); - } - - /// - /// Beräknar resultatdimensionen för en axel enligt Excels broadcasting-regler. - /// - private static short ExpandedSize(int a, int b) - { - if (a == 1) return (short)b; - if (b == 1) return (short)a; - return (short)Math.Max(a, b); // Båda > 1: max-storlek, överskott → #N/A - } - private static bool GetRegexTest(string text, string pattern, int caseSensitive) - => Regex.IsMatch(text, pattern); + => Regex.IsMatch(text, pattern, (RegexOptions)caseSensitive); } } \ No newline at end of file diff --git a/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs index 791d05c16..c9f93b891 100644 --- a/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs +++ b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs @@ -7,6 +7,7 @@ using System.Linq; using System.Text; using System.Threading.Tasks; +using static OfficeOpenXml.FormulaParsing.Excel.Functions.Engineering.Conversions; namespace EPPlusTest.FormulaParsing.Excel.Functions.TextFunctions { @@ -149,5 +150,167 @@ public void RegexUnevenInputRanges() Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["G6"].Value); } } + + [TestMethod] + public void RegexTestCaseSensitive() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "Stockholm"; + sheet.Cells["A2"].Value = "Linköping"; + sheet.Cells["A3"].Value = "Örebro"; + sheet.Cells["A4"].Value = "Stockholm"; + sheet.Cells["A5"].Value = "Örebro"; + sheet.Cells["A6"].Value = "Linköping"; + + sheet.Cells["B1"].Value = "k"; + + sheet.Cells["D1"].Formula = "REGEXTEST(A1:A6, B1, 1)"; + sheet.Calculate(); + + Assert.AreEqual(true, sheet.Cells["D1"].Value); + Assert.AreEqual(true, sheet.Cells["D2"].Value); + Assert.AreEqual(false, sheet.Cells["D3"].Value); + Assert.AreEqual(true, sheet.Cells["D4"].Value); + Assert.AreEqual(false, sheet.Cells["D5"].Value); + Assert.AreEqual(true, sheet.Cells["D6"].Value); + } + } + + [TestMethod] + public void RegexTestCaseSensitiveError() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "Stockholm"; + sheet.Cells["A2"].Value = "Linköping"; + sheet.Cells["A3"].Value = "Örebro"; + sheet.Cells["A4"].Value = "Stockholm"; + sheet.Cells["A5"].Value = "Örebro"; + sheet.Cells["A6"].Value = "Linköping"; + + sheet.Cells["B1"].Value = "k"; + + sheet.Cells["D1"].Formula = "REGEXTEST(A1:A6, B1, 2)"; + sheet.Calculate(); + + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["D1"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["D2"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["D3"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["D4"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["D5"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["D6"].Value); + } + } + + [TestMethod] + public void RegexExtract() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "Kossa Mail@mail.se"; + sheet.Cells["A2"].Value = "Får enmail@mef.se sd"; + sheet.Cells["A3"].Value = "mailens@hemma.com"; + sheet.Cells["A4"].Value = "mail@se.se"; + sheet.Cells["A5"].Value = "Tupp ska gala gmail@adress.net dwqdw"; + sheet.Cells["A6"].Value = "Katt"; + + sheet.Cells["B1"].Value = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+.[a-zA-Z]{2,}"; + + + sheet.Cells["D1"].Formula = "REGEXEXTRACT(A1:A6, B1)"; + sheet.Calculate(); + Assert.AreEqual("Mail@mail.se", sheet.Cells["D1"].Value); + Assert.AreEqual("enmail@mef.se sd", sheet.Cells["D2"].Value); + Assert.AreEqual("mailens@hemma.com", sheet.Cells["D3"].Value); + Assert.AreEqual("mail@se.se", sheet.Cells["D4"].Value); + Assert.AreEqual("gmail@adress.net dwqdw", sheet.Cells["D5"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["D6"].Value); + } + } + + [TestMethod] + public void RegexExtractReturnMode1() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "Just #fitness finished 5k! #running"; + + sheet.Cells["B1"].Value = "#\\w+"; + + sheet.Cells["D1"].Formula = "REGEXEXTRACT(A1, B1, 1)"; + sheet.Calculate(); + Assert.AreEqual("#fitness", sheet.Cells["D1"].Value); + Assert.AreEqual("#running", sheet.Cells["E1"].Value); + } + } + + [TestMethod] + public void RegexExtractShouldReturnSingleWithReturnMode1() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "Just #fitness finished 5k! #running"; + sheet.Cells["A2"].Value = "Look at this picture #nature #instagram"; + sheet.Cells["B1"].Value = "#\\w+"; + + sheet.Cells["D1"].Formula = "REGEXEXTRACT(A1:A2, B1, 1)"; + sheet.Calculate(); + Assert.AreEqual("#fitness", sheet.Cells["D1"].Value); + Assert.AreNotEqual("#running", sheet.Cells["E1"].Value); + Assert.AreEqual("#nature", sheet.Cells["D2"].Value); + Assert.AreNotEqual("#instagram", sheet.Cells["E2"].Value); + } + } + + + [TestMethod] + public void RegexExtractReturnMode2() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "9183-Green-M"; + + sheet.Cells["B1"].Value = "(\\d{4})-(\\w+)-(\\w+)"; + + sheet.Cells["D1"].Formula = "REGEXEXTRACT(A1, B1, 2)"; + sheet.Calculate(); + Assert.AreEqual("9183", sheet.Cells["D1"].Value); + Assert.AreEqual("Green", sheet.Cells["E1"].Value); + Assert.AreEqual("M", sheet.Cells["F1"].Value); + } + } + + [TestMethod] + public void RegexExtractShouldReturnSingleWithReturnMode2() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "9183-Green-M"; + sheet.Cells["A2"].Value = "2546-Black-XL"; + + sheet.Cells["B1"].Value = "(\\d{4})-(\\w+)-(\\w+)"; + + sheet.Cells["D1"].Formula = "REGEXEXTRACT(A1:A2, B1, 2)"; + sheet.Calculate(); + + Assert.AreEqual("9183", sheet.Cells["D1"].Value); + Assert.AreEqual("2546", sheet.Cells["D2"].Value); + } + } } } From 5adbe075f792de40da2746dfc13efe11de738566 Mon Sep 17 00:00:00 2001 From: KarlKallman Date: Wed, 3 Jun 2026 09:10:50 +0200 Subject: [PATCH 5/5] WIP --- .../Excel/Functions/BuiltInFunctions.cs | 1 + .../Excel/Functions/Text/RegexExtract.cs | 7 +- .../Excel/Functions/Text/RegexFunctionBase.cs | 12 +- .../Excel/Functions/Text/RegexReplace.cs | 132 +++++++++++++++++- .../Excel/Functions/Text/RegexTest.cs | 5 - .../TextFunctions/RegexFunctionsTests.cs | 132 +++++++++++++++++- 6 files changed, 264 insertions(+), 25 deletions(-) diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs b/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs index 82800b593..eb87c7976 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/BuiltInFunctions.cs @@ -66,6 +66,7 @@ public BuiltInFunctions() Functions["dollar"] = new Dollar(); Functions["regextest"] = new RegexTest(); Functions["regexextract"] = new RegexExtract(); + Functions["regexreplace"] = new RegexReplace(); Functions["textsplit"] = new TextSplit(); Functions["textbefore"] = new TextBefore(DelimiterFunction.TextBefore); Functions["textafter"] = new TextAfter(DelimiterFunction.TextAfter); diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs index a83655395..05a870a07 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexExtract.cs @@ -69,7 +69,6 @@ public override CompileResult Execute(IList arguments, Parsing return CreateResult(GetRegexExtractSingle(text, pattern, caseSensitivity), DataType.String); } - // Minst ett range-argument – bygg resultatmatrisen var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null; var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null; @@ -78,9 +77,6 @@ public override CompileResult Execute(IList arguments, Parsing int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1; int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1; - // Broadcasting-regler: - // • Om en dimension är 1 → broadcastas till den andres storlek - // • Om båda > 1 → ta max (den kortare ger #N/A vid överflöd) var nRows = ExpandedSize(textRows, patternRows); var nCols = ExpandedSize(textCols, patternCols); @@ -108,7 +104,7 @@ public override CompileResult Execute(IList arguments, Parsing .Cast() .Skip(1) .Select(g => g.Value) - .ToArray().First().ToString(); // Excel only returns the first match and ignores following matches. + .ToArray().First().ToString(); result.SetValue(row, col, firstMatch); } else if(returnMode == 1) @@ -143,7 +139,6 @@ private string[] GetMatches(string text, string pattern, int caseSensitive) .ToArray(); } - private string GetRegexExtractSingle(string text, string pattern, int caseSensitivity) { return Regex.Match(text, pattern, (RegexOptions)caseSensitivity).ToString(); diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs index 9609a9531..6ad366246 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexFunctionBase.cs @@ -13,29 +13,23 @@ protected static string GetValue( int nRows, int nCols, int row, int col) { - if (range == null) - // Skalärargument – broadcastas alltid + if (range == null) return scalar.Value?.ToString(); - - // Beräkna verkligt index med broadcasting (storlek 1 → använd index 0) + int r = nRows == 1 ? 0 : row; int c = nCols == 1 ? 0 : col; - // Utanför räckvidden → #N/A if (r >= nRows || c >= nCols) return null; return range.GetOffset(r, c)?.ToString(); } - /// - /// Beräknar resultatdimensionen för en axel enligt Excels broadcasting-regler. - /// protected static short ExpandedSize(int a, int b) { if (a == 1) return (short)b; if (b == 1) return (short)a; - return (short)Math.Max(a, b); // Båda > 1: max-storlek, överskott → #N/A + return (short)Math.Max(a, b); } } } diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs index 15c6bf96a..8da9f3bad 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexReplace.cs @@ -1,10 +1,13 @@ using OfficeOpenXml.FormulaParsing.FormulaExpressions; +using OfficeOpenXml.FormulaParsing.Ranges; using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.CompilerServices; using System.Text; using System.Text.RegularExpressions; + namespace OfficeOpenXml.FormulaParsing.Excel.Functions.Text { internal class RegexReplace : RegexFunctionBase @@ -19,8 +22,135 @@ public override CompileResult Execute(IList arguments, Parsing bool patternIsRange = arguments[1].IsExcelRange; bool replacementIsRange = arguments[2].IsExcelRange; - throw new NotImplementedException(); + int occurnance = arguments.Count > 3 ? ArgToInt(arguments, 3, 0) : 0; + int caseSensitive = arguments.Count > 4 ? ArgToInt(arguments, 4, 0) : 0; + + if (!textIsRange && !patternIsRange && !replacementIsRange) + { + var text = arguments[0].Value?.ToString() ?? string.Empty; + var pattern = arguments[1].Value?.ToString() ?? string.Empty; + var replacement = arguments[2].Value?.ToString() ?? string.Empty; + + if (caseSensitive > 1 || caseSensitive < 0 || (text != null && pattern == string.Empty)) + return CreateResult(ExcelErrorValue.Create(eErrorType.Value), DataType.ExcelError); + var res = GetRegexReplaced(text, pattern, replacement, occurnance, caseSensitive); + if (res == null) + return CreateResult(ExcelErrorValue.Create(eErrorType.Value), DataType.ExcelError); + return CreateResult(res, DataType.String); + } + + var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null; + var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null; + var replacements = replacementIsRange ? arguments[2].ValueAsRangeInfo : null; + + int textRows = texts != null ? texts.Size.NumberOfRows : 1; + int textCols = texts != null ? texts.Size.NumberOfCols : 1; + int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1; + int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1; + int replacementsRows = replacements != null ? replacements.Size.NumberOfRows : 1; + int replacementsCols = replacements != null ? replacements.Size.NumberOfCols : 1; + + + var nRows = ExpandedSizeRegexReplace(textRows, patternRows, replacementsRows); + var nCols = ExpandedSizeRegexReplace(textCols, patternCols, replacementsCols); + + var result = new InMemoryRange(nRows, nCols); + + for (int row = 0; row < nRows; row++) + { + for (int col = 0; col < nCols; col++) + { + var textValue = GetRegexReplaceValue(texts, arguments[0], textRows, textCols, row, col); + var patternValue = GetRegexReplaceValue(patterns, arguments[1], patternRows, patternCols, row, col); + var replacementValue = GetRegexReplaceValue(replacements, arguments[2], replacementsRows, replacementsCols, row, col) ?? string.Empty; + + if (textValue != null && patternValue == null) + { + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.Value)); + } + else if (textValue == null || patternValue == null) + { + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.NA)); + } + else if (caseSensitive > 1 || caseSensitive < 0) + { + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.Value)); + } + else + { + var val = GetRegexReplaced(textValue, patternValue, replacementValue, occurnance, caseSensitive); + if (val == null) + result.SetValue(row, col, ExcelErrorValue.Create(eErrorType.Value)); + else + result.SetValue(row, col, val); + } + } + } + + return CreateDynamicArrayResult(result, DataType.ExcelRange); } + private short ExpandedSizeRegexReplace(int a, int b, int c) + { + return (short)Math.Max(a, Math.Max(b,c)); + } + private string GetRegexReplaced(string text, string pattern, string replacement, int occurnance, int caseSensitive) + { + if (HasInvalidBackreference(pattern, replacement, (RegexOptions)caseSensitive)) + return null; + + if (Math.Abs(occurnance) > 0) + { + var allReplaceMatches = Regex.Matches(text, pattern, (RegexOptions)caseSensitive); + var targetIndex = occurnance > 0 ? occurnance - 1 + : allReplaceMatches.Count + occurnance; // search from end + if(targetIndex < 0 || targetIndex >= allReplaceMatches.Count) + { + return text; + } + var targetMatch = allReplaceMatches[targetIndex]; + return text.Substring(0, targetMatch.Index) + + targetMatch.Result(replacement) + + text.Substring(targetMatch.Index + targetMatch.Length); + } + else + { + return Regex.Replace(text, pattern, replacement, (RegexOptions)caseSensitive); + } + } + + private static string GetRegexReplaceValue( + IRangeInfo range, + FunctionArgument scalar, + int argRows, int argCols, + int row, int col) + { + if (range == null) + return scalar.Value?.ToString() ?? string.Empty; + + int r = argRows == 1 ? 0 : row; + int c = argCols == 1 ? 0 : col; + + if (r >= argRows || c >= argCols) + return null; + + return range.GetOffset(r, c)?.ToString() ?? string.Empty; + } + + + private static bool HasInvalidBackreference(string pattern, string replacement, RegexOptions options) + { + if (string.IsNullOrEmpty(replacement)) + return false; + + int maxGroup = new Regex(pattern, options).GetGroupNumbers().Max(); + + foreach (Match m in Regex.Matches(replacement, @"(? maxGroup) + return true; + } + return false; + } } } diff --git a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs index dec87fd94..359d27c7a 100644 --- a/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs +++ b/src/EPPlus/FormulaParsing/Excel/Functions/Text/RegexTest.cs @@ -19,7 +19,6 @@ public override CompileResult Execute(IList arguments, Parsing if (!textIsRange && !patternIsRange) { - // Skalär × skalär – ursprungligt beteende var text = arguments[0].Value?.ToString(); var pattern = arguments[1].Value?.ToString(); @@ -31,7 +30,6 @@ public override CompileResult Execute(IList arguments, Parsing return CreateResult(GetRegexTest(text, pattern, caseSensitivity), DataType.Boolean); } - // Minst ett range-argument – bygg resultatmatrisen var texts = textIsRange ? arguments[0].ValueAsRangeInfo : null; var patterns = patternIsRange ? arguments[1].ValueAsRangeInfo : null; @@ -40,9 +38,6 @@ public override CompileResult Execute(IList arguments, Parsing int patternRows = patterns != null ? patterns.Size.NumberOfRows : 1; int patternCols = patterns != null ? patterns.Size.NumberOfCols : 1; - // Broadcasting-regler: - // • Om en dimension är 1 → broadcastas till den andres storlek - // • Om båda > 1 → ta max (den kortare ger #N/A vid överflöd) var nRows = ExpandedSize(textRows, patternRows); var nCols = ExpandedSize(textCols, patternCols); diff --git a/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs index c9f93b891..ae8cf8e08 100644 --- a/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs +++ b/src/EPPlusTest/FormulaParsing/Excel/Functions/TextFunctions/RegexFunctionsTests.cs @@ -1,4 +1,5 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +using Microsoft.ApplicationInsights.DataContracts; +using Microsoft.VisualStudio.TestTools.UnitTesting; using OfficeOpenXml; using OfficeOpenXml.FormulaParsing.Excel.Functions.Information; using OfficeOpenXml.FormulaParsing.Excel.Functions.Text; @@ -17,7 +18,7 @@ public class RegexFunctionsTests : TestBase [TestMethod] public void RegexTest() { - using(var package = OpenPackage("Testpackage")) + using (var package = OpenPackage("Testpackage")) { var sheet = package.Workbook.Worksheets.Add("testsheet"); @@ -42,7 +43,7 @@ public void RegexTest() Assert.AreEqual(false, sheet.Cells["D3"].Value); Assert.AreEqual(false, sheet.Cells["D4"].Value); Assert.AreEqual(false, sheet.Cells["D5"].Value); - Assert.AreEqual(true, sheet.Cells["D6"].Value); + Assert.AreEqual(true, sheet.Cells["D6"].Value); } } @@ -308,9 +309,132 @@ public void RegexExtractShouldReturnSingleWithReturnMode2() sheet.Cells["D1"].Formula = "REGEXEXTRACT(A1:A2, B1, 2)"; sheet.Calculate(); - Assert.AreEqual("9183", sheet.Cells["D1"].Value); + Assert.AreEqual("9183", sheet.Cells["D1"].Value); Assert.AreEqual("2546", sheet.Cells["D2"].Value); } } + + [TestMethod] + public void RegexReplace() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "044-5654-6546"; + + sheet.Cells["B1"].Value = "[^0-9]"; + + sheet.Cells["D1"].Formula = "REGEXREPLACE(A1,B1,C1)"; + sheet.Calculate(); + + Assert.AreEqual("04456546546", sheet.Cells["D1"].Value); + } + } + + [TestMethod] + public void RegexReplaceWithOccurrance() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "044-5654-6546"; + + sheet.Cells["B1"].Value = "[^0-9]"; + + sheet.Cells["D1"].Formula = "REGEXREPLACE(A1,B1,C1, -1)"; + sheet.Calculate(); + + Assert.AreEqual("044-56546546", sheet.Cells["D1"].Value); + } + } + + [TestMethod] + public void RegexReplaceRangeInput() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "044-5654-6546"; + sheet.Cells["A2"].Value = "0546-4654-565"; + + sheet.Cells["D1"].Value = "[^0-9]"; + sheet.Cells["D2"].Value = "[^0-9]"; + + sheet.Cells["E1"].Formula = "REGEXREPLACE(A1:B2,D1:D3, B1:C2)"; + sheet.Calculate(); + + Assert.AreEqual("04456546546", sheet.Cells["E1"].Value); + Assert.AreEqual("05464654565", sheet.Cells["E2"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["E3"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.NA), sheet.Cells["F3"].Value); + } + } + + [TestMethod] + public void RegexReplaceValueError() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + sheet.Cells["A1"].Value = "044-5654-6546"; + sheet.Cells["D1"].Value = "(\\d{4})-(\\w+)-(\\w+)"; + sheet.Cells["B1"].Formula = "REGEXREPLACE(A1,C1,D1)"; + sheet.Calculate(); + + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["B1"].Value); + } + } + + [TestMethod] + public void RegexReplaceInvalidBackreference() + { + using (var package = OpenPackage("Testpackage")) + { + var sheet = package.Workbook.Worksheets.Add("testsheet"); + + // Text-kolumn (A1:A3) + sheet.Cells["A1"].Value = "2026-Stockholm-Q2"; + sheet.Cells["A2"].Value = "2025-Linkoping-Q1"; + sheet.Cells["A3"].Value = "2024-Orebro-Q4"; + + // Pattern: bara C1 satt (3 grupper), C2 och C3 tomma + sheet.Cells["C1"].Value = @"(\d{4})-(\w+)-(\w+)"; + + // Test 1: replacement med backreferenser ($3_$1) + // rad 1 har grupper → ok, rad 2-3 tomt pattern → 0 grupper → #VALUE! + sheet.Cells["E1"].Formula = "REGEXREPLACE(A1:A3, C1:C3, \"$3_$1\")"; + + // Test 2: samma uppsättning, replacement UTAN backreferens ("s") + // tomt pattern matchar varje position → "s" stoppas in överallt + sheet.Cells["G1"].Formula = "REGEXREPLACE(A1:A3, C1:C3, \"s\")"; + + // Test 3: skalärt – giltigt pattern + giltig backreferens + sheet.Cells["I1"].Formula = "REGEXREPLACE(A1, C1, \"$3_$1\")"; + + // Test 4: skalärt – pattern UTAN grupper + backreferens $1 → #VALUE! + sheet.Cells["I2"].Formula = "REGEXREPLACE(A1, \"[0-9]+\", \"$1\")"; + sheet.Calculate(); + + // Test 1 + Assert.AreEqual("Q2_2026", sheet.Cells["E1"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["E2"].Value); + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["E3"].Value); + + // Test 2 + Assert.AreEqual("s", sheet.Cells["G1"].Value); + Assert.AreEqual("s2s0s2s5s-sLsisnsksospsisnsgs-sQs1s", sheet.Cells["G2"].Value); + Assert.AreEqual("s2s0s2s4s-sOsrsesbsrsos-sQs4s", sheet.Cells["G3"].Value); + + // Test 3 + Assert.AreEqual("Q2_2026", sheet.Cells["I1"].Value); + + // Test 4: $1 finns inte ([0-9]+ har inga grupper) → #VALUE! + Assert.AreEqual(ExcelErrorValue.Create(eErrorType.Value), sheet.Cells["I2"].Value); + } + } } }