Skip to content

Commit 90e968e

Browse files
MichaelMWWMichael Wang (Centific Technologies Inc)
and
Michael Wang (Centific Technologies Inc)
authored
Support Chinese next next week day - first commit (#3184)
Co-authored-by: Michael Wang (Centific Technologies Inc) <[email protected]>
1 parent 25afa66 commit 90e968e

13 files changed

+344
-7
lines changed

.NET/Microsoft.Recognizers.Definitions.Common/Chinese/DateTimeDefinitions.cs

+7-3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ public static class DateTimeDefinitions
4242
public static readonly string DateThisRegex = $@"(这个|这一个|这|这一|本){WeekDayRegex}";
4343
public static readonly string DateLastRegex = $@"(上一个|上个|上一|上|最后一个|最后)(的)?{WeekDayRegex}";
4444
public static readonly string DateNextRegex = $@"(下一个|下个|下一|下)(的)?{WeekDayRegex}";
45+
public static readonly string DateNextNextRegex = $@"(下下|下下[个個]){WeekDayRegex}";
46+
public static readonly string DateLastLastRegex = $@"(上上|上上[个個]){WeekDayRegex}";
4547
public const string WeekWithWeekDayRangeRegex = @"^[.]";
4648
public const string WoMLastRegex = @"最后一";
4749
public const string WoMPreviousRegex = @"上个";
@@ -78,7 +80,8 @@ public static class DateTimeDefinitions
7880
public const string DatePeriodThisRegex = @"这个|这一个|这|这一|本";
7981
public const string DatePeriodLastRegex = @"上个|上一个|上|上一";
8082
public const string DatePeriodNextRegex = @"下个|下一个|下|下一";
81-
public const string DatePeriodNextNextRegex = @"下下";
83+
public const string DatePeriodNextNextRegex = @"下下|下下[个個]";
84+
public const string DatePeriodLastLastRegex = @"上上|上上[个個]";
8285
public static readonly string RelativeMonthRegex = $@"(?<relmonth>({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)";
8386
public const string HalfYearRegex = @"((?<firstHalf>(上|前)半年)|(?<secondHalf>(下|后)半年))";
8487
public static readonly string YearRegex = $@"(({YearNumRegex})(\s*年)?|({SimpleYearRegex})\s*年){HalfYearRegex}?";
@@ -90,7 +93,7 @@ public static class DateTimeDefinitions
9093
public static readonly string YearAndMonth = $@"({DatePeriodYearInCJKRegex}|{YearRegex}|(?<yearrel>明年|今年|去年))\s*({MonthRegex}|的?(?<cardinal>第一|第二|第三|第四|第五|第六|第七|第八|第九|第十|第十一|第十二|最后一)\s*个月\s*)";
9194
public static readonly string SimpleYearAndMonth = $@"({YearNumRegex}[/\\\-]{MonthNumRegex}\b$)";
9295
public static readonly string PureNumYearAndMonth = $@"({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})";
93-
public static readonly string OneWordPeriodRegex = $@"(((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*(周末|周|月|年)|周末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)";
96+
public static readonly string OneWordPeriodRegex = $@"(((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastLastRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*([周週]末|[周週]|月|年)|[周週]末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)";
9497
public const string LaterEarlyPeriodRegex = @"^[.]";
9598
public const string DatePointWithAgoAndLater = @"^[.]";
9699
public static readonly string WeekOfMonthRegex = $@"(?<wom>{MonthSuffixRegex}的(?<cardinal>第一|第二|第三|第四|第五|最后一)\s*周\s*)";
@@ -286,7 +289,8 @@ public static class DateTimeDefinitions
286289
};
287290
public static readonly IList<string> WeekendTerms = new List<string>
288291
{
289-
@"周末"
292+
@"周末",
293+
@"週末"
290294
};
291295
public static readonly IList<string> WeekTerms = new List<string>
292296
{

.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDateExtractorConfiguration.cs

+5-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ public class ChineseDateExtractorConfiguration : BaseDateTimeOptionsConfiguratio
2727

2828
public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DateNextRegex, RegexFlags, RegexTimeOut);
2929

30+
public static readonly Regex NextNextRegex = new Regex(DateTimeDefinitions.DateNextNextRegex, RegexFlags, RegexTimeOut);
31+
32+
public static readonly Regex LastLastRegex = new Regex(DateTimeDefinitions.DateLastLastRegex, RegexFlags, RegexTimeOut);
33+
3034
public static readonly Regex SpecialDayRegex = new Regex(DateTimeDefinitions.SpecialDayRegex, RegexFlags, RegexTimeOut);
3135

3236
public static readonly Regex WeekDayOfMonthRegex = new Regex(DateTimeDefinitions.WeekDayOfMonthRegex, RegexFlags, RegexTimeOut);
@@ -78,7 +82,7 @@ public ChineseDateExtractorConfiguration(IDateTimeOptionsConfiguration config)
7882

7983
ImplicitDateList = new List<Regex>
8084
{
81-
LunarRegex, SpecialDayRegex, ThisRegex, LastRegex, NextRegex,
85+
LunarRegex, SpecialDayRegex, ThisRegex, LastLastRegex, LastRegex, NextNextRegex, NextRegex,
8286
WeekDayRegex, WeekDayOfMonthRegex, SpecialDate,
8387
};
8488

.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDatePeriodExtractorConfiguration.cs

+1
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ public class ChineseDatePeriodExtractorConfiguration : BaseDateTimeOptionsConfig
9393
public static readonly Regex DateUnitRegex = new Regex(DateTimeDefinitions.DateUnitRegex, RegexFlags, RegexTimeOut);
9494
public static readonly Regex LastRegex = new Regex(DateTimeDefinitions.DatePeriodLastRegex, RegexFlags, RegexTimeOut);
9595
public static readonly Regex NextNextRegex = new Regex(DateTimeDefinitions.DatePeriodNextNextRegex, RegexFlags, RegexTimeOut);
96+
public static readonly Regex LastLastRegex = new Regex(DateTimeDefinitions.DatePeriodLastLastRegex, RegexFlags, RegexTimeOut);
9697
public static readonly Regex NextRegex = new Regex(DateTimeDefinitions.DatePeriodNextRegex, RegexFlags, RegexTimeOut);
9798
public static readonly Regex RelativeMonthRegex = new Regex(DateTimeDefinitions.RelativeMonthRegex, RegexFlags, RegexTimeOut);
9899
public static readonly Regex LaterEarlyPeriodRegex = new Regex(DateTimeDefinitions.LaterEarlyPeriodRegex, RegexFlags, RegexTimeOut);

.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDateParserConfiguration.cs

+6
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ public ChineseDateParserConfiguration(ICJKCommonDateTimeParserConfiguration conf
5151
WeekDayAndDayRegex = ChineseDateExtractorConfiguration.WeekDayAndDayRegex;
5252
DurationRelativeDurationUnitRegex = ChineseDateExtractorConfiguration.DurationRelativeDurationUnitRegex;
5353
SpecialDayWithNumRegex = ChineseDateExtractorConfiguration.SpecialDayWithNumRegex;
54+
NextNextRegex = ChineseDateExtractorConfiguration.NextNextRegex;
55+
LastLastRegex = ChineseDateExtractorConfiguration.LastLastRegex;
5456

5557
CardinalMap = config.CardinalMap;
5658
UnitMap = config.UnitMap;
@@ -94,6 +96,10 @@ public ChineseDateParserConfiguration(ICJKCommonDateTimeParserConfiguration conf
9496

9597
public Regex NextRegex { get; }
9698

99+
public Regex NextNextRegex { get; }
100+
101+
public Regex LastLastRegex { get; }
102+
97103
public Regex ThisRegex { get; }
98104

99105
public Regex LastRegex { get; }

.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Parsers/ChineseDatePeriodParserConfiguration.cs

+3
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public ChineseDatePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguratio
3838
SimpleCasesRegex = ChineseDatePeriodExtractorConfiguration.SimpleCasesRegex;
3939
ThisRegex = ChineseDatePeriodExtractorConfiguration.ThisRegex;
4040
NextNextRegex = ChineseDatePeriodExtractorConfiguration.NextNextRegex;
41+
LastLastRegex = ChineseDatePeriodExtractorConfiguration.LastLastRegex;
4142
NextRegex = ChineseDatePeriodExtractorConfiguration.NextRegex;
4243
LastRegex = ChineseDatePeriodExtractorConfiguration.LastRegex;
4344
YearToYear = ChineseDatePeriodExtractorConfiguration.YearToYear;
@@ -124,6 +125,8 @@ public ChineseDatePeriodParserConfiguration(ICJKCommonDateTimeParserConfiguratio
124125

125126
public Regex NextNextRegex { get; }
126127

128+
public Regex LastLastRegex { get; }
129+
127130
public Regex NextRegex { get; }
128131

129132
public Regex LastRegex { get; }

.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs

+56
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Collections.Generic;
66
using System.Globalization;
77
using System.Text.RegularExpressions;
8+
using Microsoft.Recognizers.Text.DateTime.Chinese;
89
using Microsoft.Recognizers.Text.Utilities;
910
using DateObject = System.DateTime;
1011

@@ -355,11 +356,21 @@ protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject ref
355356
ret = MatchThisWeekday(text, referenceDate);
356357
}
357358

359+
if (!ret.Success)
360+
{
361+
ret = MatchNextNextWeekday(text, referenceDate);
362+
}
363+
358364
if (!ret.Success)
359365
{
360366
ret = MatchNextWeekday(text, referenceDate);
361367
}
362368

369+
if (!ret.Success)
370+
{
371+
ret = MatchLastLastWeekday(text, referenceDate);
372+
}
373+
363374
if (!ret.Success)
364375
{
365376
ret = MatchLastWeekday(text, referenceDate);
@@ -497,6 +508,28 @@ protected DateTimeResolutionResult MatchNextWeekday(string text, DateObject refe
497508
return result;
498509
}
499510

511+
/// <summary>
/// Tries to resolve a "next next &lt;weekday&gt;" expression (e.g. "下下周日") to a concrete date.
/// </summary>
/// <param name="text">Candidate text; it is matched against the Chinese NextNextRegex via MatchExact with trim enabled.</param>
/// <param name="reference">Reference date from which the target weekday is computed.</param>
/// <returns>
/// A result with Timex, FutureValue, PastValue and Success set when the text matches;
/// otherwise the freshly constructed result, returned untouched.
/// </returns>
protected DateTimeResolutionResult MatchNextNextWeekday(string text, DateObject reference)
512+
{
513+
var result = new DateTimeResolutionResult();
514+
// NextNextRegex is read from ChineseDateParserConfiguration, so the shared config is downcast here.
// For any other configuration type the cast yields null and the method falls through to the final return.
var cnConfig = this.config as ChineseDateParserConfiguration;
515+
if (cnConfig != null)
516+
{
517+
var match = cnConfig.NextNextRegex.MatchExact(text, trim: true);
518+
if (match.Success)
519+
{
520+
var weekdayKey = match.Groups["weekday"].Value;
521+
// Step forward twice with the same weekday: first from the reference date, then from that result.
// NOTE(review): Next is an extension defined elsewhere; per the DateParser spec added with this change,
// reference 2024-11-15 with "下下周日" is expected to resolve to 2024-12-01 - confirm against the helper.
var value = reference.Next((DayOfWeek)this.config.DayOfWeek[weekdayKey]);
522+
value = value.Next((DayOfWeek)this.config.DayOfWeek[weekdayKey]);
523+
524+
result.Timex = DateTimeFormatUtil.LuisDate(value);
525+
// The future and past resolutions are deliberately assigned the same date.
result.FutureValue = result.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day);
526+
result.Success = true;
527+
}
528+
}
529+
530+
return result;
531+
}
532+
500533
protected DateTimeResolutionResult MatchThisWeekday(string text, DateObject reference)
501534
{
502535
var result = new DateTimeResolutionResult();
@@ -533,6 +566,29 @@ protected DateTimeResolutionResult MatchLastWeekday(string text, DateObject refe
533566
return result;
534567
}
535568

569+
/// <summary>
/// Tries to resolve a "last last &lt;weekday&gt;" expression (e.g. "上上個週三") to a concrete date.
/// </summary>
/// <param name="text">Candidate text; it is matched against the Chinese LastLastRegex via MatchExact with trim enabled.</param>
/// <param name="reference">Reference date from which the target weekday is computed.</param>
/// <returns>
/// A result with Timex, FutureValue, PastValue and Success set when the text matches;
/// otherwise the freshly constructed result, returned untouched.
/// </returns>
protected DateTimeResolutionResult MatchLastLastWeekday(string text, DateObject reference)
570+
{
571+
var result = new DateTimeResolutionResult();
572+
// LastLastRegex is read from ChineseDateParserConfiguration, so the shared config is downcast here.
// For any other configuration type the cast yields null and the method falls through to the final return.
var cnConfig = this.config as ChineseDateParserConfiguration;
573+
if (cnConfig != null)
574+
{
575+
var match = cnConfig.LastLastRegex.MatchExact(text, trim: true);
576+
577+
if (match.Success)
578+
{
579+
var weekdayKey = match.Groups["weekday"].Value;
580+
// Step backward twice with the same weekday: first from the reference date, then from that result.
// NOTE(review): Last is an extension defined elsewhere; per the DateParser spec added with this change,
// reference 2024-11-15 with "上上個週三" is expected to resolve to 2024-10-30 - confirm against the helper.
var value = reference.Last((DayOfWeek)this.config.DayOfWeek[weekdayKey]);
581+
value = value.Last((DayOfWeek)this.config.DayOfWeek[weekdayKey]);
582+
583+
result.Timex = DateTimeFormatUtil.LuisDate(value);
584+
// The future and past resolutions are deliberately assigned the same date.
result.FutureValue = result.PastValue = DateObject.MinValue.SafeCreateFromValue(value.Year, value.Month, value.Day);
585+
result.Success = true;
586+
}
587+
}
588+
589+
return result;
590+
}
591+
536592
protected DateTimeResolutionResult MatchWeekdayAlone(string text, DateObject reference)
537593
{
538594
var result = new DateTimeResolutionResult();

.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDatePeriodParser.cs

+7
Original file line numberDiff line numberDiff line change
@@ -1023,8 +1023,10 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe
10231023
}
10241024

10251025
// In Chinese, "下" means next: "下下周" means next next week and "下下周末" means next next weekend, so we need to check whether the text matches "下下"
1026+
// "上" means last: "上上周" means last last week and "上上周末" means last last weekend, so we need to check whether the text matches "上上"
10261027
ChineseDatePeriodParserConfiguration config = this.config as ChineseDatePeriodParserConfiguration;
10271028
bool nextNextMatch = config == null ? false : config.NextNextRegex.Match(trimmedText).Success;
1029+
bool lastlastMatch = config == null ? false : config.LastLastRegex.Match(trimmedText).Success;
10281030

10291031
var nextMatch = this.config.NextRegex.Match(trimmedText);
10301032
var lastMatch = this.config.LastRegex.Match(trimmedText);
@@ -1081,6 +1083,11 @@ private DateTimeResolutionResult ParseOneWordPeriod(string text, DateObject refe
10811083
// If it is Chinese "下下周" (next next week), "下下周末" (next next weekend), then swift is 2
10821084
swift = 2;
10831085
}
1086+
else if (lastlastMatch)
1087+
{
1088+
// If it is Chinese "上上周" (last last week), "上上周末" (last last weekend), then swift is -2
1089+
swift = -2;
1090+
}
10841091
else if (nextMatch.Success)
10851092
{
10861093
if (nextMatch.Groups[Constants.AfterGroupName].Success)

Patterns/Chinese/Chinese-DateTime.yaml

+12-3
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@ DateLastRegex: !nestedRegex
4848
DateNextRegex: !nestedRegex
4949
def: (下一个|下个|下一|下)(的)?{WeekDayRegex}
5050
references: [WeekDayRegex]
51+
DateNextNextRegex: !nestedRegex
52+
def: (下下|下下[个個]){WeekDayRegex}
53+
references: [WeekDayRegex]
54+
DateLastLastRegex: !nestedRegex
55+
def: (上上|上上[个個]){WeekDayRegex}
56+
references: [WeekDayRegex]
5157
WeekWithWeekDayRangeRegex: !simpleRegex
5258
# TODO: modify below regex according to the counterpart in Japanese
5359
def: ^[.]
@@ -150,7 +156,9 @@ DatePeriodLastRegex: !simpleRegex
150156
DatePeriodNextRegex: !simpleRegex
151157
def: 下个|下一个|下|下一
152158
DatePeriodNextNextRegex: !simpleRegex
153-
def: 下下
159+
def: 下下|下下[个個]
160+
DatePeriodLastLastRegex: !simpleRegex
161+
def: 上上|上上[个個]
154162
RelativeMonthRegex: !nestedRegex
155163
def: (?<relmonth>({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextRegex})\s*月)
156164
references: [DatePeriodThisRegex, DatePeriodLastRegex, DatePeriodNextRegex]
@@ -183,8 +191,8 @@ PureNumYearAndMonth: !nestedRegex
183191
def: ({YearRegexInNumber}\s*[-\.\/]\s*{MonthNumRegex})|({MonthNumRegex}\s*\/\s*{YearRegexInNumber})
184192
references: [YearRegexInNumber, MonthNumRegex]
185193
OneWordPeriodRegex: !nestedRegex
186-
def: (((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*(周末|周|月|年)|周末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)
187-
references: [MonthRegex, DatePeriodThisRegex, DatePeriodLastRegex, DatePeriodNextNextRegex, DatePeriodNextRegex, HalfYearRegex]
194+
def: (((?<yearrel>(明|今|去)年)\s*)?{MonthRegex}|({DatePeriodThisRegex}|{DatePeriodLastLastRegex}|{DatePeriodLastRegex}|{DatePeriodNextNextRegex}|{DatePeriodNextRegex})(?<halfTag>半)?\s*([周週]末|[周週]|月|年)|[周週]末|(今|明|去|前|后)年(\s*{HalfYearRegex})?)
195+
references: [MonthRegex, DatePeriodThisRegex, DatePeriodLastLastRegex, DatePeriodLastRegex, DatePeriodNextNextRegex, DatePeriodNextRegex, HalfYearRegex]
188196
LaterEarlyPeriodRegex: !simpleRegex
189197
# TODO: modify below regex according to the counterpart in Japanese
190198
def: ^[.]
@@ -567,6 +575,7 @@ WeekendTerms: !list
567575
types: [ string ]
568576
entries:
569577
- 周末
578+
- 週末
570579
WeekTerms: !list
571580
types: [ string ]
572581
entries:

Specs/DateTime/Chinese/DateExtractor.json

+24
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,30 @@
245245
}
246246
]
247247
},
248+
{
249+
"Input": "马拉松在下下周日举行",
250+
"NotSupported": "java, javascript, python",
251+
"Results": [
252+
{
253+
"Text": "下下周日",
254+
"Type": "date",
255+
"Start": 4,
256+
"Length": 4
257+
}
258+
]
259+
},
260+
{
261+
"Input": "任務是在上上個週三完成的",
262+
"NotSupported": "java, javascript, python",
263+
"Results": [
264+
{
265+
"Text": "上上個週三",
266+
"Type": "date",
267+
"Start": 4,
268+
"Length": 5
269+
}
270+
]
271+
},
248272
{
249273
"Input": "下次的12号",
250274
"Results": [

Specs/DateTime/Chinese/DateParser.json

+48
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,54 @@
581581
}
582582
]
583583
},
584+
{
585+
"Input": "马拉松在下下周日举行",
586+
"Context": {
587+
"ReferenceDateTime": "2024-11-15T00:00:00"
588+
},
589+
"NotSupported": "java, javascript, python",
590+
"Results": [
591+
{
592+
"Text": "下下周日",
593+
"Type": "date",
594+
"Value": {
595+
"Timex": "2024-12-01",
596+
"FutureResolution": {
597+
"date": "2024-12-01"
598+
},
599+
"PastResolution": {
600+
"date": "2024-12-01"
601+
}
602+
},
603+
"Start": 4,
604+
"Length": 4
605+
}
606+
]
607+
},
608+
{
609+
"Input": "任務是在上上個週三完成的",
610+
"Context": {
611+
"ReferenceDateTime": "2024-11-15T00:00:00"
612+
},
613+
"NotSupported": "java, javascript, python",
614+
"Results": [
615+
{
616+
"Text": "上上個週三",
617+
"Type": "date",
618+
"Value": {
619+
"Timex": "2024-10-30",
620+
"FutureResolution": {
621+
"date": "2024-10-30"
622+
},
623+
"PastResolution": {
624+
"date": "2024-10-30"
625+
}
626+
},
627+
"Start": 4,
628+
"Length": 5
629+
}
630+
]
631+
},
584632
{
585633
"Input": "12号",
586634
"Context": {

Specs/DateTime/Chinese/DatePeriodExtractor.json

+24
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,30 @@
100100
}
101101
]
102102
},
103+
{
104+
"Input": "我是上上周回来的",
105+
"NotSupported": "java, javascript, python",
106+
"Results": [
107+
{
108+
"Text": "上上周",
109+
"Type": "daterange",
110+
"Start": 2,
111+
"Length": 3
112+
}
113+
]
114+
},
115+
{
116+
"Input": "你上上個週末干嘛了",
117+
"NotSupported": "java, javascript, python",
118+
"Results": [
119+
{
120+
"Text": "上上個週末",
121+
"Type": "daterange",
122+
"Start": 1,
123+
"Length": 5
124+
}
125+
]
126+
},
103127
{
104128
"Input": "下个月完工",
105129
"Results": [

0 commit comments

Comments
 (0)